Autopsy  4.4
Graphical digital forensics platform for The Sleuth Kit and other tools.
FileTypeDetector.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2017 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.modules.filetypeid;
20 
21 import java.util.ArrayList;
22 import java.util.Collections;
23 import java.util.List;
24 import java.util.SortedSet;
25 import java.util.TreeSet;
26 import java.util.logging.Level;
27 import java.util.stream.Collectors;
28 import org.apache.tika.Tika;
29 import org.apache.tika.mime.MimeTypes;
30 import org.openide.util.NbBundle;
35 import org.sleuthkit.datamodel.AbstractFile;
36 import org.sleuthkit.datamodel.BlackboardArtifact;
37 import org.sleuthkit.datamodel.BlackboardAttribute;
38 import org.sleuthkit.datamodel.TskCoreException;
39 import org.sleuthkit.datamodel.TskData;
40 
48 public class FileTypeDetector {
49 
50  private static final Logger logger = Logger.getLogger(FileTypeDetector.class.getName());
51  private static final Tika tika = new Tika();
52  private static final int BUFFER_SIZE = 64 * 1024;
53  private final byte buffer[] = new byte[BUFFER_SIZE];
54  private final List<FileType> userDefinedFileTypes;
55  private final List<FileType> autopsyDefinedFileTypes;
56  private static SortedSet<String> tikaDetectedTypes;
57 
68  public static synchronized SortedSet<String> getDetectedTypes() throws FileTypeDetectorInitException {
69  TreeSet<String> detectedTypes = new TreeSet<>((String string1, String string2) -> {
70  int result = String.CASE_INSENSITIVE_ORDER.compare(string1, string2);
71  if (result == 0) {
72  result = string1.compareTo(string2);
73  }
74  return result;
75  });
76  detectedTypes.addAll(FileTypeDetector.getTikaDetectedTypes());
77  try {
78  for (FileType fileType : CustomFileTypesManager.getInstance().getAutopsyDefinedFileTypes()) {
79  detectedTypes.add(fileType.getMimeType());
80  }
81  } catch (CustomFileTypesManager.CustomFileTypesException ex) {
82  throw new FileTypeDetectorInitException("Error loading Autopsy custom file types", ex);
83  }
84  try {
85  for (FileType fileType : CustomFileTypesManager.getInstance().getUserDefinedFileTypes()) {
86  detectedTypes.add(fileType.getMimeType());
87  }
88  } catch (CustomFileTypesManager.CustomFileTypesException ex) {
89  throw new FileTypeDetectorInitException("Error loading user custom file types", ex);
90  }
91  return detectedTypes;
92  }
93 
101  private static SortedSet<String> getTikaDetectedTypes() {
102  if (null == tikaDetectedTypes) {
103  tikaDetectedTypes = org.apache.tika.mime.MimeTypes.getDefaultMimeTypes().getMediaTypeRegistry().getTypes()
104  .stream().filter(t -> !t.hasParameters()).map(s -> s.toString()).collect(Collectors.toCollection(TreeSet::new));
105  }
106  return Collections.unmodifiableSortedSet(tikaDetectedTypes);
107  }
108 
122  try {
123  userDefinedFileTypes = CustomFileTypesManager.getInstance().getUserDefinedFileTypes();
124  autopsyDefinedFileTypes = CustomFileTypesManager.getInstance().getAutopsyDefinedFileTypes();
125  } catch (CustomFileTypesManager.CustomFileTypesException ex) {
126  throw new FileTypeDetectorInitException("Error loading custom file types", ex); //NON-NLS
127  }
128  }
129 
138  public boolean isDetectable(String mimeType) {
139  return isDetectableAsCustomType(userDefinedFileTypes, mimeType)
140  || isDetectableAsCustomType(autopsyDefinedFileTypes, mimeType)
141  || isDetectableByTika(mimeType);
142  }
143 
153  private boolean isDetectableAsCustomType(List<FileType> customTypes, String mimeType) {
154  for (FileType fileType : customTypes) {
155  if (fileType.getMimeType().equals(mimeType)) {
156  return true;
157  }
158  }
159  return false;
160  }
161 
169  private boolean isDetectableByTika(String mimeType) {
171  }
172 
189  public String getFileType(AbstractFile file) throws TskCoreException {
190  return detect(file, true);
191  }
192 
205  public String detect(AbstractFile file) throws TskCoreException {
206  return detect(file, false);
207  }
208 
225  private String detect(AbstractFile file, boolean addToCaseDb) throws TskCoreException {
226  /*
227  * Check to see if the file has already been typed. This is the "check"
228  * part of a check-then-act race condition (see note below).
229  */
230  String mimeType = file.getMIMEType();
231  if (null != mimeType) {
232  // We remove the optional parameter to allow this method to work
233  // with legacy databases that may contain MIME types with the
234  // optional parameter attached.
235  return removeOptionalParameter(mimeType);
236  }
237 
238  /*
239  * Mark non-regular files (refer to TskData.TSK_FS_META_TYPE_ENUM),
240  * zero-sized files, unallocated space, and unused blocks (refer to
241  * TskData.TSK_DB_FILES_TYPE_ENUM) as octet-stream.
242  */
243  if (!file.isFile() || file.getSize() <= 0
244  || (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS)
245  || (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS)
246  || (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.VIRTUAL_DIR)
247  || (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.SLACK)) {
248  mimeType = MimeTypes.OCTET_STREAM;
249  }
250 
251  /*
252  * If the file is a regular file, give precedence to user-defined custom
253  * file types.
254  */
255  if (null == mimeType) {
256  mimeType = detectUserDefinedType(file);
257  }
258 
259  /*
260  * If the file does not match a user-defined type, give precedence to
261  * custom file types defined by Autopsy.
262  */
263  if (null == mimeType) {
264  mimeType = detectAutopsyDefinedType(file);
265  }
266 
267  /*
268  * If the file does not match a user-defined type, send the initial
269  * bytes to Tika.
270  */
271  if (null == mimeType) {
272  try {
273  byte buf[];
274  int len = file.read(buffer, 0, BUFFER_SIZE);
275  if (len < BUFFER_SIZE) {
276  buf = new byte[len];
277  System.arraycopy(buffer, 0, buf, 0, len);
278  } else {
279  buf = buffer;
280  }
281  String tikaType = tika.detect(buf, file.getName());
282 
283  /*
284  * Remove the Tika suffix from the MIME type name.
285  */
286  mimeType = tikaType.replace("tika-", ""); //NON-NLS
287  /*
288  * Remove the optional parameter from the MIME type.
289  */
290  mimeType = removeOptionalParameter(mimeType);
291 
292  } catch (Exception ignored) {
293  /*
294  * This exception is swallowed and not logged rather than
295  * propagated because files in data sources are not always
296  * consistent with their file system metadata, making for read
297  * errors. Also, Tika can be a bit flaky at times, making this a
298  * best effort endeavor. Default to octet-stream.
299  */
300  mimeType = MimeTypes.OCTET_STREAM;
301  }
302  }
303 
304  /*
305  * If adding the result to the case database, do so now.
306  *
307  * NOTE: This condtional is a way to deal with the check-then-act race
308  * condition created by the gap between querying the MIME type and
309  * recording it. It is not really a problem for the mime_type column of
310  * the tsk_files table, but it can lead to duplicate blackboard posts,
311  * and the posts are required to maintain backward compatibility.
312  * Various mitigation strategies were considered. It was decided to go
313  * with the policy that only ingest modules are allowed to add file
314  * types to the case database, at least until such time as file types
315  * are no longer posted to the blackboard. Of course, this is not a
316  * perfect solution. It's not really enforceable for community
317  * contributed plug ins and it does not handle the unlikely but possible
318  * scenario of multiple processes typing the same file for a multi-user
319  * case.
320  */
321  if (addToCaseDb) {
322  /*
323  * Add the MIME type to the files table in the case database.
324  */
325  Case.getCurrentCase().getSleuthkitCase().setFileMIMEType(file, mimeType);
326  }
327 
328  return mimeType;
329  }
330 
338  private String removeOptionalParameter(String mimeType) {
339  int indexOfSemicolon = mimeType.indexOf(';');
340  if (indexOfSemicolon != -1) {
341  return mimeType.substring(0, indexOfSemicolon).trim();
342  } else {
343  return mimeType;
344  }
345  }
346 
357  private String detectUserDefinedType(AbstractFile file) throws TskCoreException {
358  for (FileType fileType : userDefinedFileTypes) {
359  if (fileType.matches(file)) {
360  if (fileType.createInterestingFileHit()) {
361  BlackboardArtifact artifact;
362  artifact = file.newArtifact(BlackboardArtifact.ARTIFACT_TYPE.TSK_INTERESTING_FILE_HIT);
363  BlackboardAttribute setNameAttribute = new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_SET_NAME, FileTypeIdModuleFactory.getModuleName(), fileType.getInterestingFilesSetName());
364  artifact.addAttribute(setNameAttribute);
365 
366  /*
367  * Use the MIME type as the category attribute, i.e., the
368  * rule that determined this file belongs to the interesting
369  * files set.
370  */
371  BlackboardAttribute ruleNameAttribute = new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CATEGORY, FileTypeIdModuleFactory.getModuleName(), fileType.getMimeType());
372  artifact.addAttribute(ruleNameAttribute);
373 
374  /*
375  * Index the artifact for keyword search.
376  */
377  try {
379  } catch (Blackboard.BlackboardException ex) {
380  logger.log(Level.SEVERE, String.format("Unable to index blackboard artifact %d", artifact.getArtifactID()), ex); //NON-NLS
382  NbBundle.getMessage(Blackboard.class, "Blackboard.unableToIndexArtifact.exception.msg"), artifact.getDisplayName());
383  }
384  }
385 
386  return fileType.getMimeType();
387  }
388  }
389  return null;
390  }
391 
402  private String detectAutopsyDefinedType(AbstractFile file) throws TskCoreException {
403  for (FileType fileType : autopsyDefinedFileTypes) {
404  if (fileType.matches(file)) {
405  return fileType.getMimeType();
406  }
407  }
408  return null;
409  }
410 
411  /*
412  * Exception thrown if an initialization error occurs, e.g., user-defined
413  * file type definitions exist but cannot be loaded.
414  */
415  public static class FileTypeDetectorInitException extends Exception {
416 
417  private static final long serialVersionUID = 1L;
418 
425  FileTypeDetectorInitException(String message) {
426  super(message);
427  }
428 
436  FileTypeDetectorInitException(String message, Throwable throwable) {
437  super(message, throwable);
438  }
439 
440  }
441 
450  @Deprecated
451  public List<String> getUserDefinedTypes() {
452  List<String> customFileTypes = new ArrayList<>();
453  userDefinedFileTypes.forEach((fileType) -> {
454  customFileTypes.add(fileType.getMimeType());
455  });
456  autopsyDefinedFileTypes.forEach((fileType) -> {
457  customFileTypes.add(fileType.getMimeType());
458  });
459  return customFileTypes;
460  }
461 
475  @Deprecated
476  public String detectAndPostToBlackboard(AbstractFile file) throws TskCoreException {
477  return getFileType(file);
478  }
479 
480 }
boolean isDetectableAsCustomType(List< FileType > customTypes, String mimeType)
static void error(String title, String message)
synchronized void indexArtifact(BlackboardArtifact artifact)
Definition: Blackboard.java:59
synchronized static Logger getLogger(String name)
Definition: Logger.java:161
String detect(AbstractFile file, boolean addToCaseDb)
static synchronized SortedSet< String > getDetectedTypes()

Copyright © 2012-2016 Basis Technology. Generated on: Tue Jun 13 2017
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.