Autopsy  4.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
FileTypeDetector.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2016 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.modules.filetypeid;
20 
21 import java.util.ArrayList;
22 import java.util.Collections;
23 import java.util.List;
24 import java.util.SortedSet;
25 import java.util.TreeSet;
26 import java.util.logging.Level;
27 import java.util.stream.Collectors;
28 import org.apache.tika.Tika;
29 import org.apache.tika.mime.MimeTypes;
30 import org.openide.util.NbBundle;
35 import org.sleuthkit.datamodel.AbstractFile;
36 import org.sleuthkit.datamodel.BlackboardArtifact;
37 import org.sleuthkit.datamodel.BlackboardAttribute;
38 import org.sleuthkit.datamodel.TskCoreException;
39 import org.sleuthkit.datamodel.TskData;
40 
46 public class FileTypeDetector {
47 
// Logger for this class, named after the class itself.
48  private static final Logger logger = Logger.getLogger(FileTypeDetector.class.getName());
// Tika facade shared by every detector instance; detect() hands it the
// leading bytes of a file together with the file name.
49  private static final Tika tika = new Tika();
// Number of leading bytes of a file read for Tika detection: 64 * 1024 = 65536.
50  private static final int BUFFER_SIZE = 64 * 1024;
// Per-instance scratch buffer that detect() reads file content into.
// NOTE(review): because the buffer is reused across calls, concurrent
// detect() calls on one instance look unsafe - confirm the intended
// threading model.
51  private final byte buffer[] = new byte[BUFFER_SIZE];
// Custom file types obtained from CustomFileTypesManager during
// initialization; consulted before Tika, user-defined types first.
52  private final List<FileType> userDefinedFileTypes;
53  private final List<FileType> autopsyDefinedFileTypes;
// Cache filled on the first call to getStandardDetectedTypes(); holds the
// names of Tika's registered media types that carry no parameters.
54  private static SortedSet<String> detectedTypes; //no optional parameters
55 
/*
 * NOTE(review): the declaration line that opens this block (presumably the
 * FileTypeDetector constructor) is absent from this listing.
 *
 * Obtains the user-defined and the Autopsy-defined custom file types via
 * CustomFileTypesManager.getInstance(). A CustomFileTypesException is
 * rethrown as a FileTypeDetectorInitException with the original exception
 * preserved as its cause.
 */
67  try {
68  userDefinedFileTypes = CustomFileTypesManager.getInstance().getUserDefinedFileTypes();
69  autopsyDefinedFileTypes = CustomFileTypesManager.getInstance().getAutopsyDefinedFileTypes();
70  } catch (CustomFileTypesManager.CustomFileTypesException ex) {
71  throw new FileTypeDetectorInitException("Error loading custom file types", ex); //NON-NLS
72  }
73  }
74 
81  public List<String> getUserDefinedTypes() {
82  List<String> customFileTypes = new ArrayList<>();
83  for (FileType fileType : userDefinedFileTypes) {
84  customFileTypes.add(fileType.getMimeType());
85  }
86  for (FileType fileType : autopsyDefinedFileTypes) {
87  customFileTypes.add(fileType.getMimeType());
88  }
89  return customFileTypes;
90  }
91 
100  public boolean isDetectable(String mimeType) {
101  return isDetectableAsCustomType(userDefinedFileTypes, mimeType)
102  || isDetectableAsCustomType(autopsyDefinedFileTypes, mimeType)
103  || isDetectableByTika(mimeType);
104  }
105 
113  public static synchronized SortedSet<String> getStandardDetectedTypes() {
114  if (detectedTypes == null) {
115  detectedTypes = org.apache.tika.mime.MimeTypes.getDefaultMimeTypes().getMediaTypeRegistry().getTypes()
116  .stream().filter(t -> !t.hasParameters()).map(s -> s.toString()).collect(Collectors.toCollection(TreeSet::new));
117  }
118  return Collections.unmodifiableSortedSet(detectedTypes);
119  }
120 
130  private boolean isDetectableAsCustomType(List<FileType> customTypes, String mimeType) {
131  for (FileType fileType : customTypes) {
132  if (fileType.getMimeType().equals(mimeType)) {
133  return true;
134  }
135  }
136  return false;
137  }
138 
/**
 * Determines whether a given MIME type can be detected by Tika; used by
 * isDetectable() as the last resort after both custom type lists.
 *
 * NOTE(review): the method body (listing line 147) is absent from this
 * listing, so how the check is performed cannot be confirmed here -
 * presumably a lookup in getStandardDetectedTypes(); verify against the
 * original source file.
 *
 * @param mimeType The MIME type name.
 *
 * @return Presumably true if Tika can detect the type - TODO confirm.
 */
146  private boolean isDetectableByTika(String mimeType) {
148  }
149 
/**
 * Gets the MIME type of a file. If the file already has a MIME type, that
 * type is returned with any optional parameter removed; otherwise the type
 * is detected and, because this overload passes addToCaseDb = true, the
 * detected type is written to the case database.
 *
 * @param file The file to type.
 *
 * @return The MIME type name of the file.
 *
 * @throws TskCoreException Propagated from the underlying detect() call.
 */
166  public String getFileType(AbstractFile file) throws TskCoreException {
167  return detect(file, true);
168  }
169 
/**
 * Detects the MIME type of a file without recording the result: this
 * overload passes addToCaseDb = false, so nothing is written to the files
 * table of the case database. If the file already has a MIME type, that
 * type is returned with any optional parameter removed.
 *
 * @param file The file to type.
 *
 * @return The MIME type name of the file.
 *
 * @throws TskCoreException Propagated from the underlying detect() call.
 */
182  public String detect(AbstractFile file) throws TskCoreException {
183  return detect(file, false);
184  }
185 
202  private String detect(AbstractFile file, boolean addToCaseDb) throws TskCoreException {
203  /*
204  * Check to see if the file has already been typed. This is the "check"
205  * part of a check-then-act race condition (see note below).
206  */
207  String mimeType = file.getMIMEType();
208  if (null != mimeType) {
209  // We remove the optional parameter to allow this method to work
210  // with legacy databases that may contain MIME types with the
211  // optional parameter attached.
212  return removeOptionalParameter(mimeType);
213  }
214 
215  /*
216  * Mark non-regular files (refer to TskData.TSK_FS_META_TYPE_ENUM),
217  * zero-sized files, unallocated space, and unused blocks (refer to
218  * TskData.TSK_DB_FILES_TYPE_ENUM) as octet-stream.
219  */
220  if (!file.isFile() || file.getSize() <= 0
221  || (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS)
222  || (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS)
223  || (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.VIRTUAL_DIR)
224  || (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.SLACK)) {
225  mimeType = MimeTypes.OCTET_STREAM;
226  }
227 
228  /*
229  * If the file is a regular file, give precedence to user-defined custom
230  * file types.
231  */
232  if (null == mimeType) {
233  mimeType = detectUserDefinedType(file);
234  }
235 
236  /*
237  * If the file does not match a user-defined type, give precedence to
238  * custom file types defined by Autopsy.
239  */
240  if (null == mimeType) {
241  mimeType = detectAutopsyDefinedType(file);
242  }
243 
244  /*
245  * If the file does not match a user-defined type, send the initial
246  * bytes to Tika.
247  */
248  if (null == mimeType) {
249  try {
250  byte buf[];
251  int len = file.read(buffer, 0, BUFFER_SIZE);
252  if (len < BUFFER_SIZE) {
253  buf = new byte[len];
254  System.arraycopy(buffer, 0, buf, 0, len);
255  } else {
256  buf = buffer;
257  }
258  String tikaType = tika.detect(buf, file.getName());
259 
260  /*
261  * Remove the Tika suffix from the MIME type name.
262  */
263  mimeType = tikaType.replace("tika-", ""); //NON-NLS
264  /*
265  * Remove the optional parameter from the MIME type.
266  */
267  mimeType = removeOptionalParameter(mimeType);
268 
269  } catch (Exception ignored) {
270  /*
271  * This exception is swallowed and not logged rather than
272  * propagated because files in data sources are not always
273  * consistent with their file system metadata, making for read
274  * errors. Also, Tika can be a bit flaky at times, making this a
275  * best effort endeavor. Default to octet-stream.
276  */
277  mimeType = MimeTypes.OCTET_STREAM;
278  }
279  }
280 
281  /*
282  * If adding the result to the case database, do so now.
283  *
284  * NOTE: This condtional is a way to deal with the check-then-act race
285  * condition created by the gap between querying the MIME type and
286  * recording it. It is not really a problem for the mime_type column of
287  * the tsk_files table, but it can lead to duplicate blackboard posts,
288  * and the posts are required to maintain backward compatibility.
289  * Various mitigation strategies were considered. It was decided to go
290  * with the policy that only ingest modules are allowed to add file
291  * types to the case database, at least until such time as file types
292  * are no longer posted to the blackboard. Of course, this is not a
293  * perfect solution. It's not really enforceable for community
294  * contributed plug ins and it does not handle the unlikely but possible
295  * scenario of multiple processes typing the same file for a multi-user
296  * case.
297  */
298  if (addToCaseDb) {
299  /*
300  * Add the MIME type to the files table in the case database.
301  */
302  Case.getCurrentCase().getSleuthkitCase().setFileMIMEType(file, mimeType);
303  }
304 
305  return mimeType;
306  }
307 
313  private String removeOptionalParameter(String mimeType) {
314  int indexOfSemicolon = mimeType.indexOf(";");
315  if (indexOfSemicolon != -1 ) {
316  return mimeType.substring(0, indexOfSemicolon).trim();
317  } else {
318  return mimeType;
319  }
320  }
321 
/**
 * Determines whether a file matches a user-defined custom file type. The
 * first matching type wins. If that type is configured to create an
 * interesting file hit, a TSK_INTERESTING_FILE_HIT artifact is posted for
 * the file, with the type's interesting files set name as the set name
 * attribute and its MIME type as the category attribute.
 *
 * NOTE(review): listing lines 353 and 356 are absent from this listing,
 * so the try block below appears empty and the catch block ends with
 * dangling call arguments. Presumably line 353 indexes the artifact for
 * keyword search and line 356 opens a user notification call - verify
 * against the original source file.
 *
 * @param file The file to be typed.
 *
 * @return The MIME type name of the first matching user-defined type, or
 *         null if no user-defined type matches.
 *
 * @throws TskCoreException Declared by this method; the visible calls that
 *                          could raise it are the match test and the
 *                          artifact creation - TODO confirm.
 */
332  private String detectUserDefinedType(AbstractFile file) throws TskCoreException {
333  for (FileType fileType : userDefinedFileTypes) {
334  if (fileType.matches(file)) {
335  if (fileType.createInterestingFileHit()) {
336  BlackboardArtifact artifact;
337  artifact = file.newArtifact(BlackboardArtifact.ARTIFACT_TYPE.TSK_INTERESTING_FILE_HIT);
338  BlackboardAttribute setNameAttribute = new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_SET_NAME, FileTypeIdModuleFactory.getModuleName(), fileType.getInterestingFilesSetName());
339  artifact.addAttribute(setNameAttribute);
340 
341  /*
342  * Use the MIME type as the category attribute, i.e., the
343  * rule that determined this file belongs to the interesting
344  * files set.
345  */
346  BlackboardAttribute ruleNameAttribute = new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CATEGORY, FileTypeIdModuleFactory.getModuleName(), fileType.getMimeType());
347  artifact.addAttribute(ruleNameAttribute);
348 
349  /*
350  * Index the artifact for keyword search.
351  */
352  try {
354  } catch (Blackboard.BlackboardException ex) {
// An indexing failure is logged (and apparently reported to the user) but
// does not stop the MIME type from being returned below.
355  logger.log(Level.SEVERE, String.format("Unable to index blackboard artifact %d", artifact.getArtifactID()), ex); //NON-NLS
357  NbBundle.getMessage(Blackboard.class, "Blackboard.unableToIndexArtifact.exception.msg"), artifact.getDisplayName());
358  }
359  }
360 
// The type is reported whether or not an interesting file hit was created.
361  return fileType.getMimeType();
362  }
363  }
364  return null;
365  }
366 
377  private String detectAutopsyDefinedType(AbstractFile file) throws TskCoreException {
378  for (FileType fileType : autopsyDefinedFileTypes) {
379  if (fileType.matches(file)) {
380  return fileType.getMimeType();
381  }
382  }
383  return null;
384  }
385 
386  /*
387  * Exception thrown when a file type detector experiences an error
388  * condition.
389  */
390  public static class FileTypeDetectorInitException extends Exception {
391 
// Version identifier for Java serialization.
392  private static final long serialVersionUID = 1L;
393 
/**
 * Constructs an exception with a message only. The constructor has no
 * access modifier, so it is visible only within this package.
 *
 * @param message The exception message.
 */
400  FileTypeDetectorInitException(String message) {
401  super(message);
402  }
403 
/**
 * Constructs an exception with a message and an underlying cause. The
 * constructor has no access modifier, so it is visible only within this
 * package.
 *
 * @param message   The exception message.
 * @param throwable The underlying cause of this exception.
 */
411  FileTypeDetectorInitException(String message, Throwable throwable) {
412  super(message, throwable);
413  }
414 
415  }
416 
/**
 * Gets the MIME type of a file by delegating to getFileType(). Despite
 * its name, this method performs no blackboard posting of its own.
 *
 * @param file The file to type.
 *
 * @return The MIME type name of the file.
 *
 * @throws TskCoreException Propagated from getFileType().
 *
 * @deprecated Use getFileType() instead.
 */
430  @Deprecated
431  public String detectAndPostToBlackboard(AbstractFile file) throws TskCoreException {
432  return getFileType(file);
433  }
434 
435 }
boolean isDetectableAsCustomType(List< FileType > customTypes, String mimeType)
static synchronized SortedSet< String > getStandardDetectedTypes()
static void error(String title, String message)
synchronized void indexArtifact(BlackboardArtifact artifact)
Definition: Blackboard.java:59
synchronized static Logger getLogger(String name)
Definition: Logger.java:161
String detect(AbstractFile file, boolean addToCaseDb)

Copyright © 2012-2016 Basis Technology. Generated on: Mon Jan 2 2017
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.