Autopsy  4.11.0
Graphical digital forensics platform for The Sleuth Kit and other tools.
FileTypeDetector.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2018 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.modules.filetypeid;
20 
21 import java.util.ArrayList;
22 import java.util.Collections;
23 import java.util.List;
24 import java.util.SortedSet;
25 import java.util.TreeSet;
26 import java.util.logging.Level;
27 import java.util.stream.Collectors;
28 import org.apache.tika.Tika;
29 import org.apache.tika.io.TikaInputStream;
30 import org.apache.tika.mime.MimeTypes;
32 import org.sleuthkit.datamodel.AbstractFile;
33 import org.sleuthkit.datamodel.ReadContentInputStream;
34 import org.sleuthkit.datamodel.TskCoreException;
35 import org.sleuthkit.datamodel.TskData;
36 
/**
 * Detects the MIME type of a file. Detection tries user-defined custom file
 * types first, then the custom file types defined by Autopsy, and finally
 * Apache Tika (see getMIMEType).
 */
44 public class FileTypeDetector {
45 
// Class-wide logger, named after this class.
46  private static final Logger logger = Logger.getLogger(FileTypeDetector.class.getName());
// Single Tika facade shared by all detector instances; used by getMIMEType.
47  private static final Tika tika = new Tika();
// Slack files whose size is below this value are typed as octet-stream
// without running any detection (presumably a size in bytes - confirm
// against AbstractFile.getSize()).
48  private static final int SLACK_FILE_THRESHOLD = 4096;
// Custom file types defined by the user, loaded once in the constructor.
49  private final List<FileType> userDefinedFileTypes;
// Custom file types defined by Autopsy, loaded once in the constructor.
50  private final List<FileType> autopsyDefinedFileTypes;
// Lazily built cache of the MIME type names known to Tika; null until
// getTikaDetectedTypes() is first called.
51  private static SortedSet<String> tikaDetectedTypes;
52 
63  public static synchronized SortedSet<String> getDetectedTypes() throws FileTypeDetectorInitException {
64  TreeSet<String> detectedTypes = new TreeSet<>((String string1, String string2) -> {
65  int result = String.CASE_INSENSITIVE_ORDER.compare(string1, string2);
66  if (result == 0) {
67  result = string1.compareTo(string2);
68  }
69  return result;
70  });
71  detectedTypes.addAll(FileTypeDetector.getTikaDetectedTypes());
72  try {
73  for (FileType fileType : CustomFileTypesManager.getInstance().getAutopsyDefinedFileTypes()) {
74  detectedTypes.add(fileType.getMimeType());
75  }
76  } catch (CustomFileTypesManager.CustomFileTypesException ex) {
77  throw new FileTypeDetectorInitException("Error loading Autopsy custom file types", ex);
78  }
79  try {
80  for (FileType fileType : CustomFileTypesManager.getInstance().getUserDefinedFileTypes()) {
81  detectedTypes.add(fileType.getMimeType());
82  }
83  } catch (CustomFileTypesManager.CustomFileTypesException ex) {
84  throw new FileTypeDetectorInitException("Error loading user custom file types", ex);
85  }
86  return detectedTypes;
87  }
88 
96  private static SortedSet<String> getTikaDetectedTypes() {
97  if (null == tikaDetectedTypes) {
98  tikaDetectedTypes = org.apache.tika.mime.MimeTypes.getDefaultMimeTypes().getMediaTypeRegistry().getTypes()
99  .stream().filter(t -> !t.hasParameters()).map(s -> s.toString().replace("tika-", "")).collect(Collectors.toCollection(TreeSet::new));
100  }
101  return Collections.unmodifiableSortedSet(tikaDetectedTypes);
102  }
103 
/**
 * Constructs a detector and loads the user-defined and Autopsy-defined
 * custom file types it will consult.
 *
 * NOTE(review): the declaration line of this constructor was missing from
 * the listing; it is reconstructed here as a public no-argument constructor
 * because the body assigns the final instance fields and throws
 * FileTypeDetectorInitException - confirm the visibility against the
 * original source.
 *
 * @throws FileTypeDetectorInitException If the custom file types cannot be
 *                                       loaded.
 */
public FileTypeDetector() throws FileTypeDetectorInitException {
    try {
        userDefinedFileTypes = CustomFileTypesManager.getInstance().getUserDefinedFileTypes();
        autopsyDefinedFileTypes = CustomFileTypesManager.getInstance().getAutopsyDefinedFileTypes();
    } catch (CustomFileTypesManager.CustomFileTypesException ex) {
        throw new FileTypeDetectorInitException("Error loading custom file types", ex); //NON-NLS
    }
}
124 
133  public boolean isDetectable(String mimeType) {
134  return isDetectableAsCustomType(userDefinedFileTypes, mimeType)
135  || isDetectableAsCustomType(autopsyDefinedFileTypes, mimeType)
136  || isDetectableByTika(mimeType);
137  }
138 
148  private boolean isDetectableAsCustomType(List<FileType> customTypes, String mimeType) {
149  for (FileType fileType : customTypes) {
150  if (fileType.getMimeType().equals(mimeType)) {
151  return true;
152  }
153  }
154  return false;
155  }
156 
164  private boolean isDetectableByTika(String mimeType) {
166  }
167 
179  public String getMIMEType(AbstractFile file) {
180  /*
181  * Check to see if the file has already been typed.
182  */
183  String mimeType = file.getMIMEType();
184  if (null != mimeType) {
185  // We remove the optional parameter to allow this method to work
186  // with legacy databases that may contain MIME types with the
187  // optional parameter attached.
188  return removeOptionalParameter(mimeType);
189  }
190 
191  /*
192  * Mark non-regular files (refer to TskData.TSK_FS_META_TYPE_ENUM),
193  * zero-sized files, unallocated space, and unused blocks (refer to
194  * TskData.TSK_DB_FILES_TYPE_ENUM) as octet-stream.
195  */
196  if (!file.isFile() || file.getSize() <= 0
197  || (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS)
198  || (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS)
199  || (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.VIRTUAL_DIR)
200  || ((file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.SLACK) && file.getSize() < SLACK_FILE_THRESHOLD)) {
201  mimeType = MimeTypes.OCTET_STREAM;
202  }
203 
204  /*
205  * If the file is a regular file, give precedence to user-defined custom
206  * file types.
207  */
208  if (null == mimeType) {
209  mimeType = detectUserDefinedType(file);
210  }
211 
212  /*
213  * If the file does not match a user-defined type, give precedence to
214  * custom file types defined by Autopsy.
215  */
216  if (null == mimeType) {
217  mimeType = detectAutopsyDefinedType(file);
218  }
219 
220  /*
221  * If the file does not match a user-defined type, send the initial
222  * bytes to Tika.
223  */
224  if (null == mimeType) {
225  ReadContentInputStream stream = new ReadContentInputStream(file);
226 
227  try (TikaInputStream tikaInputStream = TikaInputStream.get(stream)) {
228  String tikaType = tika.detect(tikaInputStream);
229 
230  /*
231  * Remove the Tika suffix from the MIME type name.
232  */
233  mimeType = tikaType.replace("tika-", ""); //NON-NLS
234  /*
235  * Remove the optional parameter from the MIME type.
236  */
237  mimeType = removeOptionalParameter(mimeType);
238 
239  /*
240  * If Tika recognizes the file signature, then use the file
241  * name to refine the type. In short, this is to exclude the
242  * mime types that are determined solely by file extension.
243  * More details in JIRA-4871.
244  */
245  if (!mimeType.equals(MimeTypes.OCTET_STREAM)) {
246  ReadContentInputStream secondPassStream = new ReadContentInputStream(file);
247  try (TikaInputStream secondPassTikaStream = TikaInputStream.get(secondPassStream)) {
248  tikaType = tika.detect(secondPassTikaStream, file.getName());
249  mimeType = tikaType.replace("tika-", ""); //NON-NLS
250  mimeType = removeOptionalParameter(mimeType);
251  }
252  }
253 
259  if (mimeType.contains("audio/mpeg")) {
260  try {
261  byte[] header = getNBytes(file, 0, 2);
262  if (byteIs0xFF(header[0]) && byteIs0xFF(header[1])) {
263  mimeType = MimeTypes.OCTET_STREAM;
264  }
265  } catch (TskCoreException ex) {
266  //Oh well, the mimetype is what it is.
267  logger.log(Level.WARNING, String.format("Could not verify audio/mpeg mimetype for file %s with id=%d", file.getName(), file.getId()), ex);
268  }
269  }
270  } catch (Exception ignored) {
271  /*
272  * This exception is swallowed and not logged rather than
273  * propagated because files in data sources are not always
274  * consistent with their file system metadata, making for read
275  * errors. Also, Tika can be a bit flaky at times, making this a
276  * best effort endeavor. Default to octet-stream.
277  */
278  mimeType = MimeTypes.OCTET_STREAM;
279  }
280  }
281 
282  /*
283  * Documented side effect: write the result to the AbstractFile object.
284  */
285  file.setMIMEType(mimeType);
286 
287  return mimeType;
288  }
289 
298  private boolean byteIs0xFF(byte x) {
299  return (x & 0x0F) == 0x0F && (x & 0xF0) == 0xF0;
300  }
301 
313  private byte[] getNBytes(AbstractFile file, int offset, int n) throws TskCoreException {
314  byte[] headerCache = new byte[n];
315  file.read(headerCache, offset, n);
316  return headerCache;
317  }
318 
326  private String removeOptionalParameter(String mimeType) {
327  int indexOfSemicolon = mimeType.indexOf(';');
328  if (indexOfSemicolon != -1) {
329  return mimeType.substring(0, indexOfSemicolon).trim();
330  } else {
331  return mimeType;
332  }
333  }
334 
342  private String detectUserDefinedType(AbstractFile file) {
343  String retValue = null;
344 
345  for (FileType fileType : userDefinedFileTypes) {
346  if (fileType.matches(file)) {
347  retValue = fileType.getMimeType();
348  break;
349  }
350  }
351  return retValue;
352  }
353 
362  private String detectAutopsyDefinedType(AbstractFile file) {
363  for (FileType fileType : autopsyDefinedFileTypes) {
364  if (fileType.matches(file)) {
365  return fileType.getMimeType();
366  }
367  }
368  return null;
369  }
370 
371  /*
372  * Exception thrown if an initialization error occurs, e.g., user-defined
373  * file type definitions exist but cannot be loaded.
374  */
375  public static class FileTypeDetectorInitException extends Exception {
376 
377  private static final long serialVersionUID = 1L;
378 
385  FileTypeDetectorInitException(String message) {
386  super(message);
387  }
388 
396  FileTypeDetectorInitException(String message, Throwable throwable) {
397  super(message, throwable);
398  }
399 
400  }
401 
410  @Deprecated
411  public List<String> getUserDefinedTypes() {
412  List<String> customFileTypes = new ArrayList<>();
413  userDefinedFileTypes.forEach((fileType) -> {
414  customFileTypes.add(fileType.getMimeType());
415  });
416  autopsyDefinedFileTypes.forEach((fileType) -> {
417  customFileTypes.add(fileType.getMimeType());
418  });
419  return customFileTypes;
420  }
421 
436  @Deprecated
437  public String detectAndPostToBlackboard(AbstractFile file) throws TskCoreException {
438  String fileType = getMIMEType(file);
439  file.setMIMEType(fileType);
440  file.save();
441  return fileType;
442  }
443 
460  @Deprecated
461  public String getFileType(AbstractFile file) throws TskCoreException {
462  String fileType = getMIMEType(file);
463  file.setMIMEType(fileType);
464  file.save();
465  return fileType;
466  }
467 
480  @Deprecated
481  public String detect(AbstractFile file) throws TskCoreException {
482  String fileType = getMIMEType(file);
483  return fileType;
484  }
485 
486 }
byte[] getNBytes(AbstractFile file, int offset, int n)
boolean isDetectableAsCustomType(List< FileType > customTypes, String mimeType)
synchronized static Logger getLogger(String name)
Definition: Logger.java:124
static synchronized SortedSet< String > getDetectedTypes()

Copyright © 2012-2018 Basis Technology. Generated on: Fri Jun 21 2019
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.