Autopsy  4.9.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
FileTypeDetector.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2018 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.modules.filetypeid;
20 
21 import java.util.ArrayList;
22 import java.util.Collections;
23 import java.util.List;
24 import java.util.SortedSet;
25 import java.util.TreeSet;
26 import java.util.stream.Collectors;
27 import org.apache.tika.Tika;
28 import org.apache.tika.io.TikaInputStream;
29 import org.apache.tika.mime.MimeTypes;
31 import org.sleuthkit.datamodel.AbstractFile;
32 import org.sleuthkit.datamodel.ReadContentInputStream;
33 import org.sleuthkit.datamodel.TskCoreException;
34 import org.sleuthkit.datamodel.TskData;
35 
43 public class FileTypeDetector {
44 
/* Logger named after this class. */
45  private static final Logger logger = Logger.getLogger(FileTypeDetector.class.getName());
/* Shared Tika facade; getMIMEType() calls its detect(stream, name) method. */
46  private static final Tika tika = new Tika();
/*
 * Size limit used by getMIMEType(): a SLACK file whose getSize() is below
 * this value is typed as octet-stream without running any detection.
 */
47  private static final int SLACK_FILE_THRESHOLD = 4096;
/*
 * User-defined custom file types, obtained from CustomFileTypesManager.
 * getMIMEType() tries these before any other detector.
 */
48  private final List<FileType> userDefinedFileTypes;
/*
 * Custom file types defined by Autopsy, obtained from
 * CustomFileTypesManager. getMIMEType() tries these after the user-defined
 * types and before Tika.
 */
49  private final List<FileType> autopsyDefinedFileTypes;
/*
 * Cache of the MIME type names known to Tika; null until the first call to
 * getTikaDetectedTypes() fills it.
 */
50  private static SortedSet<String> tikaDetectedTypes;
51 
62  public static synchronized SortedSet<String> getDetectedTypes() throws FileTypeDetectorInitException {
63  TreeSet<String> detectedTypes = new TreeSet<>((String string1, String string2) -> {
64  int result = String.CASE_INSENSITIVE_ORDER.compare(string1, string2);
65  if (result == 0) {
66  result = string1.compareTo(string2);
67  }
68  return result;
69  });
70  detectedTypes.addAll(FileTypeDetector.getTikaDetectedTypes());
71  try {
72  for (FileType fileType : CustomFileTypesManager.getInstance().getAutopsyDefinedFileTypes()) {
73  detectedTypes.add(fileType.getMimeType());
74  }
75  } catch (CustomFileTypesManager.CustomFileTypesException ex) {
76  throw new FileTypeDetectorInitException("Error loading Autopsy custom file types", ex);
77  }
78  try {
79  for (FileType fileType : CustomFileTypesManager.getInstance().getUserDefinedFileTypes()) {
80  detectedTypes.add(fileType.getMimeType());
81  }
82  } catch (CustomFileTypesManager.CustomFileTypesException ex) {
83  throw new FileTypeDetectorInitException("Error loading user custom file types", ex);
84  }
85  return detectedTypes;
86  }
87 
95  private static SortedSet<String> getTikaDetectedTypes() {
96  if (null == tikaDetectedTypes) {
97  tikaDetectedTypes = org.apache.tika.mime.MimeTypes.getDefaultMimeTypes().getMediaTypeRegistry().getTypes()
98  .stream().filter(t -> !t.hasParameters()).map(s -> s.toString().replace("tika-", "")).collect(Collectors.toCollection(TreeSet::new));
99  }
100  return Collections.unmodifiableSortedSet(tikaDetectedTypes);
101  }
102 
116  try {
117  userDefinedFileTypes = CustomFileTypesManager.getInstance().getUserDefinedFileTypes();
118  autopsyDefinedFileTypes = CustomFileTypesManager.getInstance().getAutopsyDefinedFileTypes();
119  } catch (CustomFileTypesManager.CustomFileTypesException ex) {
120  throw new FileTypeDetectorInitException("Error loading custom file types", ex); //NON-NLS
121  }
122  }
123 
132  public boolean isDetectable(String mimeType) {
133  return isDetectableAsCustomType(userDefinedFileTypes, mimeType)
134  || isDetectableAsCustomType(autopsyDefinedFileTypes, mimeType)
135  || isDetectableByTika(mimeType);
136  }
137 
147  private boolean isDetectableAsCustomType(List<FileType> customTypes, String mimeType) {
148  for (FileType fileType : customTypes) {
149  if (fileType.getMimeType().equals(mimeType)) {
150  return true;
151  }
152  }
153  return false;
154  }
155 
163  private boolean isDetectableByTika(String mimeType) {
165  }
166 
178  public String getMIMEType(AbstractFile file) {
179  /*
180  * Check to see if the file has already been typed.
181  */
182  String mimeType = file.getMIMEType();
183  if (null != mimeType) {
184  // We remove the optional parameter to allow this method to work
185  // with legacy databases that may contain MIME types with the
186  // optional parameter attached.
187  return removeOptionalParameter(mimeType);
188  }
189 
190  /*
191  * Mark non-regular files (refer to TskData.TSK_FS_META_TYPE_ENUM),
192  * zero-sized files, unallocated space, and unused blocks (refer to
193  * TskData.TSK_DB_FILES_TYPE_ENUM) as octet-stream.
194  */
195  if (!file.isFile() || file.getSize() <= 0
196  || (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS)
197  || (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS)
198  || (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.VIRTUAL_DIR)
199  || ((file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.SLACK) && file.getSize() < SLACK_FILE_THRESHOLD)) {
200  mimeType = MimeTypes.OCTET_STREAM;
201  }
202 
203  /*
204  * If the file is a regular file, give precedence to user-defined custom
205  * file types.
206  */
207  if (null == mimeType) {
208  mimeType = detectUserDefinedType(file);
209  }
210 
211  /*
212  * If the file does not match a user-defined type, give precedence to
213  * custom file types defined by Autopsy.
214  */
215  if (null == mimeType) {
216  mimeType = detectAutopsyDefinedType(file);
217  }
218 
219  /*
220  * If the file does not match a user-defined type, send the initial
221  * bytes to Tika.
222  */
223  if (null == mimeType) {
224  ReadContentInputStream stream = new ReadContentInputStream(file);
225 
226  try (TikaInputStream tikaInputStream = TikaInputStream.get(stream)) {
227  String tikaType = tika.detect(tikaInputStream, file.getName());
228 
229  /*
230  * Remove the Tika suffix from the MIME type name.
231  */
232  mimeType = tikaType.replace("tika-", ""); //NON-NLS
233  /*
234  * Remove the optional parameter from the MIME type.
235  */
236  mimeType = removeOptionalParameter(mimeType);
237 
238  } catch (Exception ignored) {
239  /*
240  * This exception is swallowed and not logged rather than
241  * propagated because files in data sources are not always
242  * consistent with their file system metadata, making for read
243  * errors. Also, Tika can be a bit flaky at times, making this a
244  * best effort endeavor. Default to octet-stream.
245  */
246  mimeType = MimeTypes.OCTET_STREAM;
247  }
248  }
249 
250  /*
251  * Documented side effect: write the result to the AbstractFile object.
252  */
253  file.setMIMEType(mimeType);
254 
255  return mimeType;
256  }
257 
265  private String removeOptionalParameter(String mimeType) {
266  int indexOfSemicolon = mimeType.indexOf(';');
267  if (indexOfSemicolon != -1) {
268  return mimeType.substring(0, indexOfSemicolon).trim();
269  } else {
270  return mimeType;
271  }
272  }
273 
281  private String detectUserDefinedType(AbstractFile file) {
282  String retValue = null;
283 
284  for (FileType fileType : userDefinedFileTypes) {
285  if (fileType.matches(file)) {
286  retValue = fileType.getMimeType();
287  break;
288  }
289  }
290  return retValue;
291  }
292 
300  private String detectAutopsyDefinedType(AbstractFile file) {
301  for (FileType fileType : autopsyDefinedFileTypes) {
302  if (fileType.matches(file)) {
303  return fileType.getMimeType();
304  }
305  }
306  return null;
307  }
308 
309  /*
310  * Exception thrown if an initialization error occurs, e.g., user-defined
311  * file type definitions exist but cannot be loaded.
312  */
313  public static class FileTypeDetectorInitException extends Exception {
314 
315  private static final long serialVersionUID = 1L;
316 
323  FileTypeDetectorInitException(String message) {
324  super(message);
325  }
326 
334  FileTypeDetectorInitException(String message, Throwable throwable) {
335  super(message, throwable);
336  }
337 
338  }
339 
348  @Deprecated
349  public List<String> getUserDefinedTypes() {
350  List<String> customFileTypes = new ArrayList<>();
351  userDefinedFileTypes.forEach((fileType) -> {
352  customFileTypes.add(fileType.getMimeType());
353  });
354  autopsyDefinedFileTypes.forEach((fileType) -> {
355  customFileTypes.add(fileType.getMimeType());
356  });
357  return customFileTypes;
358  }
359 
374  @Deprecated
375  public String detectAndPostToBlackboard(AbstractFile file) throws TskCoreException {
376  String fileType = getMIMEType(file);
377  file.setMIMEType(fileType);
378  file.save();
379  return fileType;
380  }
381 
398  @Deprecated
399  public String getFileType(AbstractFile file) throws TskCoreException {
400  String fileType = getMIMEType(file);
401  file.setMIMEType(fileType);
402  file.save();
403  return fileType;
404  }
405 
418  @Deprecated
419  public String detect(AbstractFile file) throws TskCoreException {
420  String fileType = getMIMEType(file);
421  return fileType;
422  }
423 
424 }
boolean isDetectableAsCustomType(List< FileType > customTypes, String mimeType)
synchronized static Logger getLogger(String name)
Definition: Logger.java:124
static synchronized SortedSet< String > getDetectedTypes()

Copyright © 2012-2018 Basis Technology. Generated on: Tue Dec 18 2018
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.