Autopsy  4.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
FileTypeDetector.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2016 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.modules.filetypeid;
20 
21 import java.util.ArrayList;
22 import java.util.List;
23 import java.util.SortedSet;
24 import java.util.logging.Level;
25 import org.apache.tika.Tika;
26 import org.apache.tika.mime.MediaType;
27 import org.apache.tika.mime.MimeTypes;
28 import org.openide.util.NbBundle;
33 import org.sleuthkit.datamodel.AbstractFile;
34 import org.sleuthkit.datamodel.BlackboardArtifact;
35 import org.sleuthkit.datamodel.BlackboardAttribute;
36 import org.sleuthkit.datamodel.TskCoreException;
37 import org.sleuthkit.datamodel.TskData;
38 
44 public class FileTypeDetector {
45 
46  private static final Logger logger = Logger.getLogger(FileTypeDetector.class.getName());
47  private static final Tika tika = new Tika();
48  private static final int BUFFER_SIZE = 64 * 1024;
49  private final byte buffer[] = new byte[BUFFER_SIZE];
50  private final List<FileType> userDefinedFileTypes;
51  private final List<FileType> autopsyDefinedFileTypes;
52 
64  try {
65  userDefinedFileTypes = CustomFileTypesManager.getInstance().getUserDefinedFileTypes();
66  autopsyDefinedFileTypes = CustomFileTypesManager.getInstance().getAutopsyDefinedFileTypes();
67  } catch (CustomFileTypesManager.CustomFileTypesException ex) {
68  throw new FileTypeDetectorInitException("Error loading custom file types", ex); //NON-NLS
69  }
70  }
71 
78  public List<String> getUserDefinedTypes() {
79  List<String> customFileTypes = new ArrayList<>();
80  for (FileType fileType : userDefinedFileTypes) {
81  customFileTypes.add(fileType.getMimeType());
82  }
83  for (FileType fileType : autopsyDefinedFileTypes) {
84  customFileTypes.add(fileType.getMimeType());
85  }
86  return customFileTypes;
87  }
88 
97  public boolean isDetectable(String mimeType) {
98  return isDetectableAsCustomType(userDefinedFileTypes, mimeType)
99  || isDetectableAsCustomType(autopsyDefinedFileTypes, mimeType)
100  || isDetectableByTika(mimeType);
101  }
102 
112  private boolean isDetectableAsCustomType(List<FileType> customTypes, String mimeType) {
113  for (FileType fileType : customTypes) {
114  if (fileType.getMimeType().equals(mimeType)) {
115  return true;
116  }
117  }
118  return false;
119  }
120 
128  private boolean isDetectableByTika(String mimeType) {
129  String[] split = mimeType.split("/");
130  if (split.length == 2) {
131  String type = split[0];
132  String subtype = split[1];
133  MediaType mediaType = new MediaType(type, subtype);
134  SortedSet<MediaType> m = MimeTypes.getDefaultMimeTypes().getMediaTypeRegistry().getTypes();
135  return m.contains(mediaType);
136  }
137  return false;
138  }
139 
156  public String getFileType(AbstractFile file) throws TskCoreException {
157  return detect(file, true);
158  }
159 
172  public String detect(AbstractFile file) throws TskCoreException {
173  return detect(file, false);
174  }
175 
192  private String detect(AbstractFile file, boolean addToCaseDb) throws TskCoreException {
193  /*
194  * Check to see if the file has already been typed. This is the "check"
195  * part of a check-then-act race condition (see note below).
196  */
197  String mimeType = file.getMIMEType();
198  if (null != mimeType) {
199  return mimeType;
200  }
201 
202  /*
203  * Mark non-regular files (refer to TskData.TSK_FS_META_TYPE_ENUM),
204  * zero-sized files, unallocated space, and unused blocks (refer to
205  * TskData.TSK_DB_FILES_TYPE_ENUM) as octet-stream.
206  */
207  if (!file.isFile() || file.getSize() <= 0
208  || (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS)
209  || (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS)
210  || (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.VIRTUAL_DIR)) {
211  mimeType = MimeTypes.OCTET_STREAM;
212  }
213 
214  /*
215  * If the file is a regular file, give precedence to user-defined custom
216  * file types.
217  */
218  if (null == mimeType) {
219  mimeType = detectUserDefinedType(file);
220  }
221 
222  /*
223  * If the file does not match a user-defined type, give precedence to
224  * custom file types defined by Autopsy.
225  */
226  if (null == mimeType) {
227  mimeType = detectAutopsyDefinedType(file);
228  }
229 
230  /*
231  * If the file does not match a user-defined type, send the initial
232  * bytes to Tika.
233  */
234  if (null == mimeType) {
235  try {
236  byte buf[];
237  int len = file.read(buffer, 0, BUFFER_SIZE);
238  if (len < BUFFER_SIZE) {
239  buf = new byte[len];
240  System.arraycopy(buffer, 0, buf, 0, len);
241  } else {
242  buf = buffer;
243  }
244  String tikaType = tika.detect(buf, file.getName());
245 
246  /*
247  * Remove the Tika suffix from the MIME type name.
248  */
249  mimeType = tikaType.replace("tika-", ""); //NON-NLS
250 
251  } catch (Exception ignored) {
252  /*
253  * This exception is swallowed and not logged rather than
254  * propagated because files in data sources are not always
255  * consistent with their file system metadata, making for read
256  * errors. Also, Tika can be a bit flaky at times, making this a
257  * best effort endeavor. Default to octet-stream.
258  */
259  mimeType = MimeTypes.OCTET_STREAM;
260  }
261  }
262 
263  /*
264  * If adding the result to the case database, do so now.
265  *
266  * NOTE: This condtional is a way to deal with the check-then-act race
267  * condition created by the gap between querying the MIME type and
268  * recording it. It is not really a problem for the mime_type column of
269  * the tsk_files table, but it can lead to duplicate blackboard posts,
270  * and the posts are required to maintain backward compatibility.
271  * Various mitigation strategies were considered. It was decided to go
272  * with the policy that only ingest modules are allowed to add file
273  * types to the case database, at least until such time as file types
274  * are no longer posted to the blackboard. Of course, this is not a
275  * perfect solution. It's not really enforceable for community
276  * contributed plug ins and it does not handle the unlikely but possible
277  * scenario of multiple processes typing the same file for a multi-user
278  * case.
279  */
280  if (addToCaseDb) {
281  /*
282  * Add the MIME type to the files table in the case database.
283  */
284  Case.getCurrentCase().getSleuthkitCase().setFileMIMEType(file, mimeType);
285 
286  /*
287  * Post to the blackboard, adding the file type attribute to the
288  * general info artifact. A property change is not fired for this
289  * posting because general info artifacts are different from other
290  * artifacts, e.g., they are not displayed in the results tree.
291  */
292  BlackboardArtifact getInfoArt = file.getGenInfoArtifact();
293  @SuppressWarnings("deprecation")
294  BlackboardAttribute batt = new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_FILE_TYPE_SIG, FileTypeIdModuleFactory.getModuleName(), mimeType);
295  getInfoArt.addAttribute(batt);
296  }
297 
298  return mimeType;
299  }
300 
/**
 * Checks a file against each user-defined custom file type, in list
 * order, and returns the MIME type of the first type that matches.
 *
 * When the matching file type asks for an interesting file hit, a
 * TSK_INTERESTING_FILE_HIT artifact is created on the file with a
 * TSK_SET_NAME attribute (the interesting files set name) and a
 * TSK_CATEGORY attribute (the MIME type) before the MIME type is
 * returned.
 *
 * @param file The file to type.
 *
 * @return The MIME type name of the first matching user-defined type, or
 *         null if none matches.
 *
 * @throws TskCoreException Propagated from the calls made here -
 *                          NOTE(review): confirm which of the matching
 *                          and blackboard calls declare it.
 */
311  private String detectUserDefinedType(AbstractFile file) throws TskCoreException {
312  for (FileType fileType : userDefinedFileTypes) {
313  if (fileType.matches(file)) {
314  if (fileType.createInterestingFileHit()) {
315  BlackboardArtifact artifact;
316  artifact = file.newArtifact(BlackboardArtifact.ARTIFACT_TYPE.TSK_INTERESTING_FILE_HIT);
317  BlackboardAttribute setNameAttribute = new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_SET_NAME, FileTypeIdModuleFactory.getModuleName(), fileType.getInterestingFilesSetName());
318  artifact.addAttribute(setNameAttribute);
319 
320  /*
321  * Use the MIME type as the category attribute, i.e., the
322  * rule that determined this file belongs to the interesting
323  * files set.
324  */
325  BlackboardAttribute ruleNameAttribute = new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CATEGORY, FileTypeIdModuleFactory.getModuleName(), fileType.getMimeType());
326  artifact.addAttribute(ruleNameAttribute);
327 
328  /*
329  * Index the artifact for keyword search.
330  */
/*
 * NOTE(review): this listing shows an empty try body and, in the catch
 * block, an argument list with no call in front of it. The statement
 * that indexes the artifact and the call that opens the user
 * notification appear to have been dropped from the listing - restore
 * both from the real source file before compiling.
 */
331  try {
333  } catch (Blackboard.BlackboardException ex) {
/* An indexing failure is logged and reported; it does not abort typing. */
334  logger.log(Level.SEVERE, String.format("Unable to index blackboard artifact %d", artifact.getArtifactID()), ex); //NON-NLS
336  NbBundle.getMessage(Blackboard.class, "Blackboard.unableToIndexArtifact.exception.msg"), artifact.getDisplayName());
337  }
338  }
339 
/* First match wins: later user-defined types are not consulted. */
340  return fileType.getMimeType();
341  }
342  }
343  return null;
344  }
345 
356  private String detectAutopsyDefinedType(AbstractFile file) throws TskCoreException {
357  for (FileType fileType : autopsyDefinedFileTypes) {
358  if (fileType.matches(file)) {
359  return fileType.getMimeType();
360  }
361  }
362  return null;
363  }
364 
365  /*
366  * Exception thrown when a file type detector experiences an error
367  * condition.
368  */
369  public static class FileTypeDetectorInitException extends Exception {
370 
371  private static final long serialVersionUID = 1L;
372 
379  FileTypeDetectorInitException(String message) {
380  super(message);
381  }
382 
390  FileTypeDetectorInitException(String message, Throwable throwable) {
391  super(message, throwable);
392  }
393 
394  }
395 
409  @Deprecated
410  public String detectAndPostToBlackboard(AbstractFile file) throws TskCoreException {
411  return getFileType(file);
412  }
413 
414 }
boolean isDetectableAsCustomType(List< FileType > customTypes, String mimeType)
static void error(String title, String message)
synchronized void indexArtifact(BlackboardArtifact artifact)
Definition: Blackboard.java:59
synchronized static Logger getLogger(String name)
Definition: Logger.java:161
String detect(AbstractFile file, boolean addToCaseDb)

Copyright © 2012-2016 Basis Technology. Generated on: Tue Oct 25 2016
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.