Autopsy  4.0
Graphical digital forensics platform for The Sleuth Kit and other tools.
FileTypeDetector.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2016 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.modules.filetypeid;
20 
21 import java.util.ArrayList;
22 import java.util.List;
23 import java.util.SortedSet;
24 import java.util.logging.Level;
25 import org.apache.tika.Tika;
26 import org.apache.tika.mime.MediaType;
27 import org.apache.tika.mime.MimeTypes;
28 import org.openide.util.NbBundle;
33 import org.sleuthkit.datamodel.AbstractFile;
34 import org.sleuthkit.datamodel.BlackboardArtifact;
35 import org.sleuthkit.datamodel.BlackboardAttribute;
36 import org.sleuthkit.datamodel.TskCoreException;
37 import org.sleuthkit.datamodel.TskData;
38 
43 public class FileTypeDetector {
44 
45  private static final Tika tika = new Tika();
46  private static final int BUFFER_SIZE = 64 * 1024;
47  private final byte buffer[] = new byte[BUFFER_SIZE];
48  private final List<FileType> userDefinedFileTypes;
49  private static final Logger logger = Logger.getLogger(FileTypeDetector.class.getName());
50 
62  try {
63  userDefinedFileTypes = UserDefinedFileTypesManager.getInstance().getFileTypes();
64  } catch (UserDefinedFileTypesManager.UserDefinedFileTypesException ex) {
65  throw new FileTypeDetectorInitException("Error loading user-defined file types", ex); //NON-NLS
66  }
67  }
68 
74  public List<String> getUserDefinedTypes() {
75  List<String> list = new ArrayList<>();
76  if (userDefinedFileTypes != null) {
77  for (FileType fileType : userDefinedFileTypes) {
78  list.add(fileType.getMimeType());
79  }
80  }
81  return list;
82  }
83 
92  public boolean isDetectable(String mimeType) {
93  return isDetectableAsUserDefinedType(mimeType) || isDetectableByTika(mimeType);
94  }
95 
104  private boolean isDetectableAsUserDefinedType(String mimeType) {
105  for (FileType fileType : userDefinedFileTypes) {
106  if (fileType.getMimeType().equals(mimeType)) {
107  return true;
108  }
109  }
110  return false;
111  }
112 
120  private boolean isDetectableByTika(String mimeType) {
121  String[] split = mimeType.split("/");
122  if (split.length == 2) {
123  String type = split[0];
124  String subtype = split[1];
125  MediaType mediaType = new MediaType(type, subtype);
126  SortedSet<MediaType> m = MimeTypes.getDefaultMimeTypes().getMediaTypeRegistry().getTypes();
127  return m.contains(mediaType);
128  }
129  return false;
130  }
131 
148  public String getFileType(AbstractFile file) throws TskCoreException {
149  return detect(file, true);
150  }
151 
164  public String detect(AbstractFile file) throws TskCoreException {
165  return detect(file, false);
166  }
167 
184  private String detect(AbstractFile file, boolean addToCaseDb) throws TskCoreException {
185  /*
186  * Check to see if the file has already been typed. This is the "check"
187  * part of a check-then-act race condition (see note below).
188  */
189  String mimeType = file.getMIMEType();
190  if (null != mimeType) {
191  return mimeType;
192  }
193 
194  /*
195  * Mark non-regular files (refer to TskData.TSK_FS_META_TYPE_ENUM),
196  * zero-sized files, unallocated space, and unused blocks (refer to
197  * TskData.TSK_DB_FILES_TYPE_ENUM) as octet-stream.
198  */
199  if (!file.isFile() || file.getSize() <= 0
200  || (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS)
201  || (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS)
202  || (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.VIRTUAL_DIR)) {
203  mimeType = MimeTypes.OCTET_STREAM;
204  }
205 
206  /*
207  * If the file is a regular file, give precedence to user-defined types.
208  */
209  if (null == mimeType) {
210  mimeType = detectUserDefinedType(file, addToCaseDb);
211  }
212 
213  /*
214  * If the file does not match a user-defined type, send the initial
215  * bytes to Tika.
216  */
217  if (null == mimeType) {
218  try {
219  byte buf[];
220  int len = file.read(buffer, 0, BUFFER_SIZE);
221  if (len < BUFFER_SIZE) {
222  buf = new byte[len];
223  System.arraycopy(buffer, 0, buf, 0, len);
224  } else {
225  buf = buffer;
226  }
227  String tikaType = tika.detect(buf, file.getName());
228 
229  /*
230  * Remove the Tika suffix from the MIME type name.
231  */
232  mimeType = tikaType.replace("tika-", ""); //NON-NLS
233 
234  } catch (Exception ignored) {
235  /*
236  * This exception is swallowed and not logged rather than
237  * propagated because files in data sources are not always
238  * consistent with their file system metadata, making for read
239  * errors. Also, Tika can be a bit flaky at times, making this a
240  * best effort endeavor. Default to octet-stream.
241  */
242  mimeType = MimeTypes.OCTET_STREAM;
243  }
244  }
245 
246  /*
247  * If adding the result to the case database, do so now.
248  *
249  * NOTE: This condtional is a way to deal with the check-then-act race
250  * condition created by the gap between querying the MIME type and
251  * recording it. It is not really a problem for the mime_type column of
252  * the tsk_files table, but it can lead to duplicate blackboard posts,
253  * and the posts are required to maintain backward compatibility.
254  * Various mitigation strategies were considered. It was decided to go
255  * with the policy that only ingest modules are allowed to add file
256  * types to the case database, at least until such time as file types
257  * are no longer posted to the blackboard. Of course, this is not a
258  * perfect solution. It's not really enforceable for community
259  * contributed plug ins and it does not handle the unlikely but possible
260  * scenario of multiple processes typing the same file for a multi-user
261  * case.
262  */
263  if (addToCaseDb) {
264  /*
265  * Add the MIME type to the files table in the case database.
266  */
267  Case.getCurrentCase().getSleuthkitCase().setFileMIMEType(file, mimeType);
268 
269  /*
270  * Post to the blackboard, adding the file type attribute to the
271  * general info artifact. A property change is not fired for this
272  * posting because general info artifacts are different from other
273  * artifacts, e.g., they are not displayed in the results tree.
274  */
275  BlackboardArtifact getInfoArt = file.getGenInfoArtifact();
276  @SuppressWarnings("deprecation")
277  BlackboardAttribute batt = new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_FILE_TYPE_SIG, FileTypeIdModuleFactory.getModuleName(), mimeType);
278  getInfoArt.addAttribute(batt);
279  }
280 
281  return mimeType;
282  }
283 
/**
 * Checks a file against the user-defined file types, in the order they are
 * held, and reports the first one that matches.
 *
 * When postToBlackBoard is true and the matching type is flagged to alert
 * on match, an interesting file hit artifact is created for the file with
 * two attributes: the set name (the file type's files set name) and the
 * category (the file type's MIME type).
 *
 * @param file             The file to type.
 * @param postToBlackBoard Whether an interesting file hit may be posted
 *                         for a matching type that alerts on match.
 *
 * @return The MIME type name of the first matching user-defined file type,
 *         or null if no user-defined type matches.
 *
 * @throws TskCoreException Declared by this method; presumably raised by
 *                          the matching or blackboard calls below -
 *                          confirm against their declarations.
 */
298  private String detectUserDefinedType(AbstractFile file, boolean postToBlackBoard) throws TskCoreException {
299  for (FileType fileType : userDefinedFileTypes) {
300  if (fileType.matches(file)) {
301  if (postToBlackBoard && fileType.alertOnMatch()) {
302  /*
303  * Create an interesting file hit artifact.
304  */
305  BlackboardArtifact artifact;
306  artifact = file.newArtifact(BlackboardArtifact.ARTIFACT_TYPE.TSK_INTERESTING_FILE_HIT);
307  BlackboardAttribute setNameAttribute = new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_SET_NAME, FileTypeIdModuleFactory.getModuleName(), fileType.getFilesSetName());
308  artifact.addAttribute(setNameAttribute);
309 
310  /*
311  * Use the MIME type as the category attribute, i.e., the
312  * rule that determined this file belongs to the interesting
313  * files set.
314  */
315  BlackboardAttribute ruleNameAttribute = new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CATEGORY, FileTypeIdModuleFactory.getModuleName(), fileType.getMimeType());
316  artifact.addAttribute(ruleNameAttribute);
317 
318  /*
319  * Index the artifact for keyword search.
320  */
321  try {
/*
 * NOTE(review): listing lines 322 and 325 are absent from this copy of
 * the file. The try body (presumably the call that indexes the artifact)
 * and the start of the statement whose argument list ends on line 326
 * (presumably a user-facing error notification) must be restored from
 * the original source before this method can compile.
 */
323  } catch (Blackboard.BlackboardException | IllegalStateException ex) {
324  logger.log(Level.SEVERE, String.format("Unable to index blackboard artifact %d", artifact.getArtifactID()), ex); //NON-NLS
326  NbBundle.getMessage(Blackboard.class, "Blackboard.unableToIndexArtifact.exception.msg"), artifact.getDisplayName());
327  }
328  }
// The first matching definition wins; later definitions are not consulted.
329  return fileType.getMimeType();
330  }
331  }
// No user-defined type matched; the caller falls back to other detection.
332  return null;
333  }
334 
335  /*
336  * Exception thrown when a file type detector experiences an error
337  * condition.
338  */
339  public static class FileTypeDetectorInitException extends Exception {
340 
341  private static final long serialVersionUID = 1L;
342 
349  FileTypeDetectorInitException(String message) {
350  super(message);
351  }
352 
360  FileTypeDetectorInitException(String message, Throwable throwable) {
361  super(message, throwable);
362  }
363 
364  }
365 
379  @Deprecated
380  @SuppressWarnings("deprecation")
381  public String detectAndPostToBlackboard(AbstractFile file) throws TskCoreException {
382  return getFileType(file);
383  }
384 
385 }
void indexArtifact(BlackboardArtifact artifact)
Definition: Blackboard.java:45
String detectUserDefinedType(AbstractFile file, boolean postToBlackBoard)
static void error(String title, String message)
synchronized static Logger getLogger(String name)
Definition: Logger.java:166
String detect(AbstractFile file, boolean addToCaseDb)

Copyright © 2012-2015 Basis Technology. Generated on: Wed Apr 6 2016
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.