Autopsy  4.6.0
Graphical digital forensics platform for The Sleuth Kit and other tools.
CommonFilesMetadataBuilder.java
Go to the documentation of this file.
1 /*
2  *
3  * Autopsy Forensic Browser
4  *
5  * Copyright 2018 Basis Technology Corp.
6  * Contact: carrier <at> sleuthkit <dot> org
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  * http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  */
20 package org.sleuthkit.autopsy.commonfilesearch;
21 
22 import java.sql.ResultSet;
23 import java.sql.SQLException;
24 import java.util.ArrayList;
25 import java.util.HashMap;
26 import java.util.HashSet;
27 import java.util.List;
28 import java.util.Map;
29 import java.util.Set;
30 import java.util.stream.Collectors;
31 import java.util.stream.Stream;
32 import org.openide.util.NbBundle;
35 import org.sleuthkit.datamodel.HashUtility;
36 import org.sleuthkit.datamodel.SleuthkitCase;
37 import org.sleuthkit.datamodel.SleuthkitCase.CaseDbQuery;
38 import org.sleuthkit.datamodel.TskCoreException;
39 
48 @SuppressWarnings("PMD.AbstractNaming")
49 abstract class CommonFilesMetadataBuilder {
50 
51  private final Map<Long, String> dataSourceIdToNameMap;
52  private final boolean filterByMedia;
53  private final boolean filterByDoc;
54  private static final String filterByMimeTypesWhereClause = " and mime_type in (%s)"; //NON-NLS // where %s is csv list of mime_types to filter on
55 
56  /*
57  * The set of the MIME types that will be checked for extension mismatches
58  * when checkType is ONLY_MEDIA.
59  * ".jpg", ".jpeg", ".png", ".psd", ".nef", ".tiff", ".bmp", ".tec"
60  * ".aaf", ".3gp", ".asf", ".avi", ".m1v", ".m2v", //NON-NLS
61  * ".m4v", ".mp4", ".mov", ".mpeg", ".mpg", ".mpe", ".mp4", ".rm", ".wmv", ".mpv", ".flv", ".swf"
62  */
63  private static final Set<String> MEDIA_PICS_VIDEO_MIME_TYPES = Stream.of(
64  "image/bmp", //NON-NLS
65  "image/gif", //NON-NLS
66  "image/jpeg", //NON-NLS
67  "image/png", //NON-NLS
68  "image/tiff", //NON-NLS
69  "image/vnd.adobe.photoshop", //NON-NLS
70  "image/x-raw-nikon", //NON-NLS
71  "image/x-ms-bmp", //NON-NLS
72  "image/x-icon", //NON-NLS
73  "video/webm", //NON-NLS
74  "video/3gpp", //NON-NLS
75  "video/3gpp2", //NON-NLS
76  "video/ogg", //NON-NLS
77  "video/mpeg", //NON-NLS
78  "video/mp4", //NON-NLS
79  "video/quicktime", //NON-NLS
80  "video/x-msvideo", //NON-NLS
81  "video/x-flv", //NON-NLS
82  "video/x-m4v", //NON-NLS
83  "video/x-ms-wmv", //NON-NLS
84  "application/vnd.ms-asf", //NON-NLS
85  "application/vnd.rn-realmedia", //NON-NLS
86  "application/x-shockwave-flash" //NON-NLS
87  ).collect(Collectors.toSet());
88 
89  /*
90  * The set of the MIME types that will be checked for extension mismatches
91  * when checkType is ONLY_TEXT_FILES.
92  * ".doc", ".docx", ".odt", ".xls", ".xlsx", ".ppt", ".pptx"
93  * ".txt", ".rtf", ".log", ".text", ".xml"
94  * ".html", ".htm", ".css", ".js", ".php", ".aspx"
95  * ".pdf"
96  */
97  private static final Set<String> TEXT_FILES_MIME_TYPES = Stream.of(
98  "text/plain", //NON-NLS
99  "application/rtf", //NON-NLS
100  "application/pdf", //NON-NLS
101  "text/css", //NON-NLS
102  "text/html", //NON-NLS
103  "text/csv", //NON-NLS
104  "application/json", //NON-NLS
105  "application/javascript", //NON-NLS
106  "application/xml", //NON-NLS
107  "text/calendar", //NON-NLS
108  "application/x-msoffice", //NON-NLS
109  "application/x-ooxml", //NON-NLS
110  "application/msword", //NON-NLS
111  "application/vnd.openxmlformats-officedocument.wordprocessingml.document", //NON-NLS
112  "application/vnd.ms-powerpoint", //NON-NLS
113  "application/vnd.openxmlformats-officedocument.presentationml.presentation", //NON-NLS
114  "application/vnd.ms-excel", //NON-NLS
115  "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", //NON-NLS
116  "application/vnd.oasis.opendocument.presentation", //NON-NLS
117  "application/vnd.oasis.opendocument.spreadsheet", //NON-NLS
118  "application/vnd.oasis.opendocument.text" //NON-NLS
119  ).collect(Collectors.toSet());
120 
130  CommonFilesMetadataBuilder(Map<Long, String> dataSourceIdMap, boolean filterByMediaMimeType, boolean filterByDocMimeType) {
131  dataSourceIdToNameMap = dataSourceIdMap;
132  filterByMedia = filterByMediaMimeType;
133  filterByDoc = filterByDocMimeType;
134  }
135 
145  static final String SELECT_PREFIX = "SELECT obj_id, md5, data_source_obj_id from tsk_files where"; //NON-NLS
146 
157  protected abstract String buildSqlSelectStatement();
158 
169  public CommonFilesMetadata findCommonFiles() throws TskCoreException, NoCurrentCaseException, SQLException {
170 
171  Map<String, Md5Metadata> commonFiles = new HashMap<>();
172 
173  SleuthkitCase sleuthkitCase = Case.getOpenCase().getSleuthkitCase();
174  String selectStatement = this.buildSqlSelectStatement();
175 
176  try (
177  CaseDbQuery query = sleuthkitCase.executeQuery(selectStatement);
178  ResultSet resultSet = query.getResultSet()) {
179 
180  while (resultSet.next()) {
181  Long objectId = resultSet.getLong(1);
182  String md5 = resultSet.getString(2);
183  Long dataSourceId = resultSet.getLong(3);
184  String dataSource = this.dataSourceIdToNameMap.get(dataSourceId);
185 
186  if (md5 == null || HashUtility.isNoDataMd5(md5)) {
187  continue;
188  }
189 
190  if (commonFiles.containsKey(md5)) {
191  final Md5Metadata md5Metadata = commonFiles.get(md5);
192  md5Metadata.addFileInstanceMetadata(new FileInstanceMetadata(objectId, dataSource));
193  } else {
194  final List<FileInstanceMetadata> fileInstances = new ArrayList<>();
195  fileInstances.add(new FileInstanceMetadata(objectId, dataSource));
196  Md5Metadata md5Metadata = new Md5Metadata(md5, fileInstances);
197  commonFiles.put(md5, md5Metadata);
198  }
199  }
200  }
201 
202  return new CommonFilesMetadata(commonFiles);
203  }
204 
215  String determineMimeTypeFilter() {
216 
217  Set<String> mimeTypesToFilterOn = new HashSet<>();
218  String mimeTypeString = "";
219  if (filterByMedia) {
220  mimeTypesToFilterOn.addAll(MEDIA_PICS_VIDEO_MIME_TYPES);
221  }
222  if (filterByDoc) {
223  mimeTypesToFilterOn.addAll(TEXT_FILES_MIME_TYPES);
224  }
225  StringBuilder mimeTypeFilter = new StringBuilder(mimeTypesToFilterOn.size());
226  if (!mimeTypesToFilterOn.isEmpty()) {
227  for (String mimeType : mimeTypesToFilterOn) {
228  mimeTypeFilter.append('"').append(mimeType).append("\",");
229  }
230  mimeTypeString = mimeTypeFilter.toString().substring(0, mimeTypeFilter.length() - 1);
231  mimeTypeString = String.format(filterByMimeTypesWhereClause, new Object[]{mimeTypeString});
232  }
233  return mimeTypeString;
234  }
235 
236  @NbBundle.Messages({
237  "CommonFilesMetadataBuilder.buildTabTitle.titleAll=Common Files (All Data Sources, %s)",
238  "CommonFilesMetadataBuilder.buildTabTitle.titleSingle=Common Files (Match Within Data Source: %s, %s)"
239  })
240  protected abstract String buildTabTitle();
241 
242  @NbBundle.Messages({
243  "CommonFilesMetadataBuilder.buildCategorySelectionString.doc=Documents",
244  "CommonFilesMetadataBuilder.buildCategorySelectionString.media=Media",
245  "CommonFilesMetadataBuilder.buildCategorySelectionString.all=All File Categories"
246  })
247  protected String buildCategorySelectionString() {
248  if (!this.filterByDoc && !this.filterByMedia) {
249  return Bundle.CommonFilesMetadataBuilder_buildCategorySelectionString_all();
250  } else {
251  List<String> filters = new ArrayList<>();
252  if (this.filterByDoc) {
253  filters.add(Bundle.CommonFilesMetadataBuilder_buildCategorySelectionString_doc());
254  }
255  if (this.filterByMedia) {
256  filters.add(Bundle.CommonFilesMetadataBuilder_buildCategorySelectionString_media());
257  }
258  return String.join(", ", filters);
259  }
260  }
261 }

Copyright © 2012-2016 Basis Technology. Generated on: Mon May 7 2018
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.