Autopsy  4.12.0
Graphical digital forensics platform for The Sleuth Kit and other tools.
Ingester.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2018 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.keywordsearch;
20 
21 import java.io.BufferedReader;
22 import java.io.Reader;
23 import java.util.HashMap;
24 import java.util.Map;
25 import java.util.logging.Level;
26 import org.apache.commons.lang3.math.NumberUtils;
27 import org.apache.solr.client.solrj.SolrServerException;
28 import org.apache.solr.common.SolrInputDocument;
29 import org.openide.util.NbBundle;
36 import org.sleuthkit.datamodel.AbstractFile;
37 import org.sleuthkit.datamodel.BlackboardArtifact;
38 import org.sleuthkit.datamodel.Content;
39 import org.sleuthkit.datamodel.DerivedFile;
40 import org.sleuthkit.datamodel.Directory;
41 import org.sleuthkit.datamodel.File;
42 import org.sleuthkit.datamodel.LayoutFile;
43 import org.sleuthkit.datamodel.LocalDirectory;
44 import org.sleuthkit.datamodel.LocalFile;
45 import org.sleuthkit.datamodel.Report;
46 import org.sleuthkit.datamodel.SlackFile;
47 import org.sleuthkit.datamodel.SleuthkitItemVisitor;
48 import org.sleuthkit.datamodel.SleuthkitVisitableItem;
49 import org.sleuthkit.datamodel.TskCoreException;
50 
54 //JMTODO: Should this class really be a singleton?
55 class Ingester {
56 
/** Logger shared by the ingester and its nested visitor. */
private static final Logger logger = Logger.getLogger(Ingester.class.getName());
/** Set once a document has been added to Solr; cleared by a successful {@link #commit()}. */
private volatile boolean uncommitedIngests = false;
/** Keyword search server that receives every document built by this class. */
private final Server solrServer = KeywordSearch.getServer();
/** Visitor used to derive the Solr field map for an item. */
private static final SolrFieldsVisitor SOLR_FIELDS_VISITOR = new SolrFieldsVisitor();
/** Lazily created singleton; only accessed through {@link #getDefault()}. */
private static Ingester instance;

/**
 * Not instantiable from outside; use {@link #getDefault()}.
 */
private Ingester() {
}

/**
 * Gets the shared ingester, creating it on first use.
 *
 * @return the singleton ingester instance
 */
public static synchronized Ingester getDefault() {
    Ingester current = instance;
    if (current == null) {
        current = new Ingester();
        instance = current;
    }
    return current;
}
72 
73  //JMTODO: this is probably useless
74  @Override
75  @SuppressWarnings("FinalizeDeclaration")
76  protected void finalize() throws Throwable {
77  super.finalize();
78 
79  // Warn if files might have been left uncommited.
80  if (uncommitedIngests) {
81  logger.warning("Ingester was used to add files that it never committed."); //NON-NLS
82  }
83  }
84 
95  void indexMetaDataOnly(AbstractFile file) throws IngesterException {
96  indexChunk("", file.getName().toLowerCase(), getContentFields(file));
97  }
98 
109  void indexMetaDataOnly(BlackboardArtifact artifact, String sourceName) throws IngesterException {
110  indexChunk("", sourceName, getContentFields(artifact));
111  }
112 
121  private Map<String, String> getContentFields(SleuthkitVisitableItem item) {
122  return item.accept(SOLR_FIELDS_VISITOR);
123  }
124 
142  // TODO (JIRA-3118): Cancelled text indexing does not propagate cancellation to clients
143  < T extends SleuthkitVisitableItem> boolean indexText(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context) throws Ingester.IngesterException {
144  int numChunks = 0; //unknown until chunking is done
145 
146  Map<String, String> fields = getContentFields(source);
147  //Get a reader for the content of the given source
148  try (BufferedReader reader = new BufferedReader(sourceReader)) {
149  Chunker chunker = new Chunker(reader);
150  for (Chunk chunk : chunker) {
151  if (context != null && context.fileIngestIsCancelled()) {
152  logger.log(Level.INFO, "File ingest cancelled. Cancelling keyword search indexing of {0}", sourceName);
153  return false;
154  }
155  String chunkId = Server.getChunkIdString(sourceID, numChunks + 1);
156  fields.put(Server.Schema.ID.toString(), chunkId);
157  fields.put(Server.Schema.CHUNK_SIZE.toString(), String.valueOf(chunk.getBaseChunkLength()));
158  try {
159  //add the chunk text to Solr index
160  indexChunk(chunk.toString(), sourceName, fields);
161  numChunks++;
162  } catch (Ingester.IngesterException ingEx) {
163  logger.log(Level.WARNING, "Ingester had a problem with extracted string from file '" //NON-NLS
164  + sourceName + "' (id: " + sourceID + ").", ingEx);//NON-NLS
165 
166  throw ingEx; //need to rethrow to signal error and move on
167  }
168  }
169  if (chunker.hasException()) {
170  logger.log(Level.WARNING, "Error chunking content from " + sourceID + ": " + sourceName, chunker.getException());
171  return false;
172  }
173  } catch (Exception ex) {
174  logger.log(Level.WARNING, "Unexpected error, can't read content stream from " + sourceID + ": " + sourceName, ex);//NON-NLS
175  return false;
176  } finally {
177  if (context != null && context.fileIngestIsCancelled()) {
178  return false;
179  } else {
180  //after all chunks, index just the meta data, including the numChunks, of the parent file
181  fields.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(numChunks));
182  //reset id field to base document id
183  fields.put(Server.Schema.ID.toString(), Long.toString(sourceID));
184  //"parent" docs don't have chunk_size
185  fields.remove(Server.Schema.CHUNK_SIZE.toString());
186  indexChunk(null, sourceName, fields);
187  }
188  }
189  return true;
190  }
191 
205  private void indexChunk(String chunk, String sourceName, Map<String, String> fields) throws IngesterException {
206  if (fields.get(Server.Schema.IMAGE_ID.toString()) == null) {
207  //JMTODO: actually if the we couldn't get the image id it is set to -1,
208  // but does this really mean we don't want to index it?
209 
210  //skip the file, image id unknown
211  String msg = NbBundle.getMessage(Ingester.class,
212  "Ingester.ingest.exception.unknownImgId.msg", sourceName); //JMTODO: does this need to ne internationalized?
213  logger.log(Level.SEVERE, msg);
214  throw new IngesterException(msg);
215  }
216 
217  //Make a SolrInputDocument out of the field map
218  SolrInputDocument updateDoc = new SolrInputDocument();
219  for (String key : fields.keySet()) {
220  updateDoc.addField(key, fields.get(key));
221  }
222 
223  try {
224  //TODO: consider timeout thread, or vary socket timeout based on size of indexed content
225 
226  //add the content to the SolrInputDocument
227  //JMTODO: can we just add it to the field map before passing that in?
228  updateDoc.addField(Server.Schema.CONTENT.toString(), chunk);
229 
230  // We also add the content (if present) in lowercase form to facilitate case
231  // insensitive substring/regular expression search.
232  double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion());
233  if (indexSchemaVersion >= 2.1) {
234  updateDoc.addField(Server.Schema.CONTENT_STR.toString(), ((chunk == null) ? "" : chunk.toLowerCase()));
235  }
236 
237  TimingMetric metric = HealthMonitor.getTimingMetric("Solr: Index chunk");
238 
239  solrServer.addDocument(updateDoc);
240  HealthMonitor.submitTimingMetric(metric);
241  uncommitedIngests = true;
242 
243  } catch (KeywordSearchModuleException | NoOpenCoreException ex) {
244  //JMTODO: does this need to be internationalized?
245  throw new IngesterException(
246  NbBundle.getMessage(Ingester.class, "Ingester.ingest.exception.err.msg", sourceName), ex);
247  }
248  }
249 
254  void commit() {
255  try {
256  solrServer.commit();
257  uncommitedIngests = false;
258  } catch (NoOpenCoreException | SolrServerException ex) {
259  logger.log(Level.WARNING, "Error commiting index", ex); //NON-NLS
260 
261  }
262  }
263 
267  static private class SolrFieldsVisitor extends SleuthkitItemVisitor.Default<Map<String, String>> {
268 
269  @Override
270  protected Map<String, String> defaultVisit(SleuthkitVisitableItem svi) {
271  return new HashMap<>();
272  }
273 
274  @Override
275  public Map<String, String> visit(File f) {
276  return getCommonAndMACTimeFields(f);
277  }
278 
279  @Override
280  public Map<String, String> visit(DerivedFile df) {
281  return getCommonAndMACTimeFields(df);
282  }
283 
284  @Override
285  public Map<String, String> visit(Directory d) {
286  return getCommonAndMACTimeFields(d);
287  }
288 
289  @Override
290  public Map<String, String> visit(LocalDirectory ld) {
291  return getCommonAndMACTimeFields(ld);
292  }
293 
294  @Override
295  public Map<String, String> visit(LayoutFile lf) {
296  // layout files do not have times
297  return getCommonFields(lf);
298  }
299 
300  @Override
301  public Map<String, String> visit(LocalFile lf) {
302  return getCommonAndMACTimeFields(lf);
303  }
304 
305  @Override
306  public Map<String, String> visit(SlackFile f) {
307  return getCommonAndMACTimeFields(f);
308  }
309 
319  private Map<String, String> getCommonAndMACTimeFields(AbstractFile file) {
320  Map<String, String> params = getCommonFields(file);
321  params.put(Server.Schema.CTIME.toString(), ContentUtils.getStringTimeISO8601(file.getCtime(), file));
322  params.put(Server.Schema.ATIME.toString(), ContentUtils.getStringTimeISO8601(file.getAtime(), file));
323  params.put(Server.Schema.MTIME.toString(), ContentUtils.getStringTimeISO8601(file.getMtime(), file));
324  params.put(Server.Schema.CRTIME.toString(), ContentUtils.getStringTimeISO8601(file.getCrtime(), file));
325  return params;
326  }
327 
336  private Map<String, String> getCommonFields(AbstractFile file) {
337  Map<String, String> params = new HashMap<>();
338  params.put(Server.Schema.ID.toString(), Long.toString(file.getId()));
339  try {
340  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(file.getDataSource().getId()));
341  } catch (TskCoreException ex) {
342  logger.log(Level.SEVERE, "Could not get data source id to properly index the file " + file.getId(), ex); //NON-NLS
343  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
344  }
345  params.put(Server.Schema.FILE_NAME.toString(), file.getName().toLowerCase());
346  return params;
347  }
348 
356  @Override
357  public Map<String, String> visit(BlackboardArtifact artifact) {
358  Map<String, String> params = new HashMap<>();
359  params.put(Server.Schema.ID.toString(), Long.toString(artifact.getArtifactID()));
360  try {
361  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(artifact.getDataSource().getId()));
362  } catch (TskCoreException ex) {
363  logger.log(Level.SEVERE, "Could not get data source id to properly index the artifact " + artifact.getArtifactID(), ex); //NON-NLS
364  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
365  }
366  return params;
367  }
368 
376  @Override
377  public Map<String, String> visit(Report report) {
378  Map<String, String> params = new HashMap<>();
379  params.put(Server.Schema.ID.toString(), Long.toString(report.getId()));
380  try {
381  Content dataSource = report.getDataSource();
382  if (null == dataSource) {
383  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
384  } else {
385  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(dataSource.getId()));
386  }
387  } catch (TskCoreException ex) {
388  logger.log(Level.SEVERE, "Could not get data source id to properly index the report, using default value. Id: " + report.getId(), ex); //NON-NLS
389  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
390  }
391  return params;
392  }
393  }
394 
399  static class IngesterException extends Exception {
400 
401  private static final long serialVersionUID = 1L;
402 
403  IngesterException(String message, Throwable ex) {
404  super(message, ex);
405  }
406 
407  IngesterException(String message) {
408  super(message);
409  }
410  }
411 }
Map< String, String > visit(LocalDirectory ld)
Definition: Ingester.java:290
Map< String, String > getCommonAndMACTimeFields(AbstractFile file)
Definition: Ingester.java:319
static String getStringTimeISO8601(long epochSeconds, TimeZone tzone)
Map< String, String > getCommonFields(AbstractFile file)
Definition: Ingester.java:336
Map< String, String > visit(BlackboardArtifact artifact)
Definition: Ingester.java:357
Map< String, String > defaultVisit(SleuthkitVisitableItem svi)
Definition: Ingester.java:270

Copyright © 2012-2018 Basis Technology. Generated on: Wed Sep 18 2019
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.