Autopsy  4.6.0
Graphical digital forensics platform for The Sleuth Kit and other tools.
Ingester.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2018 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.keywordsearch;
20 
21 import java.io.BufferedReader;
22 import java.util.HashMap;
23 import java.util.Map;
24 import java.util.logging.Level;
25 import org.apache.solr.client.solrj.SolrServerException;
26 import org.apache.solr.common.SolrInputDocument;
27 import org.openide.util.NbBundle;
34 import org.sleuthkit.datamodel.AbstractFile;
35 import org.sleuthkit.datamodel.BlackboardArtifact;
36 import org.sleuthkit.datamodel.Content;
37 import org.sleuthkit.datamodel.DerivedFile;
38 import org.sleuthkit.datamodel.Directory;
39 import org.sleuthkit.datamodel.File;
40 import org.sleuthkit.datamodel.LayoutFile;
41 import org.sleuthkit.datamodel.LocalDirectory;
42 import org.sleuthkit.datamodel.LocalFile;
43 import org.sleuthkit.datamodel.Report;
44 import org.sleuthkit.datamodel.SlackFile;
45 import org.sleuthkit.datamodel.SleuthkitItemVisitor;
46 import org.sleuthkit.datamodel.SleuthkitVisitableItem;
47 import org.sleuthkit.datamodel.TskCoreException;
48 
52 //JMTODO: Should this class really be a singleton?
53 class Ingester {
54 
55  private static final Logger logger = Logger.getLogger(Ingester.class.getName());
56  private volatile boolean uncommitedIngests = false;
57  private final Server solrServer = KeywordSearch.getServer();
58  private static final SolrFieldsVisitor SOLR_FIELDS_VISITOR = new SolrFieldsVisitor();
59  private static Ingester instance;
60  private static final int SINGLE_READ_CHARS = 512;
61 
62  private Ingester() {
63  }
64 
65  public static synchronized Ingester getDefault() {
66  if (instance == null) {
67  instance = new Ingester();
68  }
69  return instance;
70  }
71 
72  //JMTODO: this is probably useless
73  @Override
74  @SuppressWarnings("FinalizeDeclaration")
75  protected void finalize() throws Throwable {
76  super.finalize();
77 
78  // Warn if files might have been left uncommited.
79  if (uncommitedIngests) {
80  logger.warning("Ingester was used to add files that it never committed."); //NON-NLS
81  }
82  }
83 
94  void indexMetaDataOnly(AbstractFile file) throws IngesterException {
95  indexChunk("", file.getName(), getContentFields(file));
96  }
97 
108  void indexMetaDataOnly(BlackboardArtifact artifact) throws IngesterException {
109  indexChunk("", new ArtifactTextExtractor().getName(artifact), getContentFields(artifact));
110  }
111 
120  private Map<String, String> getContentFields(SleuthkitVisitableItem item) {
121  return item.accept(SOLR_FIELDS_VISITOR);
122  }
123 
143  // TODO (JIRA-3118): Cancelled text indexing does not propagate cancellation to clients
144  < T extends SleuthkitVisitableItem> boolean indexText(TextExtractor< T> extractor, T source, IngestJobContext context) throws Ingester.IngesterException {
145  final long sourceID = extractor.getID(source);
146  final String sourceName = extractor.getName(source);
147 
148  int numChunks = 0; //unknown until chunking is done
149 
150  if (extractor.isDisabled()) {
151  /*
152  * some Extractors, notable the strings extractor, have options
153  * which can be configured such that no extraction should be done
154  */
155  return true;
156  }
157 
158  Map<String, String> fields = getContentFields(source);
159  //Get a reader for the content of the given source
160  try (BufferedReader reader = new BufferedReader(extractor.getReader(source));) {
161  Chunker chunker = new Chunker(reader);
162  for (Chunk chunk : chunker) {
163  if (context != null && context.fileIngestIsCancelled()) {
164  logger.log(Level.INFO, "File ingest cancelled. Cancelling keyword search indexing of {0}", sourceName);
165  return false;
166  }
167  String chunkId = Server.getChunkIdString(sourceID, numChunks + 1);
168  fields.put(Server.Schema.ID.toString(), chunkId);
169  fields.put(Server.Schema.CHUNK_SIZE.toString(), String.valueOf(chunk.getBaseChunkLength()));
170  try {
171  //add the chunk text to Solr index
172  indexChunk(chunk.toString(), sourceName, fields);
173  numChunks++;
174  } catch (Ingester.IngesterException ingEx) {
175  extractor.logWarning("Ingester had a problem with extracted string from file '" //NON-NLS
176  + sourceName + "' (id: " + sourceID + ").", ingEx);//NON-NLS
177 
178  throw ingEx; //need to rethrow to signal error and move on
179  }
180  }
181  if (chunker.hasException()) {
182  extractor.logWarning("Error chunking content from " + sourceID + ": " + sourceName, chunker.getException());
183  return false;
184  }
185  } catch (Exception ex) {
186  extractor.logWarning("Unexpected error, can't read content stream from " + sourceID + ": " + sourceName, ex);//NON-NLS
187  return false;
188  } finally {
189  if (context != null && context.fileIngestIsCancelled()) {
190  return false;
191  } else {
192  //after all chunks, index just the meta data, including the numChunks, of the parent file
193  fields.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(numChunks));
194  //reset id field to base document id
195  fields.put(Server.Schema.ID.toString(), Long.toString(sourceID));
196  //"parent" docs don't have chunk_size
197  fields.remove(Server.Schema.CHUNK_SIZE.toString());
198  indexChunk(null, sourceName, fields);
199  }
200  }
201  return true;
202  }
203 
217  private void indexChunk(String chunk, String sourceName, Map<String, String> fields) throws IngesterException {
218  if (fields.get(Server.Schema.IMAGE_ID.toString()) == null) {
219  //JMTODO: actually if the we couldn't get the image id it is set to -1,
220  // but does this really mean we don't want to index it?
221 
222  //skip the file, image id unknown
223  String msg = NbBundle.getMessage(Ingester.class,
224  "Ingester.ingest.exception.unknownImgId.msg", sourceName); //JMTODO: does this need to ne internationalized?
225  logger.log(Level.SEVERE, msg);
226  throw new IngesterException(msg);
227  }
228 
229  //Make a SolrInputDocument out of the field map
230  SolrInputDocument updateDoc = new SolrInputDocument();
231  for (String key : fields.keySet()) {
232  updateDoc.addField(key, fields.get(key));
233  }
234  //add the content to the SolrInputDocument
235  //JMTODO: can we just add it to the field map before passing that in?
236  updateDoc.addField(Server.Schema.CONTENT.toString(), chunk);
237 
238  try {
239  //TODO: consider timeout thread, or vary socket timeout based on size of indexed content
240  TimingMetric metric = EnterpriseHealthMonitor.getTimingMetric("Solr: Index chunk");
241  solrServer.addDocument(updateDoc);
242  EnterpriseHealthMonitor.submitTimingMetric(metric);
243  uncommitedIngests = true;
244 
245  } catch (KeywordSearchModuleException | NoOpenCoreException ex) {
246  //JMTODO: does this need to be internationalized?
247  throw new IngesterException(
248  NbBundle.getMessage(Ingester.class, "Ingester.ingest.exception.err.msg", sourceName), ex);
249  }
250  }
251 
256  void commit() {
257  try {
258  solrServer.commit();
259  uncommitedIngests = false;
260  } catch (NoOpenCoreException | SolrServerException ex) {
261  logger.log(Level.WARNING, "Error commiting index", ex); //NON-NLS
262 
263  }
264  }
265 
269  static private class SolrFieldsVisitor extends SleuthkitItemVisitor.Default<Map<String, String>> {
270 
271  @Override
272  protected Map<String, String> defaultVisit(SleuthkitVisitableItem svi) {
273  return new HashMap<>();
274  }
275 
276  @Override
277  public Map<String, String> visit(File f) {
278  return getCommonAndMACTimeFields(f);
279  }
280 
281  @Override
282  public Map<String, String> visit(DerivedFile df) {
283  return getCommonAndMACTimeFields(df);
284  }
285 
286  @Override
287  public Map<String, String> visit(Directory d) {
288  return getCommonAndMACTimeFields(d);
289  }
290 
291  @Override
292  public Map<String, String> visit(LocalDirectory ld) {
293  return getCommonAndMACTimeFields(ld);
294  }
295 
296  @Override
297  public Map<String, String> visit(LayoutFile lf) {
298  // layout files do not have times
299  return getCommonFields(lf);
300  }
301 
302  @Override
303  public Map<String, String> visit(LocalFile lf) {
304  return getCommonAndMACTimeFields(lf);
305  }
306 
307  @Override
308  public Map<String, String> visit(SlackFile f) {
309  return getCommonAndMACTimeFields(f);
310  }
311 
321  private Map<String, String> getCommonAndMACTimeFields(AbstractFile file) {
322  Map<String, String> params = getCommonFields(file);
323  params.put(Server.Schema.CTIME.toString(), ContentUtils.getStringTimeISO8601(file.getCtime(), file));
324  params.put(Server.Schema.ATIME.toString(), ContentUtils.getStringTimeISO8601(file.getAtime(), file));
325  params.put(Server.Schema.MTIME.toString(), ContentUtils.getStringTimeISO8601(file.getMtime(), file));
326  params.put(Server.Schema.CRTIME.toString(), ContentUtils.getStringTimeISO8601(file.getCrtime(), file));
327  return params;
328  }
329 
338  private Map<String, String> getCommonFields(AbstractFile file) {
339  Map<String, String> params = new HashMap<>();
340  params.put(Server.Schema.ID.toString(), Long.toString(file.getId()));
341  try {
342  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(file.getDataSource().getId()));
343  } catch (TskCoreException ex) {
344  logger.log(Level.SEVERE, "Could not get data source id to properly index the file " + file.getId(), ex); //NON-NLS
345  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
346  }
347  params.put(Server.Schema.FILE_NAME.toString(), file.getName());
348  return params;
349  }
350 
358  @Override
359  public Map<String, String> visit(BlackboardArtifact artifact) {
360  Map<String, String> params = new HashMap<>();
361  params.put(Server.Schema.ID.toString(), Long.toString(artifact.getArtifactID()));
362  try {
363  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(ArtifactTextExtractor.getDataSource(artifact).getId()));
364  } catch (TskCoreException ex) {
365  logger.log(Level.SEVERE, "Could not get data source id to properly index the artifact " + artifact.getArtifactID(), ex); //NON-NLS
366  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
367  }
368  return params;
369  }
370 
378  @Override
379  public Map<String, String> visit(Report report) {
380  Map<String, String> params = new HashMap<>();
381  params.put(Server.Schema.ID.toString(), Long.toString(report.getId()));
382  try {
383  Content dataSource = report.getDataSource();
384  if (null == dataSource) {
385  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
386  } else {
387  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(dataSource.getId()));
388  }
389  } catch (TskCoreException ex) {
390  logger.log(Level.SEVERE, "Could not get data source id to properly index the report, using default value. Id: " + report.getId(), ex); //NON-NLS
391  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
392  }
393  return params;
394  }
395  }
396 
401  static class IngesterException extends Exception {
402 
403  private static final long serialVersionUID = 1L;
404 
405  IngesterException(String message, Throwable ex) {
406  super(message, ex);
407  }
408 
409  IngesterException(String message) {
410  super(message);
411  }
412  }
413 }
Map< String, String > visit(LocalDirectory ld)
Definition: Ingester.java:292
Map< String, String > getCommonAndMACTimeFields(AbstractFile file)
Definition: Ingester.java:321
static String getStringTimeISO8601(long epochSeconds, TimeZone tzone)
Map< String, String > getCommonFields(AbstractFile file)
Definition: Ingester.java:338
Map< String, String > visit(BlackboardArtifact artifact)
Definition: Ingester.java:359
Map< String, String > defaultVisit(SleuthkitVisitableItem svi)
Definition: Ingester.java:272

Copyright © 2012-2016 Basis Technology. Generated on: Mon May 7 2018
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.