Autopsy  4.5.0
Graphical digital forensics platform for The Sleuth Kit and other tools.
Ingester.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2017 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.keywordsearch;
20 
21 import java.io.BufferedReader;
22 import java.util.HashMap;
23 import java.util.Map;
24 import java.util.logging.Level;
25 import org.apache.solr.client.solrj.SolrServerException;
26 import org.apache.solr.common.SolrInputDocument;
27 import org.openide.util.NbBundle;
32 import org.sleuthkit.datamodel.AbstractFile;
33 import org.sleuthkit.datamodel.BlackboardArtifact;
34 import org.sleuthkit.datamodel.DerivedFile;
35 import org.sleuthkit.datamodel.Directory;
36 import org.sleuthkit.datamodel.File;
37 import org.sleuthkit.datamodel.LayoutFile;
38 import org.sleuthkit.datamodel.LocalDirectory;
39 import org.sleuthkit.datamodel.LocalFile;
40 import org.sleuthkit.datamodel.SlackFile;
41 import org.sleuthkit.datamodel.SleuthkitItemVisitor;
42 import org.sleuthkit.datamodel.SleuthkitVisitableItem;
43 import org.sleuthkit.datamodel.TskCoreException;
44 
48 //JMTODO: Should this class really be a singleton?
49 class Ingester {
50 
// ---- shared (static) state ----

/** Logger for this class; the nested SolrFieldsVisitor logs through it too. */
private static final Logger logger = Logger.getLogger(Ingester.class.getName());
/** Single visitor instance used by getContentFields() to build Solr field maps. */
private static final SolrFieldsVisitor SOLR_FIELDS_VISITOR = new SolrFieldsVisitor();
/** NOTE(review): not referenced anywhere else in this class as shown - confirm it is still needed. */
private static final int SINGLE_READ_CHARS = 512;
/** The singleton, created on the first call to getDefault(). */
private static Ingester instance;

// ---- per-instance state ----

/** The Solr server wrapper that documents are added to and committed on. */
private final Server solrServer = KeywordSearch.getServer();
/**
 * True once a document has been added to the server and no successful
 * commit() has happened since. Volatile because it is written by
 * indexChunk()/commit() and read by finalize().
 */
private volatile boolean uncommitedIngests = false;

/**
 * Private: instances are only handed out through getDefault().
 */
private Ingester() {
}
60 
61  public static synchronized Ingester getDefault() {
62  if (instance == null) {
63  instance = new Ingester();
64  }
65  return instance;
66  }
67 
68  //JMTODO: this is probably useless
69  @Override
70  @SuppressWarnings("FinalizeDeclaration")
71  protected void finalize() throws Throwable {
72  super.finalize();
73 
74  // Warn if files might have been left uncommited.
75  if (uncommitedIngests) {
76  logger.warning("Ingester was used to add files that it never committed."); //NON-NLS
77  }
78  }
79 
90  void indexMetaDataOnly(AbstractFile file) throws IngesterException {
91  indexChunk("", file.getName(), getContentFields(file));
92  }
93 
104  void indexMetaDataOnly(BlackboardArtifact artifact) throws IngesterException {
105  indexChunk("", new ArtifactTextExtractor().getName(artifact), getContentFields(artifact));
106  }
107 
116  private Map<String, String> getContentFields(SleuthkitVisitableItem item) {
117  return item.accept(SOLR_FIELDS_VISITOR);
118  }
119 
139  // TODO (JIRA-3118): Cancelled text indexing does not propagate cancellation to clients
140  < T extends SleuthkitVisitableItem> boolean indexText(TextExtractor< T> extractor, T source, IngestJobContext context) throws Ingester.IngesterException {
141  final long sourceID = extractor.getID(source);
142  final String sourceName = extractor.getName(source);
143 
144  int numChunks = 0; //unknown until chunking is done
145 
146  if (extractor.isDisabled()) {
147  /*
148  * some Extractors, notable the strings extractor, have options
149  * which can be configured such that no extraction should be done
150  */
151  return true;
152  }
153 
154  Map<String, String> fields = getContentFields(source);
155  //Get a reader for the content of the given source
156  try (BufferedReader reader = new BufferedReader(extractor.getReader(source));) {
157  Chunker chunker = new Chunker(reader);
158  for (Chunk chunk : chunker) {
159  if (context != null && context.fileIngestIsCancelled()) {
160  logger.log(Level.INFO, "File ingest cancelled. Cancelling keyword search indexing of {0}", sourceName);
161  return false;
162  }
163  String chunkId = Server.getChunkIdString(sourceID, numChunks + 1);
164  fields.put(Server.Schema.ID.toString(), chunkId);
165  fields.put(Server.Schema.CHUNK_SIZE.toString(), String.valueOf(chunk.getBaseChunkLength()));
166  try {
167  //add the chunk text to Solr index
168  indexChunk(chunk.toString(), sourceName, fields);
169  numChunks++;
170  } catch (Ingester.IngesterException ingEx) {
171  extractor.logWarning("Ingester had a problem with extracted string from file '" //NON-NLS
172  + sourceName + "' (id: " + sourceID + ").", ingEx);//NON-NLS
173 
174  throw ingEx; //need to rethrow to signal error and move on
175  }
176  }
177  if (chunker.hasException()) {
178  extractor.logWarning("Error chunking content from " + sourceID + ": " + sourceName, chunker.getException());
179  return false;
180  }
181  } catch (Exception ex) {
182  extractor.logWarning("Unexpected error, can't read content stream from " + sourceID + ": " + sourceName, ex);//NON-NLS
183  return false;
184  } finally {
185  if (context != null && context.fileIngestIsCancelled()) {
186  return false;
187  } else {
188  //after all chunks, index just the meta data, including the numChunks, of the parent file
189  fields.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(numChunks));
190  //reset id field to base document id
191  fields.put(Server.Schema.ID.toString(), Long.toString(sourceID));
192  //"parent" docs don't have chunk_size
193  fields.remove(Server.Schema.CHUNK_SIZE.toString());
194  indexChunk(null, sourceName, fields);
195  }
196  }
197  return true;
198  }
199 
213  private void indexChunk(String chunk, String sourceName, Map<String, String> fields) throws IngesterException {
214  if (fields.get(Server.Schema.IMAGE_ID.toString()) == null) {
215  //JMTODO: actually if the we couldn't get the image id it is set to -1,
216  // but does this really mean we don't want to index it?
217 
218  //skip the file, image id unknown
219  String msg = NbBundle.getMessage(Ingester.class,
220  "Ingester.ingest.exception.unknownImgId.msg", sourceName); //JMTODO: does this need to ne internationalized?
221  logger.log(Level.SEVERE, msg);
222  throw new IngesterException(msg);
223  }
224 
225  //Make a SolrInputDocument out of the field map
226  SolrInputDocument updateDoc = new SolrInputDocument();
227  for (String key : fields.keySet()) {
228  updateDoc.addField(key, fields.get(key));
229  }
230  //add the content to the SolrInputDocument
231  //JMTODO: can we just add it to the field map before passing that in?
232  updateDoc.addField(Server.Schema.CONTENT.toString(), chunk);
233 
234  try {
235  //TODO: consider timeout thread, or vary socket timeout based on size of indexed content
236  solrServer.addDocument(updateDoc);
237  uncommitedIngests = true;
238 
239  } catch (KeywordSearchModuleException | NoOpenCoreException ex) {
240  //JMTODO: does this need to be internationalized?
241  throw new IngesterException(
242  NbBundle.getMessage(Ingester.class, "Ingester.ingest.exception.err.msg", sourceName), ex);
243  }
244  }
245 
250  void commit() {
251  try {
252  solrServer.commit();
253  uncommitedIngests = false;
254  } catch (NoOpenCoreException | SolrServerException ex) {
255  logger.log(Level.WARNING, "Error commiting index", ex); //NON-NLS
256 
257  }
258  }
259 
263  static private class SolrFieldsVisitor extends SleuthkitItemVisitor.Default<Map<String, String>> {
264 
265  @Override
266  protected Map<String, String> defaultVisit(SleuthkitVisitableItem svi) {
267  return new HashMap<>();
268  }
269 
270  @Override
271  public Map<String, String> visit(File f) {
272  return getCommonAndMACTimeFields(f);
273  }
274 
275  @Override
276  public Map<String, String> visit(DerivedFile df) {
277  return getCommonAndMACTimeFields(df);
278  }
279 
280  @Override
281  public Map<String, String> visit(Directory d) {
282  return getCommonAndMACTimeFields(d);
283  }
284 
285  @Override
286  public Map<String, String> visit(LocalDirectory ld) {
287  return getCommonAndMACTimeFields(ld);
288  }
289 
290  @Override
291  public Map<String, String> visit(LayoutFile lf) {
292  // layout files do not have times
293  return getCommonFields(lf);
294  }
295 
296  @Override
297  public Map<String, String> visit(LocalFile lf) {
298  return getCommonAndMACTimeFields(lf);
299  }
300 
301  @Override
302  public Map<String, String> visit(SlackFile f) {
303  return getCommonAndMACTimeFields(f);
304  }
305 
315  private Map<String, String> getCommonAndMACTimeFields(AbstractFile file) {
316  Map<String, String> params = getCommonFields(file);
317  params.put(Server.Schema.CTIME.toString(), ContentUtils.getStringTimeISO8601(file.getCtime(), file));
318  params.put(Server.Schema.ATIME.toString(), ContentUtils.getStringTimeISO8601(file.getAtime(), file));
319  params.put(Server.Schema.MTIME.toString(), ContentUtils.getStringTimeISO8601(file.getMtime(), file));
320  params.put(Server.Schema.CRTIME.toString(), ContentUtils.getStringTimeISO8601(file.getCrtime(), file));
321  return params;
322  }
323 
332  private Map<String, String> getCommonFields(AbstractFile file) {
333  Map<String, String> params = new HashMap<>();
334  params.put(Server.Schema.ID.toString(), Long.toString(file.getId()));
335  try {
336  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(file.getDataSource().getId()));
337  } catch (TskCoreException ex) {
338  logger.log(Level.SEVERE, "Could not get data source id to properly index the file " + file.getId(), ex); //NON-NLS
339  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
340  }
341  params.put(Server.Schema.FILE_NAME.toString(), file.getName());
342  return params;
343  }
344 
352  @Override
353  public Map<String, String> visit(BlackboardArtifact artifact) {
354  Map<String, String> params = new HashMap<>();
355  params.put(Server.Schema.ID.toString(), Long.toString(artifact.getArtifactID()));
356  try {
357  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(ArtifactTextExtractor.getDataSource(artifact).getId()));
358  } catch (TskCoreException ex) {
359  logger.log(Level.SEVERE, "Could not get data source id to properly index the artifact " + artifact.getArtifactID(), ex); //NON-NLS
360  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
361  }
362  return params;
363  }
364  }
365 
370  static class IngesterException extends Exception {
371 
372  private static final long serialVersionUID = 1L;
373 
374  IngesterException(String message, Throwable ex) {
375  super(message, ex);
376  }
377 
378  IngesterException(String message) {
379  super(message);
380  }
381  }
382 }
Map< String, String > visit(LocalDirectory ld)
Definition: Ingester.java:286
Map< String, String > getCommonAndMACTimeFields(AbstractFile file)
Definition: Ingester.java:315
static String getStringTimeISO8601(long epochSeconds, TimeZone tzone)
Map< String, String > getCommonFields(AbstractFile file)
Definition: Ingester.java:332
Map< String, String > visit(BlackboardArtifact artifact)
Definition: Ingester.java:353
Map< String, String > defaultVisit(SleuthkitVisitableItem svi)
Definition: Ingester.java:266

Copyright © 2012-2016 Basis Technology. Generated on: Tue Feb 20 2018
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.