Autopsy 4.0
Graphical digital forensics platform for The Sleuth Kit and other tools.
Ingester.java
/*
 * Autopsy Forensic Browser
 *
 * Copyright 2011-2015 Basis Technology Corp.
 * Contact: carrier <at> sleuthkit <dot> org
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.sleuthkit.autopsy.keywordsearch;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.Map;
import java.util.logging.Level;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.SolrInputDocument;
import org.openide.util.NbBundle;
import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.autopsy.datamodel.ContentUtils;
import org.sleuthkit.datamodel.AbstractContent;
import org.sleuthkit.datamodel.AbstractFile;
import org.sleuthkit.datamodel.Content;
import org.sleuthkit.datamodel.ContentVisitor;
import org.sleuthkit.datamodel.DerivedFile;
import org.sleuthkit.datamodel.Directory;
import org.sleuthkit.datamodel.File;
import org.sleuthkit.datamodel.LayoutFile;
import org.sleuthkit.datamodel.LocalFile;
import org.sleuthkit.datamodel.ReadContentInputStream;
import org.sleuthkit.datamodel.TskCoreException;

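/**
 * Handles indexing files on a Solr core.
 */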
class Ingester {

    private static final Logger logger = Logger.getLogger(Ingester.class.getName());
    private volatile boolean uncommitedIngests = false;
    private final Server solrServer = KeywordSearch.getServer();
    private final GetContentFieldsV getContentFieldsV = new GetContentFieldsV();
    private static Ingester instance;

    //for ingesting a chunk as a SolrInputDocument (non-content-streaming, bypasses Tika)
    //TODO use a streaming way to add content to the /update handler
    private static final int MAX_DOC_CHUNK_SIZE = 1024 * 1024;
    private static final String docContentEncoding = "UTF-8"; //NON-NLS

    private Ingester() {
    }

    public static synchronized Ingester getDefault() {
        if (instance == null) {
            instance = new Ingester();
        }
        return instance;
    }

    @Override
    @SuppressWarnings("FinalizeDeclaration")
    protected void finalize() throws Throwable {
        super.finalize();

        // Warn if files might have been left uncommitted.
        if (uncommitedIngests) {
            logger.warning("Ingester was used to add files that it never committed."); //NON-NLS
        }
    }

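    /**
     * Sends a stream of extracted strings to Solr to be added to the index.
     * commit() should be called once you're done ingesting files.
     *
     * @param afscs stream of strings extracted from the source content
     *
     * @throws IngesterException if there was an error processing the specific
     *                           content, but the Solr server is probably fine.
     */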
    void ingest(AbstractFileStringContentStream afscs) throws IngesterException {
        Map<String, String> params = getContentFields(afscs.getSourceContent());
        ingest(afscs, params, afscs.getSourceContent().getSize());
    }

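    /**
     * Sends a TextExtractor to Solr to be added to the index. The extractor
     * represents the parent of the extracted chunks; the parent itself is
     * indexed with no content, only metadata, and is used to associate the
     * extracted AbstractFileChunk instances. commit() should be called once
     * you're done ingesting files.
     *
     * @param fe TextExtractor to ingest
     *
     * @throws IngesterException if there was an error processing the specific
     *                           file, but the Solr server is probably fine.
     */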
    void ingest(TextExtractor fe) throws IngesterException {
        Map<String, String> params = getContentFields(fe.getSourceFile());

        params.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(fe.getNumChunks()));

        ingest(new NullContentStream(fe.getSourceFile()), params, 0);
    }

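    /**
     * Sends a single file chunk and its content stream to Solr to be added to
     * the index. commit() should be called once you're done ingesting files.
     *
     * @param fec  AbstractFileChunk to ingest
     * @param bcs  stream of the chunk's content
     * @param size approximate size of the stream in bytes, used for timeout
     *             estimation
     *
     * @throws IngesterException if there was an error processing the specific
     *                           file, but the Solr server is probably fine.
     */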
    void ingest(AbstractFileChunk fec, ByteContentStream bcs, int size) throws IngesterException {
        AbstractContent sourceContent = bcs.getSourceContent();
        Map<String, String> params = getContentFields(sourceContent);

        //overwrite id with the chunk id
        params.put(Server.Schema.ID.toString(),
                Server.getChunkIdString(sourceContent.getId(), fec.getChunkId()));

        ingest(bcs, params, size);
    }

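    /**
     * Sends a file to Solr to have its content extracted and added to the
     * index. commit() should be called once you're done ingesting files. If
     * the file is a directory or ingestContent is set to false, only the
     * file's metadata is indexed.
     *
     * @param file          file to ingest
     * @param ingestContent if true, index the file name and content;
     *                      otherwise, index metadata only
     *
     * @throws IngesterException if there was an error processing the specific
     *                           file, but the Solr server is probably fine.
     */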
    void ingest(AbstractFile file, boolean ingestContent) throws IngesterException {
        if (ingestContent == false || file.isDir()) {
            ingest(new NullContentStream(file), getContentFields(file), 0);
        } else {
            ingest(new FscContentStream(file), getContentFields(file), file.getSize());
        }
    }

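    /**
     * Creates a field map from a piece of content, to be sent to Solr.
     *
     * @param fsc content to get fields from
     *
     * @return the field map
     */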
    private Map<String, String> getContentFields(AbstractContent fsc) {
        return fsc.accept(getContentFieldsV);
    }

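    /**
     * Visitor used to create the field map to send to the Solr index.
     */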
    private class GetContentFieldsV extends ContentVisitor.Default<Map<String, String>> {

        @Override
        protected Map<String, String> defaultVisit(Content cntnt) {
            return new HashMap<>();
        }

        @Override
        public Map<String, String> visit(File f) {
            Map<String, String> params = getCommonFields(f);
            getCommonFileContentFields(params, f);
            return params;
        }

        @Override
        public Map<String, String> visit(DerivedFile df) {
            Map<String, String> params = getCommonFields(df);
            getCommonFileContentFields(params, df);
            return params;
        }

        @Override
        public Map<String, String> visit(Directory d) {
            Map<String, String> params = getCommonFields(d);
            getCommonFileContentFields(params, d);
            return params;
        }

        @Override
        public Map<String, String> visit(LayoutFile lf) {
            // layout files do not have times
            return getCommonFields(lf);
        }

        @Override
        public Map<String, String> visit(LocalFile lf) {
            Map<String, String> params = getCommonFields(lf);
            getCommonFileContentFields(params, lf);
            return params;
        }

        private Map<String, String> getCommonFileContentFields(Map<String, String> params, AbstractFile file) {
            params.put(Server.Schema.CTIME.toString(), ContentUtils.getStringTimeISO8601(file.getCtime(), file));
            params.put(Server.Schema.ATIME.toString(), ContentUtils.getStringTimeISO8601(file.getAtime(), file));
            params.put(Server.Schema.MTIME.toString(), ContentUtils.getStringTimeISO8601(file.getMtime(), file));
            params.put(Server.Schema.CRTIME.toString(), ContentUtils.getStringTimeISO8601(file.getCrtime(), file));
            return params;
        }

        private Map<String, String> getCommonFields(AbstractFile af) {
            Map<String, String> params = new HashMap<>();
            params.put(Server.Schema.ID.toString(), Long.toString(af.getId()));
            try {
                long dataSourceId = af.getDataSource().getId();
                params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(dataSourceId));
            } catch (TskCoreException ex) {
                logger.log(Level.SEVERE, "Could not get data source id to properly index the file {0}", af.getId()); //NON-NLS
                params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
            }

            params.put(Server.Schema.FILE_NAME.toString(), af.getName());
            return params;
        }
    }

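    /**
     * Indexing method that bypasses Tika and assumes the stream is pure text.
     * It reads and converts the entire content stream to a string, assuming
     * UTF-8, since a streaming approach cannot be used with the Solr /update
     * handler. This should be safe, since all content is sent in chunks of at
     * most 1MB.
     *
     * @param cs     content stream to index
     * @param fields document fields to index alongside the content
     * @param size   size of the stream in bytes; 0 indexes metadata only
     *
     * @throws IngesterException if there was an error processing the specific
     *                           content, but the Solr server is probably fine.
     */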
    void ingest(ContentStream cs, Map<String, String> fields, final long size) throws IngesterException {

        if (fields.get(Server.Schema.IMAGE_ID.toString()) == null) {
            //skip the file, image id unknown
            String msg = NbBundle.getMessage(this.getClass(),
                    "Ingester.ingest.exception.unknownImgId.msg", cs.getName());
            logger.log(Level.SEVERE, msg);
            throw new IngesterException(msg);
        }

        final byte[] docChunkContentBuf = new byte[MAX_DOC_CHUNK_SIZE];
        SolrInputDocument updateDoc = new SolrInputDocument();

        for (Map.Entry<String, String> field : fields.entrySet()) {
            updateDoc.addField(field.getKey(), field.getValue());
        }

        //using size here, but we are no longer ingesting entire files;
        //size is normally a chunk size, up to 1MB
        if (size > 0) {
            // TODO (RC): Use try with resources, adjust exception messages
            InputStream is = null;
            int read = 0;
            try {
                is = cs.getStream();
                read = is.read(docChunkContentBuf);
            } catch (IOException ex) {
                throw new IngesterException(
                        NbBundle.getMessage(this.getClass(), "Ingester.ingest.exception.cantReadStream.msg",
                                cs.getName()));
            } finally {
                if (null != is) {
                    try {
                        is.close();
                    } catch (IOException ex) {
                        logger.log(Level.WARNING, "Could not close input stream after reading content, " + cs.getName(), ex); //NON-NLS
                    }
                }
            }

            // read() returns -1 at end of stream, so guard against EOF as
            // well as an empty read before constructing the string
            if (read > 0) {
                String s = "";
                try {
                    s = new String(docChunkContentBuf, 0, read, docContentEncoding);
                } catch (UnsupportedEncodingException ex) {
                    logger.log(Level.SEVERE, "Unsupported encoding", ex); //NON-NLS
                }
                updateDoc.addField(Server.Schema.CONTENT.toString(), s);
            } else {
                updateDoc.addField(Server.Schema.CONTENT.toString(), "");
            }
        } else {
            //no content, such as the case when the 0th (metadata-only) chunk is indexed
            updateDoc.addField(Server.Schema.CONTENT.toString(), "");
        }

        try {
            //TODO consider a timeout thread, or varying the socket timeout based on the size of the indexed content
            solrServer.addDocument(updateDoc);
            uncommitedIngests = true;
        } catch (KeywordSearchModuleException ex) {
            throw new IngesterException(
                    NbBundle.getMessage(this.getClass(), "Ingester.ingest.exception.err.msg", cs.getName()), ex);
        }
    }

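    /**
     * Returns the timeout, in seconds, that should be used when indexing
     * content of the given size.
     *
     * @param size size of the content in bytes
     *
     * @return timeout in seconds
     */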
    static int getTimeout(long size) {
        if (size < 1024 * 1024L) { //1MB
            return 60;
        } else if (size < 10 * 1024 * 1024L) { //10MB
            return 1200;
        } else if (size < 100 * 1024 * 1024L) { //100MB
            return 3600;
        } else {
            return 3 * 3600;
        }
    }

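    /**
     * Tells Solr to commit. Necessary before newly ingested files will appear
     * in search results.
     */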
    void commit() {
        try {
            solrServer.commit();
            uncommitedIngests = false;
        } catch (NoOpenCoreException | SolrServerException ex) {
            logger.log(Level.WARNING, "Error committing index", ex); //NON-NLS
        }
    }

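    /**
     * ContentStream that streams the data of an AbstractFile object.
     */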
    private static class FscContentStream implements ContentStream {

        private final AbstractFile f;

        FscContentStream(AbstractFile f) {
            this.f = f;
        }

        @Override
        public String getName() {
            return f.getName();
        }

        @Override
        public String getSourceInfo() {
            return NbBundle.getMessage(this.getClass(), "Ingester.FscContentStream.getSrcInfo", f.getId());
        }

        @Override
        public String getContentType() {
            return null;
        }

        @Override
        public Long getSize() {
            return f.getSize();
        }

        @Override
        public InputStream getStream() throws IOException {
            return new ReadContentInputStream(f);
        }

        @Override
        public Reader getReader() throws IOException {
            throw new UnsupportedOperationException(
                    NbBundle.getMessage(this.getClass(), "Ingester.FscContentStream.getReader"));
        }
    }

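    /**
     * ContentStream associated with a Content object, but forced to have no
     * content.
     */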
    private static class NullContentStream implements ContentStream {

        private final AbstractContent aContent;

        NullContentStream(AbstractContent aContent) {
            this.aContent = aContent;
        }

        @Override
        public String getName() {
            return aContent.getName();
        }

        @Override
        public String getSourceInfo() {
            return NbBundle.getMessage(this.getClass(), "Ingester.NullContentStream.getSrcInfo.text", aContent.getId());
        }

        @Override
        public String getContentType() {
            return null;
        }

        @Override
        public Long getSize() {
            return 0L;
        }

        @Override
        public InputStream getStream() throws IOException {
            return new ByteArrayInputStream(new byte[0]);
        }

        @Override
        public Reader getReader() throws IOException {
            throw new UnsupportedOperationException(
                    NbBundle.getMessage(this.getClass(), "Ingester.NullContentStream.getReader"));
        }
    }

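    /**
     * Indicates that there was an error with a specific ingest operation, but
     * it is still okay to continue ingesting files.
     */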
    static class IngesterException extends Exception {

        private static final long serialVersionUID = 1L;

        IngesterException(String message, Throwable ex) {
            super(message, ex);
        }

        IngesterException(String message) {
            super(message);
        }
    }
}
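
A minimal usage sketch (hypothetical caller; Ingester is package-private, so this
would live inside org.sleuthkit.autopsy.keywordsearch, and it assumes an
AbstractFile obtained elsewhere):

    Ingester ingester = Ingester.getDefault();
    try {
        // Index the file's metadata only (name, IDs, timestamps).
        ingester.ingest(file, false);
    } catch (Ingester.IngesterException ex) {
        // This file failed to index, but the Solr server is likely still usable.
    }
    // Make the newly added documents visible to searches.
    ingester.commit();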