Autopsy 4.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
Ingester.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2016 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.keywordsearch;
20 
21 import java.io.ByteArrayInputStream;
22 import java.io.IOException;
23 import java.io.InputStream;
24 import java.io.Reader;
25 import java.io.UnsupportedEncodingException;
26 import java.util.HashMap;
27 import java.util.Map;
28 import java.util.logging.Level;
29 import org.apache.solr.client.solrj.SolrServerException;
30 import org.apache.solr.common.util.ContentStream;
31 import org.apache.solr.common.SolrInputDocument;
32 import org.openide.util.NbBundle;
36 import org.sleuthkit.datamodel.AbstractContent;
37 import org.sleuthkit.datamodel.AbstractFile;
38 import org.sleuthkit.datamodel.Content;
39 import org.sleuthkit.datamodel.ContentVisitor;
40 import org.sleuthkit.datamodel.DerivedFile;
41 import org.sleuthkit.datamodel.Directory;
42 import org.sleuthkit.datamodel.File;
43 import org.sleuthkit.datamodel.LayoutFile;
44 import org.sleuthkit.datamodel.LocalFile;
45 import org.sleuthkit.datamodel.ReadContentInputStream;
46 import org.sleuthkit.datamodel.SlackFile;
47 import org.sleuthkit.datamodel.TskCoreException;
48 
/**
 * Handles indexing files and file chunks as documents on the keyword-search
 * Solr server. Use getDefault() to obtain the singleton instance; call
 * commit() after a batch of ingest() calls to make the additions durable.
 */
class Ingester {

    private static final Logger logger = Logger.getLogger(Ingester.class.getName());
    // Set true whenever a document is added; cleared by commit(). Used by
    // finalize() to warn about additions that were never committed.
    private volatile boolean uncommitedIngests = false;
    private final Server solrServer = KeywordSearch.getServer();
    // Shared visitor that derives the Solr field map from a Content object.
    private final GetContentFieldsV getContentFieldsV = new GetContentFieldsV();
    private static Ingester instance;

    //for ingesting chunk as SolrInputDocument (non-content-streaming, by-pass tika)
    //TODO use a streaming way to add content to /update handler
    private static final int MAX_DOC_CHUNK_SIZE = 1024 * 1024;
    private static final String ENCODING = "UTF-8"; //NON-NLS

    // Private: instances are only handed out through getDefault().
    private Ingester() {
    }
67 
68  public static synchronized Ingester getDefault() {
69  if (instance == null) {
70  instance = new Ingester();
71  }
72  return instance;
73  }
74 
    @Override
    @SuppressWarnings("FinalizeDeclaration")
    protected void finalize() throws Throwable {
        super.finalize();

        // Warn if files might have been left uncommited.
        // (Best-effort diagnostic only: finalization is not guaranteed to run,
        // so this cannot be relied on to flush the index.)
        if (uncommitedIngests) {
            logger.warning("Ingester was used to add files that it never committed."); //NON-NLS
        }
    }
85 
95  void ingest(AbstractFileStringContentStream afscs) throws IngesterException {
96  Map<String, String> params = getContentFields(afscs.getSourceContent());
97  ingest(afscs, params, afscs.getSourceContent().getSize());
98  }
99 
112  void ingest(TextExtractor fe) throws IngesterException {
113  Map<String, String> params = getContentFields(fe.getSourceFile());
114 
115  params.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(fe.getNumChunks()));
116 
117  ingest(new NullContentStream(fe.getSourceFile()), params, 0);
118  }
119 
132  void ingest(AbstractFileChunk fec, ByteContentStream bcs, int size) throws IngesterException {
133  AbstractContent sourceContent = bcs.getSourceContent();
134  Map<String, String> params = getContentFields(sourceContent);
135 
136  //overwrite id with the chunk id
137  params.put(Server.Schema.ID.toString(),
138  Server.getChunkIdString(sourceContent.getId(), fec.getChunkNumber()));
139 
140  ingest(bcs, params, size);
141  }
142 
156  void ingest(AbstractFile file, boolean ingestContent) throws IngesterException {
157  if (ingestContent == false || file.isDir()) {
158  ingest(new NullContentStream(file), getContentFields(file), 0);
159  } else {
160  ingest(new FscContentStream(file), getContentFields(file), file.getSize());
161  }
162  }
163 
    /**
     * Builds the Solr field map for a piece of content by dispatching through
     * the GetContentFieldsV visitor.
     *
     * @param fsc content to derive fields from
     *
     * @return map of Solr field name to value
     */
    private Map<String, String> getContentFields(AbstractContent fsc) {
        return fsc.accept(getContentFieldsV);
    }
174 
178  private class GetContentFieldsV extends ContentVisitor.Default<Map<String, String>> {
179 
180  @Override
181  protected Map<String, String> defaultVisit(Content cntnt) {
182  return new HashMap<>();
183  }
184 
185  @Override
186  public Map<String, String> visit(File f) {
187  Map<String, String> params = getCommonFields(f);
188  getCommonFileContentFields(params, f);
189  return params;
190  }
191 
192  @Override
193  public Map<String, String> visit(DerivedFile df) {
194  Map<String, String> params = getCommonFields(df);
195  getCommonFileContentFields(params, df);
196  return params;
197  }
198 
199  @Override
200  public Map<String, String> visit(Directory d) {
201  Map<String, String> params = getCommonFields(d);
202  getCommonFileContentFields(params, d);
203  return params;
204  }
205 
206  @Override
207  public Map<String, String> visit(LayoutFile lf) {
208  // layout files do not have times
209  return getCommonFields(lf);
210  }
211 
212  @Override
213  public Map<String, String> visit(LocalFile lf) {
214  Map<String, String> params = getCommonFields(lf);
215  getCommonFileContentFields(params, lf);
216  return params;
217  }
218 
219  @Override
220  public Map<String, String> visit(SlackFile f) {
221  Map<String, String> params = getCommonFields(f);
222  getCommonFileContentFields(params, f);
223  return params;
224  }
225 
226  private Map<String, String> getCommonFileContentFields(Map<String, String> params, AbstractFile file) {
227  params.put(Server.Schema.CTIME.toString(), ContentUtils.getStringTimeISO8601(file.getCtime(), file));
228  params.put(Server.Schema.ATIME.toString(), ContentUtils.getStringTimeISO8601(file.getAtime(), file));
229  params.put(Server.Schema.MTIME.toString(), ContentUtils.getStringTimeISO8601(file.getMtime(), file));
230  params.put(Server.Schema.CRTIME.toString(), ContentUtils.getStringTimeISO8601(file.getCrtime(), file));
231  return params;
232  }
233 
234  private Map<String, String> getCommonFields(AbstractFile af) {
235  Map<String, String> params = new HashMap<>();
236  params.put(Server.Schema.ID.toString(), Long.toString(af.getId()));
237  try {
238  long dataSourceId = af.getDataSource().getId();
239  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(dataSourceId));
240  } catch (TskCoreException ex) {
241  logger.log(Level.SEVERE, "Could not get data source id to properly index the file {0}", af.getId()); //NON-NLS
242  params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
243  }
244 
245  params.put(Server.Schema.FILE_NAME.toString(), af.getName());
246  return params;
247  }
248  }
249 
266  void ingest(ContentStream cs, Map<String, String> fields, final long size) throws IngesterException {
267  if (fields.get(Server.Schema.IMAGE_ID.toString()) == null) {
268  //skip the file, image id unknown
269  String msg = NbBundle.getMessage(this.getClass(),
270  "Ingester.ingest.exception.unknownImgId.msg", cs.getName());
271  logger.log(Level.SEVERE, msg);
272  throw new IngesterException(msg);
273  }
274 
275  final byte[] docChunkContentBuf = new byte[MAX_DOC_CHUNK_SIZE];
276  SolrInputDocument updateDoc = new SolrInputDocument();
277 
278  for (String key : fields.keySet()) {
279  updateDoc.addField(key, fields.get(key));
280  }
281 
282  //using size here, but we are no longer ingesting entire files
283  //size is normally a chunk size, up to 1MB
284  if (size > 0) {
285  // TODO (RC): Use try with resources, adjust exception messages
286  InputStream is = null;
287  int read = 0;
288  try {
289  is = cs.getStream();
290  read = is.read(docChunkContentBuf);
291  } catch (IOException ex) {
292  throw new IngesterException(
293  NbBundle.getMessage(this.getClass(), "Ingester.ingest.exception.cantReadStream.msg",
294  cs.getName()));
295  } finally {
296  if (null != is) {
297  try {
298  is.close();
299  } catch (IOException ex) {
300  logger.log(Level.WARNING, "Could not close input stream after reading content, " + cs.getName(), ex); //NON-NLS
301  }
302  }
303  }
304 
305  if (read != 0) {
306  String s = "";
307  try {
308  s = new String(docChunkContentBuf, 0, read, ENCODING);
309  // Sanitize by replacing non-UTF-8 characters with caret '^' before adding to index
310  char[] chars = null;
311  for (int i = 0; i < s.length(); i++) {
312  if (!TextUtil.isValidSolrUTF8(s.charAt(i))) {
313  // only convert string to char[] if there is a non-UTF8 character
314  if (chars == null) {
315  chars = s.toCharArray();
316  }
317  chars[i] = '^';
318  }
319  }
320  // check if the string was modified (i.e. there was a non-UTF8 character found)
321  if (chars != null) {
322  s = new String(chars);
323  }
324  } catch (UnsupportedEncodingException ex) {
325  logger.log(Level.SEVERE, "Unsupported encoding", ex); //NON-NLS
326  }
327  updateDoc.addField(Server.Schema.CONTENT.toString(), s);
328  } else {
329  updateDoc.addField(Server.Schema.CONTENT.toString(), "");
330  }
331  } else {
332  //no content, such as case when 0th chunk indexed
333  updateDoc.addField(Server.Schema.CONTENT.toString(), "");
334  }
335 
336  try {
337  //TODO consider timeout thread, or vary socket timeout based on size of indexed content
338  solrServer.addDocument(updateDoc);
339  uncommitedIngests = true;
340  } catch (KeywordSearchModuleException ex) {
341  throw new IngesterException(
342  NbBundle.getMessage(this.getClass(), "Ingester.ingest.exception.err.msg", cs.getName()), ex);
343  }
344 
345  }
346 
354  static int getTimeout(long size) {
355  if (size < 1024 * 1024L) //1MB
356  {
357  return 60;
358  } else if (size < 10 * 1024 * 1024L) //10MB
359  {
360  return 1200;
361  } else if (size < 100 * 1024 * 1024L) //100MB
362  {
363  return 3600;
364  } else {
365  return 3 * 3600;
366  }
367 
368  }
369 
    /**
     * Commits all pending document additions to the Solr index and, on
     * success, clears the uncommitted-work flag. Failures are logged but not
     * rethrown, so the flag stays set for a later retry.
     */
    void commit() {
        try {
            solrServer.commit();
            uncommitedIngests = false;
        } catch (NoOpenCoreException | SolrServerException ex) {
            logger.log(Level.WARNING, "Error commiting index", ex); //NON-NLS
        }
    }
382 
386  private static class FscContentStream implements ContentStream {
387 
388  private AbstractFile f;
389 
390  FscContentStream(AbstractFile f) {
391  this.f = f;
392  }
393 
394  @Override
395  public String getName() {
396  return f.getName();
397  }
398 
399  @Override
400  public String getSourceInfo() {
401  return NbBundle.getMessage(this.getClass(), "Ingester.FscContentStream.getSrcInfo", f.getId());
402  }
403 
404  @Override
405  public String getContentType() {
406  return null;
407  }
408 
409  @Override
410  public Long getSize() {
411  return f.getSize();
412  }
413 
414  @Override
415  public InputStream getStream() throws IOException {
416  return new ReadContentInputStream(f);
417  }
418 
419  @Override
420  public Reader getReader() throws IOException {
421  throw new UnsupportedOperationException(
422  NbBundle.getMessage(this.getClass(), "Ingester.FscContentStream.getReader"));
423  }
424  }
425 
429  private static class NullContentStream implements ContentStream {
430 
431  AbstractContent aContent;
432 
433  NullContentStream(AbstractContent aContent) {
434  this.aContent = aContent;
435  }
436 
437  @Override
438  public String getName() {
439  return aContent.getName();
440  }
441 
442  @Override
443  public String getSourceInfo() {
444  return NbBundle.getMessage(this.getClass(), "Ingester.NullContentStream.getSrcInfo.text", aContent.getId());
445  }
446 
447  @Override
448  public String getContentType() {
449  return null;
450  }
451 
452  @Override
453  public Long getSize() {
454  return 0L;
455  }
456 
457  @Override
458  public InputStream getStream() throws IOException {
459  return new ByteArrayInputStream(new byte[0]);
460  }
461 
462  @Override
463  public Reader getReader() throws IOException {
464  throw new UnsupportedOperationException(
465  NbBundle.getMessage(this.getClass(), "Ingester.NullContentStream.getReader"));
466  }
467  }
468 
    /**
     * Checked exception thrown when a document cannot be added to the Solr
     * index (missing image id, unreadable stream, or a failed add).
     */
    static class IngesterException extends Exception {

        private static final long serialVersionUID = 1L;

        IngesterException(String message, Throwable ex) {
            super(message, ex);
        }

        IngesterException(String message) {
            super(message);
        }
    }
485 }
static String getStringTimeISO8601(long epochSeconds, TimeZone tzone)
Map< String, String > getCommonFields(AbstractFile af)
Definition: Ingester.java:234
Map< String, String > getCommonFileContentFields(Map< String, String > params, AbstractFile file)
Definition: Ingester.java:226

Copyright © 2012-2016 Basis Technology. Generated on: Mon Jan 2 2017
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.