19 package org.sleuthkit.autopsy.keywordsearch;
21 import java.io.ByteArrayInputStream;
22 import java.io.IOException;
23 import java.io.InputStream;
24 import java.io.Reader;
25 import java.io.UnsupportedEncodingException;
26 import java.util.HashMap;
28 import java.util.logging.Level;
29 import org.apache.solr.client.solrj.SolrServerException;
30 import org.apache.solr.common.util.ContentStream;
31 import org.apache.solr.common.SolrInputDocument;
32 import org.openide.util.NbBundle;
// Logger named after the Ingester class; used for all warnings and errors reported below.
54 private static final Logger logger = Logger.getLogger(Ingester.class.getName());
// Tracks whether documents have been added that are not yet committed: set to true right
// after solrServer.addDocument(...) succeeds, reset to false in the try block whose failure
// is logged as an index-commit error, and checked in finalize() to warn about uncommitted work.
// Declared volatile so a write from one thread is visible to reads from another.
55 private volatile boolean uncommitedIngests =
false;
// Server handle obtained from KeywordSearch.getServer(); documents are added to the index through it.
56 private final Server solrServer = KeywordSearch.getServer();
// Visitor instance handed to AbstractContent.accept(...) in getContentFields() to build the
// String-to-String field map for a piece of content.
57 private final GetContentFieldsV getContentFieldsV =
new GetContentFieldsV();
// Shared instance; created on first use inside the synchronized getDefault() when still null.
58 private static Ingester instance;
// 1024 * 1024 bytes (1 MiB): size of the byte buffer that a content stream is read into
// before its text is added to the Solr document.
62 private static final int MAX_DOC_CHUNK_SIZE = 1024 * 1024;
// Charset name used when decoding the bytes read from the content stream into a String.
63 private static final String ENCODING =
"UTF-8";
68 public static synchronized Ingester getDefault() {
69 if (instance == null) {
70 instance =
new Ingester();
76 @SuppressWarnings(
"FinalizeDeclaration")
77 protected
void finalize() throws Throwable {
81 if (uncommitedIngests) {
82 logger.warning(
"Ingester was used to add files that it never committed.");
95 void ingest(AbstractFileStringContentStream afscs)
throws IngesterException {
96 Map<String, String> params = getContentFields(afscs.getSourceContent());
97 ingest(afscs, params, afscs.getSourceContent().getSize());
112 void ingest(TextExtractor fe)
throws IngesterException {
113 Map<String, String> params = getContentFields(fe.getSourceFile());
115 params.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(fe.getNumChunks()));
117 ingest(
new NullContentStream(fe.getSourceFile()), params, 0);
132 void ingest(AbstractFileChunk fec, ByteContentStream bcs,
int size)
throws IngesterException {
133 AbstractContent sourceContent = bcs.getSourceContent();
134 Map<String, String> params = getContentFields(sourceContent);
137 params.put(Server.Schema.ID.toString(),
138 Server.getChunkIdString(sourceContent.getId(), fec.getChunkNumber()));
140 ingest(bcs, params, size);
156 void ingest(AbstractFile file,
boolean ingestContent)
throws IngesterException {
157 if (ingestContent ==
false || file.isDir()) {
158 ingest(
new NullContentStream(file), getContentFields(file), 0);
160 ingest(
new FscContentStream(file), getContentFields(file), file.getSize());
171 private Map<String, String> getContentFields(AbstractContent fsc) {
172 return fsc.accept(getContentFieldsV);
182 return new HashMap<>();
186 public Map<String, String>
visit(File f) {
193 public Map<String, String>
visit(DerivedFile df) {
200 public Map<String, String>
visit(Directory d) {
207 public Map<String, String>
visit(LayoutFile lf) {
213 public Map<String, String>
visit(LocalFile lf) {
220 public Map<String, String>
visit(SlackFile f) {
235 Map<String, String> params =
new HashMap<>();
236 params.put(
Server.
Schema.ID.toString(), Long.toString(af.getId()));
238 long dataSourceId = af.getDataSource().getId();
239 params.put(
Server.
Schema.IMAGE_ID.toString(), Long.toString(dataSourceId));
240 }
catch (TskCoreException ex) {
241 logger.log(Level.SEVERE,
"Could not get data source id to properly index the file {0}", af.getId());
242 params.put(
Server.
Schema.IMAGE_ID.toString(), Long.toString(-1));
245 params.put(
Server.
Schema.FILE_NAME.toString(), af.getName());
266 void ingest(ContentStream cs, Map<String, String> fields,
final long size)
throws IngesterException {
267 if (fields.get(
Server.
Schema.IMAGE_ID.toString()) == null) {
269 String msg = NbBundle.getMessage(this.getClass(),
270 "Ingester.ingest.exception.unknownImgId.msg", cs.getName());
271 logger.log(Level.SEVERE, msg);
272 throw new IngesterException(msg);
275 final byte[] docChunkContentBuf =
new byte[MAX_DOC_CHUNK_SIZE];
276 SolrInputDocument updateDoc =
new SolrInputDocument();
278 for (String key : fields.keySet()) {
279 updateDoc.addField(key, fields.get(key));
286 InputStream is = null;
290 read = is.read(docChunkContentBuf);
291 }
catch (IOException ex) {
292 throw new IngesterException(
293 NbBundle.getMessage(
this.getClass(),
"Ingester.ingest.exception.cantReadStream.msg",
299 }
catch (IOException ex) {
300 logger.log(Level.WARNING,
"Could not close input stream after reading content, " + cs.getName(), ex);
308 s =
new String(docChunkContentBuf, 0, read, ENCODING);
311 for (
int i = 0; i < s.length(); i++) {
312 if (!TextUtil.isValidSolrUTF8(s.charAt(i))) {
315 chars = s.toCharArray();
322 s =
new String(chars);
324 }
catch (UnsupportedEncodingException ex) {
325 logger.log(Level.SEVERE,
"Unsupported encoding", ex);
327 updateDoc.addField(Server.Schema.CONTENT.toString(), s);
329 updateDoc.addField(Server.Schema.CONTENT.toString(),
"");
333 updateDoc.addField(Server.Schema.CONTENT.toString(),
"");
338 solrServer.addDocument(updateDoc);
339 uncommitedIngests =
true;
340 }
catch (KeywordSearchModuleException ex) {
341 throw new IngesterException(
342 NbBundle.getMessage(
this.getClass(),
"Ingester.ingest.exception.err.msg", cs.getName()), ex);
354 static int getTimeout(
long size) {
355 if (size < 1024 * 1024L)
358 }
else if (size < 10 * 1024 * 1024L)
361 }
else if (size < 100 * 1024 * 1024L)
377 uncommitedIngests =
false;
378 }
catch (NoOpenCoreException | SolrServerException ex) {
379 logger.log(Level.WARNING,
"Error commiting index", ex);
388 private AbstractFile
f;
401 return NbBundle.getMessage(this.getClass(),
"Ingester.FscContentStream.getSrcInfo", f.getId());
416 return new ReadContentInputStream(f);
421 throw new UnsupportedOperationException(
422 NbBundle.getMessage(
this.getClass(),
"Ingester.FscContentStream.getReader"));
431 AbstractContent aContent;
434 this.aContent = aContent;
439 return aContent.getName();
444 return NbBundle.getMessage(this.getClass(),
"Ingester.NullContentStream.getSrcInfo.text", aContent.getId());
459 return new ByteArrayInputStream(
new byte[0]);
464 throw new UnsupportedOperationException(
465 NbBundle.getMessage(
this.getClass(),
"Ingester.NullContentStream.getReader"));
473 static class IngesterException
extends Exception {
475 private static final long serialVersionUID = 1L;
477 IngesterException(String message, Throwable ex) {
481 IngesterException(String message) {
Map< String, String > visit(Directory d)
Map< String, String > visit(SlackFile f)
Map< String, String > defaultVisit(Content cntnt)
Map< String, String > visit(DerivedFile df)
static String getStringTimeISO8601(long epochSeconds, TimeZone tzone)
Map< String, String > visit(File f)
Map< String, String > getCommonFields(AbstractFile af)
Map< String, String > getCommonFileContentFields(Map< String, String > params, AbstractFile file)
Map< String, String > visit(LocalFile lf)
Map< String, String > visit(LayoutFile lf)