19 package org.sleuthkit.autopsy.keywordsearch;
21 import java.io.BufferedReader;
22 import java.util.HashMap;
24 import java.util.logging.Level;
25 import org.apache.solr.client.solrj.SolrServerException;
26 import org.apache.solr.common.SolrInputDocument;
27 import org.openide.util.NbBundle;
// Logger named after the Ingester class.
51 private static final Logger logger = Logger.getLogger(Ingester.class.getName());
// Set to true right after a document is handed to solrServer.addDocument() and
// reset to false elsewhere in this file; finalize() logs a warning if it is
// still true. Declared volatile.
52 private volatile boolean uncommitedIngests =
false;
// Server handle obtained once from KeywordSearch.getServer(); indexChunk() adds
// documents through it.
53 private final Server solrServer = KeywordSearch.getServer();
// Shared visitor instance that getContentFields() passes to item.accept() to
// build the field map for an item.
54 private static final SolrFieldsVisitor SOLR_FIELDS_VISITOR =
new SolrFieldsVisitor();
// Shared Ingester instance; assigned in getDefault() when it is still null.
55 private static Ingester instance;
// NOTE(review): no use of this constant is visible in this excerpt - confirm
// whether it is still referenced anywhere.
56 private static final int SINGLE_READ_CHARS = 512;
/**
 * Creates the shared {@code Ingester} the first time it is requested, i.e.
 * while the static {@code instance} field is still null. The method is
 * declared {@code static synchronized}.
 *
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably it returns {@code instance} - confirm against the full file.
 */
61 public static synchronized Ingester getDefault() {
62 if (instance == null) {
63 instance =
new Ingester();
/**
 * Finalizer hook: logs a warning when this object is finalized while
 * {@code uncommitedIngests} is still true, i.e. documents were added that
 * were never committed.
 *
 * NOTE(review): only the check and the warning are visible in this excerpt;
 * any call to {@code super.finalize()} cannot be confirmed from here.
 *
 * @throws Throwable declared by the method signature
 */
70 @SuppressWarnings(
"FinalizeDeclaration")
71 protected
void finalize() throws Throwable {
75 if (uncommitedIngests) {
76 logger.warning(
"Ingester was used to add files that it never committed.");
/**
 * Indexes a file without extracted text: calls indexChunk() with an empty
 * string as the content, the file's name as the source name, and the field
 * map produced by getContentFields(file).
 *
 * @param file the file whose fields are indexed
 *
 * @throws IngesterException propagated from indexChunk()
 */
90 void indexMetaDataOnly(AbstractFile file)
throws IngesterException {
91 indexChunk(
"", file.getName(), getContentFields(file));
/**
 * Indexes an artifact without extracted text: calls indexChunk() with an
 * empty string as the content, the name reported by a new
 * ArtifactTextExtractor for the artifact, and the field map produced by
 * getContentFields(artifact).
 *
 * @param artifact the artifact whose fields are indexed
 *
 * @throws IngesterException propagated from indexChunk()
 */
104 void indexMetaDataOnly(BlackboardArtifact artifact)
throws IngesterException {
105 indexChunk(
"",
new ArtifactTextExtractor().getName(artifact), getContentFields(artifact));
/**
 * Builds the field map for an item by having it accept the shared
 * SOLR_FIELDS_VISITOR.
 *
 * @param item the item to describe
 *
 * @return the map returned by the visitor for this item
 */
116 private Map<String, String> getContentFields(SleuthkitVisitableItem item) {
117 return item.accept(SOLR_FIELDS_VISITOR);
/**
 * Reads the text of {@code source} through {@code extractor}, splits it into
 * chunks with a Chunker, and indexes every chunk via indexChunk(). After the
 * chunk loop, one more document is indexed for the source itself: it carries
 * the chunk count, uses the plain source ID, has no chunk size field, and is
 * passed null as its content.
 *
 * NOTE(review): several lines of this method are missing from this excerpt
 * (the declaration and update of {@code numChunks}, what happens when the
 * extractor is disabled or ingest is cancelled, and every return statement),
 * so the meaning of the boolean result cannot be stated from here.
 *
 * @param <T>       type of item the extractor handles
 * @param extractor supplies the ID, name and reader for {@code source}, and
 *                  receives warnings through logWarning()
 * @param source    the item whose text is indexed
 * @param context   ingest job context polled for cancellation; the visible
 *                  checks tolerate null
 *
 * @return NOTE(review): not visible in this excerpt - confirm against the
 *         full file
 *
 * @throws Ingester.IngesterException declared by the signature; the visible
 *                                    call that can raise it outside the
 *                                    try block is the final indexChunk()
 */
140 < T extends SleuthkitVisitableItem>
boolean indexText(TextExtractor< T> extractor, T source, IngestJobContext context)
throws Ingester.IngesterException {
141 final long sourceID = extractor.getID(source);
142 final String sourceName = extractor.getName(source);
// NOTE(review): the body of this branch is not visible in this excerpt.
146 if (extractor.isDisabled()) {
// One field map is built up front and then mutated and reused for every chunk.
154 Map<String, String> fields = getContentFields(source);
// The reader is opened in try-with-resources, so it is closed when the block exits.
156 try (BufferedReader reader =
new BufferedReader(extractor.getReader(source));) {
157 Chunker chunker =
new Chunker(reader);
158 for (Chunk chunk : chunker) {
// Cancellation is polled once per chunk; a null context is tolerated.
159 if (context != null && context.fileIngestIsCancelled()) {
160 logger.log(Level.INFO,
"File ingest cancelled. Cancelling keyword search indexing of {0}", sourceName);
// Each chunk document gets its own ID derived from the source ID and
// numChunks + 1, and records the chunk's base length.
163 String chunkId = Server.getChunkIdString(sourceID, numChunks + 1);
164 fields.put(Server.Schema.ID.toString(), chunkId);
165 fields.put(Server.Schema.CHUNK_SIZE.toString(), String.valueOf(chunk.getBaseChunkLength()));
168 indexChunk(chunk.toString(), sourceName, fields);
170 }
catch (Ingester.IngesterException ingEx) {
// A failure to index a chunk is reported as a warning through the extractor.
171 extractor.logWarning(
"Ingester had a problem with extracted string from file '"
172 + sourceName +
"' (id: " + sourceID +
").", ingEx);
// The chunker records exceptions rather than throwing them from the loop;
// report one if present.
177 if (chunker.hasException()) {
178 extractor.logWarning(
"Error chunking content from " + sourceID +
": " + sourceName, chunker.getException());
181 }
catch (Exception ex) {
// Any other exception raised inside the try block is logged as a warning
// through the extractor.
182 extractor.logWarning(
"Unexpected error, can't read content stream from " + sourceID +
": " + sourceName, ex);
// NOTE(review): the body of this cancellation check is not visible in this excerpt.
185 if (context != null && context.fileIngestIsCancelled()) {
// Final document for the source itself: store the chunk count, switch the
// ID back from the last chunk ID to the plain source ID, drop the per-chunk
// size, and index with null content.
189 fields.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(numChunks));
191 fields.put(Server.Schema.ID.toString(), Long.toString(sourceID));
193 fields.remove(Server.Schema.CHUNK_SIZE.toString());
194 indexChunk(null, sourceName, fields);
/**
 * Builds a SolrInputDocument from {@code fields} plus the chunk text (stored
 * under the CONTENT schema field) and hands it to the Solr server. After a
 * successful addDocument() call the {@code uncommitedIngests} flag is set.
 *
 * NOTE(review): lines between the visible statements are missing from this
 * excerpt (including the opening of the try block), so any additional guards
 * - for example around a null {@code chunk}, which indexText() passes for
 * its final document - cannot be confirmed from here.
 *
 * @param chunk      text stored under the CONTENT field
 * @param sourceName name of the source, used in error messages
 * @param fields     field names and values copied into the document; must
 *                   contain an IMAGE_ID entry
 *
 * @throws IngesterException if {@code fields} has no IMAGE_ID value, or if
 *                           adding the document raises
 *                           KeywordSearchModuleException or
 *                           NoOpenCoreException (wrapped as the cause)
 */
213 private void indexChunk(String chunk, String sourceName, Map<String, String> fields)
throws IngesterException {
// Refuse to index a document with no IMAGE_ID value; log at SEVERE and throw.
214 if (fields.get(Server.Schema.IMAGE_ID.toString()) == null) {
219 String msg = NbBundle.getMessage(Ingester.class,
220 "Ingester.ingest.exception.unknownImgId.msg", sourceName);
221 logger.log(Level.SEVERE, msg);
222 throw new IngesterException(msg);
226 SolrInputDocument updateDoc =
new SolrInputDocument();
// Copy every entry of the field map into the document.
227 for (String key : fields.keySet()) {
228 updateDoc.addField(key, fields.get(key));
232 updateDoc.addField(Server.Schema.CONTENT.toString(), chunk);
236 solrServer.addDocument(updateDoc);
// Mark that at least one document has been added since the flag was last cleared.
237 uncommitedIngests =
true;
239 }
catch (KeywordSearchModuleException | NoOpenCoreException ex) {
// Wrap the failure, keeping the original exception as the cause.
241 throw new IngesterException(
242 NbBundle.getMessage(Ingester.class,
"Ingester.ingest.exception.err.msg", sourceName), ex);
253 uncommitedIngests =
false;
254 }
catch (NoOpenCoreException | SolrServerException ex) {
255 logger.log(Level.WARNING,
"Error commiting index", ex);
/**
 * Visitor that produces a {@code Map<String, String>} of field names to
 * values for a SleuthkitVisitableItem. It extends
 * SleuthkitItemVisitor.Default and is used through the shared
 * SOLR_FIELDS_VISITOR instance.
 *
 * NOTE(review): the bodies of most visit() overloads are missing from this
 * excerpt, so what each one returns cannot be stated from here.
 */
263 static private class SolrFieldsVisitor extends SleuthkitItemVisitor.Default<Map<String, String>> {
// Fallback for items with no dedicated overload: an empty map. Such a map
// has no IMAGE_ID entry.
266 protected Map<String, String>
defaultVisit(SleuthkitVisitableItem svi) {
267 return new HashMap<>();
// NOTE(review): bodies of the following visit() overloads are not visible here.
271 public Map<String, String>
visit(File f) {
276 public Map<String, String>
visit(DerivedFile df) {
281 public Map<String, String>
visit(Directory d) {
286 public Map<String, String>
visit(LocalDirectory ld) {
291 public Map<String, String>
visit(LayoutFile lf) {
297 public Map<String, String>
visit(LocalFile lf) {
302 public Map<String, String>
visit(SlackFile f) {
// NOTE(review): the header of the method containing the next lines is not
// visible in this excerpt. The visible lines fill a new map with the file's
// ID, its data source ID as IMAGE_ID, and its name as FILE_NAME.
333 Map<String, String> params =
new HashMap<>();
334 params.put(
Server.
Schema.ID.toString(), Long.toString(file.getId()));
336 params.put(
Server.
Schema.IMAGE_ID.toString(), Long.toString(file.getDataSource().getId()));
337 }
catch (TskCoreException ex) {
// If the data source lookup fails, log at SEVERE and store -1 as IMAGE_ID
// so the entry is still present.
338 logger.log(Level.SEVERE,
"Could not get data source id to properly index the file " + file.getId(), ex);
339 params.put(
Server.
Schema.IMAGE_ID.toString(), Long.toString(-1));
341 params.put(
Server.
Schema.FILE_NAME.toString(), file.getName());
/**
 * Builds the field map for an artifact: ID is the artifact ID and IMAGE_ID
 * is the ID of the data source reported by
 * ArtifactTextExtractor.getDataSource(artifact), or -1 if that lookup
 * throws TskCoreException.
 *
 * NOTE(review): the end of this method, including its return statement, is
 * not visible in this excerpt.
 */
353 public Map<String, String>
visit(BlackboardArtifact artifact) {
354 Map<String, String> params =
new HashMap<>();
355 params.put(
Server.
Schema.ID.toString(), Long.toString(artifact.getArtifactID()));
357 params.put(
Server.
Schema.IMAGE_ID.toString(), Long.toString(ArtifactTextExtractor.getDataSource(artifact).getId()));
358 }
catch (TskCoreException ex) {
// Same fallback as for files: log at SEVERE and store -1 as IMAGE_ID.
359 logger.log(Level.SEVERE,
"Could not get data source id to properly index the artifact " + artifact.getArtifactID(), ex);
360 params.put(
Server.
Schema.IMAGE_ID.toString(), Long.toString(-1));
/**
 * Checked exception (it extends Exception) declared by the indexing methods
 * in this file. It offers one constructor taking a message and a cause and
 * one taking a message only.
 *
 * NOTE(review): the constructor bodies are not visible in this excerpt;
 * presumably they delegate to the matching Exception constructors - confirm.
 */
370 static class IngesterException
extends Exception {
372 private static final long serialVersionUID = 1L;
374 IngesterException(String message, Throwable ex) {
378 IngesterException(String message) {
Map< String, String > visit(LayoutFile lf)
Map< String, String > visit(File f)
Map< String, String > visit(LocalDirectory ld)
Map< String, String > getCommonAndMACTimeFields(AbstractFile file)
Map< String, String > visit(SlackFile f)
Map< String, String > visit(Directory d)
static String getStringTimeISO8601(long epochSeconds, TimeZone tzone)
Map< String, String > getCommonFields(AbstractFile file)
Map< String, String > visit(DerivedFile df)
Map< String, String > visit(BlackboardArtifact artifact)
Map< String, String > visit(LocalFile lf)
Map< String, String > defaultVisit(SleuthkitVisitableItem svi)