19 package org.sleuthkit.autopsy.keywordsearch;
 
   21 import java.io.BufferedReader;
 
   22 import java.io.Reader;
 
   23 import java.util.HashMap;
 
   25 import java.util.logging.Level;
 
   26 import org.apache.commons.lang3.math.NumberUtils;
 
   27 import org.apache.solr.client.solrj.SolrServerException;
 
   28 import org.apache.solr.common.SolrInputDocument;
 
   29 import org.openide.util.NbBundle;
 
   // Logger named after the Ingester class; used for all diagnostics below.
   57     private static final Logger logger = Logger.getLogger(Ingester.class.getName());
 
   // Set to true by indexChunk() once a document has been added to Solr;
   // finalize() logs a warning if it is still true at that point.
   58     private volatile boolean uncommitedIngests = 
false;
 
   // Server handle obtained from KeywordSearch; indexChunk() adds documents through it.
   59     private final Server solrServer = KeywordSearch.getServer();
 
   // Single shared visitor used by getContentFields() to build the Solr field map of an item.
   60     private static final SolrFieldsVisitor SOLR_FIELDS_VISITOR = 
new SolrFieldsVisitor();
 
   // Shared instance, created on demand by getDefault().
   61     private static Ingester instance;
 
   /**
    * Gets the shared Ingester, creating it on the first call.
    * The method is synchronized, so the null check and the construction
    * are never executed by two threads at the same time.
    */
   66     public static synchronized Ingester getDefault() {
 
   67         if (instance == null) {
 
   68             instance = 
new Ingester();
 
   /**
    * Finalizer hook: logs a warning when documents were added through this
    * Ingester (uncommitedIngests is still true) without being committed.
    */
   75     @SuppressWarnings(
"FinalizeDeclaration")
 
   76     protected 
void finalize() throws Throwable {
 
              // Flag is raised by indexChunk() after each successful add.
   80         if (uncommitedIngests) {
 
   81             logger.warning(
"Ingester was used to add files that it never committed."); 
 
   /**
    * Indexes only the metadata fields of a file, with no text content.
    *
    * @param file the file whose fields (from getContentFields) are indexed
    *
    * @throws IngesterException propagated from indexChunk
    */
   95     void indexMetaDataOnly(AbstractFile file) 
throws IngesterException {
 
              // Empty string as content; the lower-cased file name is passed as the source name.
   96         indexChunk(
"", file.getName().toLowerCase(), getContentFields(file));
 
  /**
   * Indexes only the metadata fields of a blackboard artifact, with no text content.
   *
   * @param artifact   the artifact whose fields (from getContentFields) are indexed
   * @param sourceName name passed through to indexChunk as the source name
   *
   * @throws IngesterException propagated from indexChunk
   */
  109     void indexMetaDataOnly(BlackboardArtifact artifact, String sourceName) 
throws IngesterException {
 
              // Empty string as content; only the artifact's field map is indexed.
  110         indexChunk(
"", sourceName, getContentFields(artifact));
 
  /**
   * Builds the Solr field map for an item by dispatching it to the shared
   * SOLR_FIELDS_VISITOR.
   *
   * @param item the item to describe
   *
   * @return the map produced by the visitor for this item
   */
  121     private Map<String, String> getContentFields(SleuthkitVisitableItem item) {
 
  122         return item.accept(SOLR_FIELDS_VISITOR);
 
  /**
   * Reads text from a Reader, splits it into chunks with a Chunker and indexes
   * each chunk, then indexes one further document for the source itself that
   * carries the chunk count.
   *
   * @param sourceReader reader over the text; wrapped in a BufferedReader that is
   *                     closed by the try-with-resources statement
   * @param sourceID     id used to build the chunk ids and as the ID of the
   *                     final (non-chunk) document
   * @param sourceName   name passed to indexChunk and used in log messages
   * @param source       item whose field map is obtained via getContentFields
   * @param context      ingest job context used for cancellation checks; the
   *                     checks are skipped when it is null
   *
   * @return NOTE(review): what true/false signify is not evident from this
   *         listing - confirm against the return statements
   *
   * @throws Ingester.IngesterException declared by the signature; failures on
   *                                    individual chunks are caught and logged
   */
  145     < T extends SleuthkitVisitableItem> 
boolean indexText(Reader sourceReader, 
long sourceID, String sourceName, T source, IngestJobContext context) 
throws Ingester.IngesterException {
 
              // Base field map for the source; ID and CHUNK_SIZE are overwritten per chunk below.
  148         Map<String, String> fields = getContentFields(source);
 
  150         try (BufferedReader reader = 
new BufferedReader(sourceReader)) {
 
  151             Chunker chunker = 
new Chunker(reader);
 
  152             for (Chunk chunk : chunker) {
 
                      // Check for cancellation before handling each chunk.
  153                 if (context != null && context.fileIngestIsCancelled()) {
 
  154                     logger.log(Level.INFO, 
"File ingest cancelled. Cancelling keyword search indexing of {0}", sourceName);
 
                      // Chunk id is derived from the source id and the next chunk number (numChunks + 1).
  157                 String chunkId = Server.getChunkIdString(sourceID, numChunks + 1);
 
  158                 fields.put(Server.Schema.ID.toString(), chunkId);
 
  159                 fields.put(Server.Schema.CHUNK_SIZE.toString(), String.valueOf(chunk.getBaseChunkLength()));
 
  162                     indexChunk(chunk.toString(), sourceName, fields);
 
  164                 } 
catch (Ingester.IngesterException ingEx) {
 
                          // A failure on a single chunk is logged with the source name and id.
  165                     logger.log(Level.WARNING, 
"Ingester had a problem with extracted string from file '"  
  166                             + sourceName + 
"' (id: " + sourceID + 
").", ingEx);
 
                  // If the chunker recorded an exception, log it with the source id and name.
  171             if (chunker.hasException()) {
 
  172                 logger.log(Level.WARNING, 
"Error chunking content from " + sourceID + 
": " + sourceName, chunker.getException());
 
  175         } 
catch (Exception ex) {
 
                  // Any other exception from the reading/chunking block is logged as a warning.
  176             logger.log(Level.WARNING, 
"Unexpected error, can't read content stream from " + sourceID + 
": " + sourceName, ex);
 
                  // Cancellation is checked once more before the final document is written.
  179             if (context != null && context.fileIngestIsCancelled()) {
 
                      // Final document for the source itself: NUM_CHUNKS holds the chunk count,
                      // ID is the bare source id, CHUNK_SIZE is dropped and no content is supplied.
  183                 fields.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(numChunks));
 
  185                 fields.put(Server.Schema.ID.toString(), Long.toString(sourceID));
 
  187                 fields.remove(Server.Schema.CHUNK_SIZE.toString());
 
  188                 indexChunk(null, sourceName, fields);
 
  /**
   * Builds a SolrInputDocument from the given fields and content and adds it
   * to the Solr server.
   *
   * @param chunk      text content for the document; may be null, in which case
   *                   the lower-cased content field is written as an empty string
   * @param sourceName name of the source, used in the exception messages
   * @param fields     field name/value pairs copied into the document; must
   *                   contain an IMAGE_ID entry
   *
   * @throws IngesterException if fields has no IMAGE_ID entry, or if adding the
   *                           document fails with KeywordSearchModuleException
   *                           or NoOpenCoreException
   */
  207     private void indexChunk(String chunk, String sourceName, Map<String, String> fields) 
throws IngesterException {
 
              // A document without an image id is rejected: log at SEVERE and throw.
  208         if (fields.get(Server.Schema.IMAGE_ID.toString()) == null) {
 
  213             String msg = NbBundle.getMessage(Ingester.class,
 
  214                     "Ingester.ingest.exception.unknownImgId.msg", sourceName); 
 
  215             logger.log(Level.SEVERE, msg);
 
  216             throw new IngesterException(msg);
 
              // Copy every supplied field into the Solr document.
  220         SolrInputDocument updateDoc = 
new SolrInputDocument();
 
  221         for (String key : fields.keySet()) {
 
  222             updateDoc.addField(key, fields.get(key));
 
                  // The text itself goes into the CONTENT field.
  230             updateDoc.addField(Server.Schema.CONTENT.toString(), chunk);
 
                  // For index schema version 2.1 and above, a lower-cased copy of the text
                  // (empty string when chunk is null) is also stored in CONTENT_STR.
  234             double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion());
 
  235             if (indexSchemaVersion >= 2.1) {
 
  236                 updateDoc.addField(Server.Schema.CONTENT_STR.toString(), ((chunk == null) ? 
"" : chunk.toLowerCase()));
 
                  // Timing metric is obtained before the add and submitted after it.
  239             TimingMetric metric = HealthMonitor.getTimingMetric(
"Solr: Index chunk");
 
  241             solrServer.addDocument(updateDoc);
 
  242             HealthMonitor.submitTimingMetric(metric);
 
                  // Record that something has been added to the index.
  243             uncommitedIngests = 
true;
 
  245         } 
catch (KeywordSearchModuleException | NoOpenCoreException ex) {
 
                  // Wrap the failure in an IngesterException, keeping the original as the cause.
  247             throw new IngesterException(
 
  248                     NbBundle.getMessage(Ingester.class, 
"Ingester.ingest.exception.err.msg", sourceName), ex);
 
  259             uncommitedIngests = 
false;
 
  260         } 
catch (NoOpenCoreException | SolrServerException ex) {
 
  261             logger.log(Level.WARNING, 
"Error commiting index", ex); 
 
  /**
   * Visitor that produces the map of Solr field names to values for a
   * SleuthkitVisitableItem. Used through Ingester.getContentFields().
   */
  269     static private class SolrFieldsVisitor extends SleuthkitItemVisitor.Default<Map<String, String>> {
 
              // Items without a dedicated visit method get an empty field map.
  272         protected Map<String, String> 
defaultVisit(SleuthkitVisitableItem svi) {
 
  273             return new HashMap<>();
 
  277         public Map<String, String> 
visit(File f) {
 
  282         public Map<String, String> 
visit(DerivedFile df) {
 
  287         public Map<String, String> 
visit(Directory d) {
 
  292         public Map<String, String> 
visit(LocalDirectory ld) {
 
  297         public Map<String, String> 
visit(LayoutFile lf) {
 
  303         public Map<String, String> 
visit(LocalFile lf) {
 
  308         public Map<String, String> 
visit(SlackFile f) {
 
                  // Fields for a file: ID is the file id, IMAGE_ID is the id of its data source.
  339             Map<String, String> params = 
new HashMap<>();
 
  340             params.put(
Server.
Schema.ID.toString(), Long.toString(file.getId()));
 
  342                 params.put(
Server.
Schema.IMAGE_ID.toString(), Long.toString(file.getDataSource().getId()));
 
  343             } 
catch (TskCoreException ex) {
 
                      // Data source lookup failed: log at SEVERE and fall back to an IMAGE_ID of -1.
  344                 logger.log(Level.SEVERE, 
"Could not get data source id to properly index the file " + file.getId(), ex); 
 
  345                 params.put(
Server.
Schema.IMAGE_ID.toString(), Long.toString(-1));
 
                  // The file name is stored lower-cased.
  347             params.put(
Server.
Schema.FILE_NAME.toString(), file.getName().toLowerCase());
 
              // Fields for a blackboard artifact: ID is the artifact id, IMAGE_ID is its data source id.
  359         public Map<String, String> 
visit(BlackboardArtifact artifact) {
 
  360             Map<String, String> params = 
new HashMap<>();
 
  361             params.put(
Server.
Schema.ID.toString(), Long.toString(artifact.getArtifactID()));
 
  363                 params.put(
Server.
Schema.IMAGE_ID.toString(), Long.toString(artifact.getDataSource().getId()));
 
  364             } 
catch (TskCoreException ex) {
 
                      // Data source lookup failed: log at SEVERE and fall back to an IMAGE_ID of -1.
  365                 logger.log(Level.SEVERE, 
"Could not get data source id to properly index the artifact " + artifact.getArtifactID(), ex); 
 
  366                 params.put(
Server.
Schema.IMAGE_ID.toString(), Long.toString(-1));
 
              // Fields for a report: ID is the report id, IMAGE_ID is its data source id when it has one.
  379         public Map<String, String> 
visit(Report report) {
 
  380             Map<String, String> params = 
new HashMap<>();
 
  381             params.put(
Server.
Schema.ID.toString(), Long.toString(report.getId()));
 
  383                 Content dataSource = report.getDataSource();
 
                      // A report with no data source gets an IMAGE_ID of -1.
  384                 if (null == dataSource) {
 
  385                     params.put(
Server.
Schema.IMAGE_ID.toString(), Long.toString(-1));
 
  387                     params.put(
Server.
Schema.IMAGE_ID.toString(), Long.toString(dataSource.getId()));
 
  389             } 
catch (TskCoreException ex) {
 
                      // Data source lookup failed: log at SEVERE and fall back to an IMAGE_ID of -1.
  390                 logger.log(Level.SEVERE, 
"Could not get data source id to properly index the report, using default value. Id: " + report.getId(), ex); 
 
  391                 params.put(
Server.
Schema.IMAGE_ID.toString(), Long.toString(-1));
 
  /**
   * Checked exception thrown by the Ingester when a document cannot be
   * indexed (see indexChunk).
   */
  401     static class IngesterException 
extends Exception {
 
  403         private static final long serialVersionUID = 1L;
 
              // Variant carrying the underlying cause.
  405         IngesterException(String message, Throwable ex) {
 
              // Variant with a message only.
  409         IngesterException(String message) {
 
Map< String, String > visit(Report report)
Map< String, String > visit(LayoutFile lf)
Map< String, String > visit(File f)
Map< String, String > visit(LocalDirectory ld)
Map< String, String > getCommonAndMACTimeFields(AbstractFile file)
Map< String, String > visit(SlackFile f)
Map< String, String > visit(Directory d)
static String getStringTimeISO8601(long epochSeconds, TimeZone tzone)
Map< String, String > getCommonFields(AbstractFile file)
Map< String, String > visit(DerivedFile df)
Map< String, String > visit(BlackboardArtifact artifact)
Map< String, String > visit(LocalFile lf)
Map< String, String > defaultVisit(SleuthkitVisitableItem svi)