19 package org.sleuthkit.autopsy.keywordsearch;
 
   21 import com.google.common.collect.ImmutableList;
 
   22 import java.io.Reader;
 
   23 import java.util.HashMap;
 
   24 import java.util.List;
 
   26 import java.util.concurrent.atomic.AtomicInteger;
 
   27 import java.util.logging.Level;
 
   28 import org.openide.util.Lookup;
 
   29 import org.openide.util.NbBundle;
 
   30 import org.openide.util.NbBundle.Messages;
 
   31 import org.openide.util.lookup.Lookups;
 
   65     "# {0} - Reason for not starting Solr", 
"KeywordSearchIngestModule.init.tryStopSolrMsg={0}<br />Please try stopping Java Solr processes if any exist and restart the application.",
 
   66     "KeywordSearchIngestModule.init.badInitMsg=Keyword search server was not properly initialized, cannot run keyword search ingest.",
 
   67     "SolrConnectionCheck.Port=Invalid port number.",
 
   68     "# {0} - Reason for not connecting to Solr", 
"KeywordSearchIngestModule.init.exception.errConnToSolr.msg=Error connecting to SOLR server: {0}.",
 
   69     "KeywordSearchIngestModule.startUp.noOpenCore.msg=The index could not be opened or does not exist.",
 
   70     "CannotRunFileTypeDetection=Unable to run file type detection." 
   78     private static final List<String> ARCHIVE_MIME_TYPES
 
   81                     "application/x-7z-compressed", 
 
   82                     "application/x-ace-compressed", 
 
   83                     "application/x-alz-compressed", 
 
   85                     "application/vnd.ms-cab-compressed", 
 
   86                     "application/x-cfs-compressed", 
 
   87                     "application/x-dgc-compressed", 
 
   88                     "application/x-apple-diskimage", 
 
   89                     "application/x-gca-compressed", 
 
   93                     "application/x-rar-compressed", 
 
   94                     "application/x-stuffit", 
 
   95                     "application/x-stuffitx", 
 
   97                     "application/x-archive", 
 
   98                     "application/x-executable", 
 
  102                     "application/x-cpio", 
 
  103                     "application/x-shar", 
 
  105                     "application/x-bzip", 
 
  106                     "application/x-bzip2", 
 
  107                     "application/x-lzip", 
 
  108                     "application/x-lzma", 
 
  109                     "application/x-lzop", 
 
  111                     "application/x-compress"); 
 
  116     enum StringsExtractOptions {
 
  121     enum UpdateFrequency {
 
  127         NONE(Integer.MAX_VALUE),
 
  129         private final int time;
 
  131         UpdateFrequency(
int time) {
 
  141     private Ingester ingester = null;
 
  147     private boolean startedSearching = 
false;
 
  150     private boolean initialized = 
false;
 
  152     private static final AtomicInteger instanceCount = 
new AtomicInteger(0); 
 
  153     private int instanceNum = 0;
 
  166     private static final Map<Long, Map<Long, IngestStatus>> ingestStatus = 
new HashMap<>(); 
 
  177         synchronized (ingestStatus) {
 
  178             Map<Long, IngestStatus> ingestStatusForJob = ingestStatus.get(ingestJobId);
 
  179             if (ingestStatusForJob == null) {
 
  180                 ingestStatusForJob = 
new HashMap<>();
 
  181                 ingestStatus.put(ingestJobId, ingestStatusForJob);
 
  183             ingestStatusForJob.put(fileId, status);
 
  184             ingestStatus.put(ingestJobId, ingestStatusForJob);
 
  189         this.settings = settings;
 
  190         instanceNum = instanceCount.getAndIncrement();
 
  199         "KeywordSearchIngestModule.startupMessage.failedToGetIndexSchema=Failed to get schema version for text index.",
 
  200         "# {0} - Solr version number", 
"KeywordSearchIngestModule.startupException.indexSolrVersionNotSupported=Adding text no longer supported for Solr version {0} of the text index.",
 
  201         "# {0} - schema version number", 
"KeywordSearchIngestModule.startupException.indexSchemaNotSupported=Adding text no longer supported for schema version {0} of the text index.",
 
  202         "KeywordSearchIngestModule.noOpenCase.errMsg=No open case available." 
  210         if (server.coreIsOpen() == 
false) {
 
  215             Index indexInfo = server.getIndexInfo();
 
  216             if (!IndexFinder.getCurrentSolrVersion().equals(indexInfo.getSolrVersion())) {
 
  217                 throw new IngestModuleException(Bundle.KeywordSearchIngestModule_startupException_indexSolrVersionNotSupported(indexInfo.getSolrVersion()));
 
  219             if (!indexInfo.isCompatible(IndexFinder.getCurrentSchemaVersion())) {
 
  220                 throw new IngestModuleException(Bundle.KeywordSearchIngestModule_startupException_indexSchemaNotSupported(indexInfo.getSchemaVersion()));
 
  223             throw new IngestModuleException(Bundle.KeywordSearchIngestModule_startupMessage_failedToGetIndexSchema(), ex);
 
  232         ingester = Ingester.getDefault();
 
  233         this.context = context;
 
  250                     port = Integer.parseInt(properties.getPort());
 
  251                 } 
catch (NumberFormatException ex) {
 
  253                     throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_badInitMsg() + 
" " + Bundle.SolrConnectionCheck_Port(), ex);
 
  256                     kwsService.
tryConnect(properties.getHost(), port);
 
  263                     if (!server.isRunning()) {
 
  264                         throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_tryStopSolrMsg(Bundle.KeywordSearchIngestModule_init_badInitMsg()));
 
  268                     throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_tryStopSolrMsg(Bundle.KeywordSearchIngestModule_init_badInitMsg()), ex);
 
  275                     throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_exception_errConnToSolr_msg(ex.getMessage()), ex);
 
  279                 List<KeywordList> keywordLists = XmlKeywordSearchList.getCurrent().getListsL();
 
  280                 boolean hasKeywordsForSearch = 
false;
 
  282                     if (settings.keywordListIsEnabled(keywordList.getName()) && !keywordList.getKeywords().isEmpty()) {
 
  283                         hasKeywordsForSearch = 
true;
 
  287                 if (!hasKeywordsForSearch) {
 
  289                             NbBundle.getMessage(this.getClass(), 
"KeywordSearchIngestModule.init.onlyIdxKwSkipMsg")));
 
  295         Map<String, String> stringsOptions = KeywordSearchSettings.getStringExtractOptions();
 
  296         stringsConfig.
setExtractUTF8(Boolean.parseBoolean(stringsOptions.get(StringsExtractOptions.EXTRACT_UTF8.toString())));
 
  297         stringsConfig.
setExtractUTF16(Boolean.parseBoolean(stringsOptions.get(StringsExtractOptions.EXTRACT_UTF16.toString())));
 
  300         stringsExtractionContext = Lookups.fixed(stringsConfig);
 
  308         if (initialized == 
false) 
 
  310             logger.log(Level.SEVERE, 
"Skipping processing, module not initialized, file: {0}", abstractFile.getName());  
 
  315         if (abstractFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.VIRTUAL_DIR)) {
 
  320         if (KeywordSearchSettings.getSkipKnown() && abstractFile.getKnown().equals(FileKnown.KNOWN)) {
 
  336         if (!startedSearching) {
 
  340             List<String> keywordListNames = settings.getNamesOfEnabledKeyWordLists();
 
  341             IngestSearchRunner.getInstance().startJob(context, keywordListNames);
 
  342             startedSearching = 
true;
 
  354         logger.log(Level.INFO, 
"Keyword search ingest module instance {0} shutting down", instanceNum); 
 
  356         if ((initialized == 
false) || (context == null)) {
 
  361             logger.log(Level.INFO, 
"Keyword search ingest module instance {0} stopping search job due to ingest cancellation", instanceNum); 
 
  362             IngestSearchRunner.getInstance().stopJob(jobId);
 
  368         IngestSearchRunner.getInstance().endJob(jobId);
 
  374                 logger.log(Level.INFO, 
"Indexed files count: {0}", numIndexedFiles); 
 
  376                 logger.log(Level.INFO, 
"Indexed file chunks count: {0}", numIndexedChunks); 
 
  378                 logger.log(Level.SEVERE, 
"Error executing Solr queries to check number of indexed files and file chunks", ex); 
 
  381             synchronized (ingestStatus) {
 
  382                 ingestStatus.remove(jobId);
 
  393         stringsExtractionContext = null;
 
  401         int text_ingested = 0;
 
  402         int metadata_ingested = 0;
 
  403         int strings_ingested = 0;
 
  408         synchronized (ingestStatus) {
 
  409             Map<Long, IngestStatus> ingestStatusForJob = ingestStatus.get(jobId);
 
  410             if (ingestStatusForJob == null) {
 
  418                     case METADATA_INGESTED:
 
  421                     case STRINGS_INGESTED:
 
  424                     case SKIPPED_ERROR_TEXTEXTRACT:
 
  427                     case SKIPPED_ERROR_INDEXING:
 
  430                     case SKIPPED_ERROR_IO:
 
  439         StringBuilder msg = 
new StringBuilder();
 
  440         msg.append(
"<table border=0><tr><td>").append(NbBundle.getMessage(
this.getClass(), 
"KeywordSearchIngestModule.postIndexSummary.knowFileHeaderLbl")).append(
"</td><td>").append(text_ingested).append(
"</td></tr>"); 
 
  441         msg.append(
"<tr><td>").append(NbBundle.getMessage(
this.getClass(), 
"KeywordSearchIngestModule.postIndexSummary.fileGenStringsHead")).append(
"</td><td>").append(strings_ingested).append(
"</td></tr>"); 
 
  442         msg.append(
"<tr><td>").append(NbBundle.getMessage(
this.getClass(), 
"KeywordSearchIngestModule.postIndexSummary.mdOnlyLbl")).append(
"</td><td>").append(metadata_ingested).append(
"</td></tr>"); 
 
  443         msg.append(
"<tr><td>").append(NbBundle.getMessage(
this.getClass(), 
"KeywordSearchIngestModule.postIndexSummary.idxErrLbl")).append(
"</td><td>").append(error_index).append(
"</td></tr>"); 
 
  444         msg.append(
"<tr><td>").append(NbBundle.getMessage(
this.getClass(), 
"KeywordSearchIngestModule.postIndexSummary.errTxtLbl")).append(
"</td><td>").append(error_text).append(
"</td></tr>"); 
 
  445         msg.append(
"<tr><td>").append(NbBundle.getMessage(
this.getClass(), 
"KeywordSearchIngestModule.postIndexSummary.errIoLbl")).append(
"</td><td>").append(error_io).append(
"</td></tr>"); 
 
  446         msg.append(
"</table>"); 
 
  447         String indexStats = msg.toString();
 
  448         logger.log(Level.INFO, 
"Keyword Indexing Completed: {0}", indexStats); 
 
  450         if (error_index > 0) {
 
  452                     NbBundle.getMessage(
this.getClass(), 
"KeywordSearchIngestModule.postIndexSummary.kwIdxErrMsgFiles", error_index));
 
  453         } 
else if (error_io + error_text > 0) {
 
  454             MessageNotifyUtil.
Notify.
warn(NbBundle.getMessage(
this.getClass(), 
"KeywordSearchIngestModule.postIndexSummary.kwIdxWarnMsgTitle"),
 
  455                     NbBundle.getMessage(
this.getClass(), 
"KeywordSearchIngestModule.postIndexSummary.idxErrReadFilesMsg"));
 
  482             imageConfig.
setOCREnabled(KeywordSearchSettings.getOcrOption());
 
  484             Lookup extractionContext = Lookups.fixed(imageConfig, terminator);
 
  488                 Reader extractedTextReader = extractor.
getReader();
 
  490                 return Ingester.getDefault().indexText(extractedTextReader, aFile.getId(), aFile.getName(), aFile, context);
 
  511                 Reader extractedTextReader = stringsExtractor.
getReader();
 
  516                     logger.log(Level.WARNING, 
"Failed to extract strings and ingest, file ''{0}'' (id: {1}).", 
new Object[]{aFile.getName(), aFile.getId()});  
 
  521                 logger.log(Level.WARNING, 
"Failed to extract strings and ingest, file '" + aFile.getName() + 
"' (id: " + aFile.getId() + 
").", ex);  
 
  534         private void indexFile(AbstractFile aFile, 
boolean indexContent) {
 
  537             TskData.TSK_DB_FILES_TYPE_ENUM aType = aFile.getType();
 
  540             if ((aType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS) || aType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS))) {
 
  544                 extractStringsAndIndex(aFile);
 
  548             final long size = aFile.getSize();
 
  551             if ((indexContent == 
false || aFile.isDir() || size == 0)) {
 
  556                     ingester.indexMetaDataOnly(aFile);
 
  558                 } 
catch (IngesterException ex) {
 
  560                     logger.log(Level.WARNING, 
"Unable to index meta-data for file: " + aFile.getId(), ex); 
 
  568             String fileType = fileTypeDetector.
getMIMEType(aFile);
 
  572             if (ARCHIVE_MIME_TYPES.contains(fileType)) {
 
  577                     ingester.indexMetaDataOnly(aFile);
 
  579                 } 
catch (IngesterException ex) {
 
  581                     logger.log(Level.WARNING, 
"Unable to index meta-data for file: " + aFile.getId(), ex); 
 
  586             boolean wasTextAdded = 
false;
 
  594                 if (fileType.equals(
"application/octet-stream")) {
 
  595                     extractStringsAndIndex(aFile);
 
  598                 if (!extractTextAndIndex(aFile, fileType)) {
 
  606             } 
catch (IngesterException e) {
 
  607                 logger.log(Level.INFO, 
"Could not extract text with Tika, " + aFile.getId() + 
", "  
  608                         + aFile.getName(), e);
 
  610             } 
catch (Exception e) {
 
  611                 logger.log(Level.WARNING, 
"Error extracting text with Tika, " + aFile.getId() + 
", "  
  612                         + aFile.getName(), e);
 
  616             if ((wasTextAdded == 
false) && (aFile.getNameExtension().equalsIgnoreCase(
"txt") && !(aFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.CARVED)))) {
 
  620                     TextFileExtractor textFileExtractor = 
new TextFileExtractor();
 
  621                     Reader textReader = textFileExtractor.getReader(aFile);
 
  622                     if (textReader == null) {
 
  623                         logger.log(Level.INFO, 
"Unable to extract with TextFileExtractor, Reader was null for file: {0}", aFile.getName());
 
  624                     } 
else if (Ingester.getDefault().indexText(textReader, aFile.getId(), aFile.getName(), aFile, context)) {
 
  628                 } 
catch (IngesterException ex) {
 
  629                     logger.log(Level.WARNING, 
"Unable to index as unicode", ex);
 
  631                     logger.log(Level.INFO, 
"Could not extract text with TextFileExtractor", ex);
 
  636             if (wasTextAdded == 
false) {
 
  637                 extractStringsAndIndex(aFile);
 
int queryNumIndexedFiles()
FileTypeDetector fileTypeDetector
synchronized long decrementAndGet(long jobId)
int queryNumIndexedChunks()
void tryConnect(String host, int port)
static IndexingServerProperties getMultiUserServerProperties(String caseDirectory)
METADATA_INGESTED
No content, so we just ingested the metadata. 
String getCaseDirectory()
boolean extractTextAndIndex(AbstractFile aFile, String detectedFormat)
void startUp(IngestJobContext context)
static synchronized Server getServer()
synchronized long incrementAndGet(long jobId)
static IngestMessage createMessage(MessageType messageType, String source, String subject, String detailsHtml)
String getMIMEType(AbstractFile file)
final KeywordSearchJobSettings settings
SKIPPED_ERROR_INDEXING
File was skipped because index engine had problems. 
boolean extractStringsAndIndex(AbstractFile aFile)
int queryNumIndexedDocuments()
void postMessage(final IngestMessage message)
boolean fileIngestIsCancelled()
SKIPPED_ERROR_TEXTEXTRACT
File was skipped because of text extraction issues. 
static void putIngestStatus(long ingestJobId, long fileId, IngestStatus status)
ProcessResult process(AbstractFile abstractFile)
static void error(String title, String message)
void indexFile(AbstractFile aFile, boolean indexContent)
synchronized static Logger getLogger(String name)
static Case getCurrentCaseThrows()
static IngestMessage createWarningMessage(String source, String subject, String detailsHtml)
Lookup stringsExtractionContext
static void warn(String title, String message)
static synchronized IngestServices getInstance()
STRINGS_INGESTED
Text was extracted by knowing the file type and ingested.