// Extracted from: api-docs/4.22.0/_keyword_search_ingest_module_8java_source.html

 /*

  * Autopsy Forensic Browser

  *

  * Copyright 2011-2023 Basis Technology Corp.

  * Contact: carrier <at> sleuthkit <dot> org

  *

  * Licensed under the Apache License, Version 2.0 (the "License");

  * you may not use this file except in compliance with the License.

  * You may obtain a copy of the License at

  *

  *     http://www.apache.org/licenses/LICENSE-2.0

  *

  * Unless required by applicable law or agreed to in writing, software

  * distributed under the License is distributed on an "AS IS" BASIS,

  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

  * See the License for the specific language governing permissions and

  * limitations under the License.

  */

 package org.sleuthkit.autopsy.keywordsearch;


 import com.google.common.collect.ImmutableList;

 import com.google.common.collect.ImmutableSet;

 import com.google.common.io.CharSource;

 import java.io.IOException;

 import java.io.Reader;

 import java.text.ParseException;

 import java.text.SimpleDateFormat;

 import java.util.ArrayList;

 import java.util.Collection;

 import java.util.Date;

 import java.util.HashMap;

 import java.util.List;

 import static java.util.Locale.US;

 import java.util.Map;

 import java.util.Map.Entry;

 import java.util.Optional;

 import java.util.concurrent.atomic.AtomicInteger;

 import java.util.logging.Level;

 import java.util.stream.Collectors;

 import java.util.stream.IntStream;

 import java.util.stream.Stream;

 import org.apache.commons.lang3.tuple.Pair;

 import org.apache.commons.lang3.tuple.Triple;

 import org.apache.tika.metadata.DublinCore;

 import org.apache.tika.metadata.FileSystem;

 import org.apache.tika.metadata.IPTC;

 import org.apache.tika.metadata.Office;

 import org.apache.tika.metadata.OfficeOpenXMLCore;

 import org.apache.tika.metadata.OfficeOpenXMLExtended;

 import org.apache.tika.metadata.PDF;

 import org.apache.tika.metadata.Photoshop;

 import org.apache.tika.metadata.TikaCoreProperties;

 import org.apache.tika.metadata.XMP;

 import org.apache.tika.metadata.XMPDM;

 import org.apache.tika.mime.MimeTypes;

 import org.openide.util.Lookup;

 import org.openide.util.NbBundle;

 import org.openide.util.NbBundle.Messages;

 import org.openide.util.lookup.Lookups;

 import org.sleuthkit.autopsy.casemodule.Case;

 import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException;

 import org.sleuthkit.autopsy.coreutils.ExecUtil.ProcessTerminator;

 import org.sleuthkit.autopsy.coreutils.Logger;

 import org.sleuthkit.autopsy.coreutils.MessageNotifyUtil;

 import org.sleuthkit.autopsy.ingest.FileIngestModule;

 import org.sleuthkit.autopsy.ingest.IngestJobContext;

 import org.sleuthkit.autopsy.ingest.IngestMessage;

 import org.sleuthkit.autopsy.ingest.IngestMessage.MessageType;

 import org.sleuthkit.autopsy.ingest.IngestModuleReferenceCounter;

 import org.sleuthkit.autopsy.ingest.IngestServices;

 import org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException;

 import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchService;

 import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchServiceException;

 import org.sleuthkit.autopsy.modules.filetypeid.FileTypeDetector;

 import org.sleuthkit.autopsy.textextractors.TextExtractor;

 import org.sleuthkit.autopsy.textextractors.TextExtractorFactory;

 import org.sleuthkit.autopsy.textextractors.TextFileExtractor;

 import org.sleuthkit.autopsy.textextractors.configs.ImageConfig;

 import org.sleuthkit.autopsy.textextractors.configs.StringsConfig;

 import org.sleuthkit.datamodel.AbstractFile;

 import org.sleuthkit.datamodel.Blackboard;

 import org.sleuthkit.datamodel.BlackboardArtifact;

 import org.sleuthkit.datamodel.BlackboardAttribute;

 import org.sleuthkit.datamodel.TskCoreException;

 import org.sleuthkit.datamodel.TskData;

 import org.sleuthkit.datamodel.TskData.FileKnown;

 import org.sleuthkit.datamodel.TskException;


 @NbBundle.Messages({

     "# {0} - Reason for not starting Solr", "KeywordSearchIngestModule.init.tryStopSolrMsg={0}<br />Please try stopping Java Solr processes if any exist and restart the application.",

     "KeywordSearchIngestModule.init.badInitMsg=Keyword search server was not properly initialized, cannot run keyword search ingest.",

     "SolrConnectionCheck.Port=Invalid port number.",

     "# {0} - Reason for not connecting to Solr", "KeywordSearchIngestModule.init.exception.errConnToSolr.msg=Error connecting to SOLR server: {0}.",

     "KeywordSearchIngestModule.startUp.noOpenCore.msg=The index could not be opened or does not exist.",

     "CannotRunFileTypeDetection=Unable to run file type detection."

 })

 public final class KeywordSearchIngestModule implements FileIngestModule {


     /**
      * Size threshold in bytes (100 * 1024). In this class it is compared
      * against the size of image files when deciding whether a file qualifies
      * for limited OCR: only images strictly larger than this value pass on
      * size alone.
      */
     private static final int LIMITED_OCR_SIZE_MIN = 100 * 1024;

     /**
      * MIME types of archive, compressed, disk-image and executable formats.
      * Per the inline note below, these are formats for which string
      * extraction or unpacking works better than structured text extraction.
      * The list is immutable and package-private; its consumers are not
      * visible in this part of the file.
      */
     static final List<String> ARCHIVE_MIME_TYPES
             = ImmutableList.of(
                     //ignore unstructured binary and compressed data, for which string extraction or unzipper works better
                     "application/x-7z-compressed", //NON-NLS
                     "application/x-ace-compressed", //NON-NLS
                     "application/x-alz-compressed", //NON-NLS
                     "application/x-arj", //NON-NLS
                     "application/vnd.ms-cab-compressed", //NON-NLS
                     "application/x-cfs-compressed", //NON-NLS
                     "application/x-dgc-compressed", //NON-NLS
                     "application/x-apple-diskimage", //NON-NLS
                     "application/x-gca-compressed", //NON-NLS
                     "application/x-dar", //NON-NLS
                     "application/x-lzx", //NON-NLS
                     "application/x-lzh", //NON-NLS
                     "application/x-rar-compressed", //NON-NLS
                     "application/x-stuffit", //NON-NLS
                     "application/x-stuffitx", //NON-NLS
                     "application/x-gtar", //NON-NLS
                     "application/x-archive", //NON-NLS
                     "application/x-executable", //NON-NLS
                     "application/x-gzip", //NON-NLS
                     "application/zip", //NON-NLS
                     "application/x-zoo", //NON-NLS
                     "application/x-cpio", //NON-NLS
                     "application/x-shar", //NON-NLS
                     "application/x-tar", //NON-NLS
                     "application/x-bzip", //NON-NLS
                     "application/x-bzip2", //NON-NLS
                     "application/x-lzip", //NON-NLS
                     "application/x-lzma", //NON-NLS
                     "application/x-lzop", //NON-NLS
                     "application/x-z", //NON-NLS
                     "application/x-compress"); //NON-NLS


     /**
      * Lookup from an extracted metadata key to the blackboard attribute type
      * it feeds, together with the key's rank within that attribute type's
      * list (0 is the most preferred key).
      * <p>
      * Each attribute type below is declared with an ordered list of metadata
      * keys. The lists are flattened into (key, type, rank) triples; when the
      * same key string occurs more than once, the entry with the strictly
      * lower rank is kept, and on equal ranks the later entry wins.
      */
     private static final Map<String, Pair<BlackboardAttribute.ATTRIBUTE_TYPE, Integer>> METADATA_TYPES_MAP = Stream.of(
             Pair.of(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DATETIME_MODIFIED, List.of(
                     "Last-Save-Date",
                     TikaCoreProperties.MODIFIED.getName(),
                     FileSystem.MODIFIED.getName(),
                     DublinCore.MODIFIED.getName(),
                     PDF.DOC_INFO_MODIFICATION_DATE.getName(),
                     PDF.PDFVT_MODIFIED.getName(),
                     XMP.MODIFY_DATE.getName(),
                     XMPDM.AUDIO_MOD_DATE.getName(),
                     XMPDM.METADATA_MOD_DATE.getName(),
                     XMPDM.VIDEO_MOD_DATE.getName())),
             Pair.of(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_USER_ID, List.of(
                     "Last-Author",
                     Office.LAST_AUTHOR.getName(),
                     TikaCoreProperties.MODIFIER.getName())),
             Pair.of(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DATETIME_CREATED, List.of(
                     "Creation-Date",
                     TikaCoreProperties.CREATED.getName(),
                     FileSystem.CREATED.getName(),
                     DublinCore.CREATED.getName(),
                     IPTC.DATE_CREATED.getName(),
                     Office.CREATION_DATE.getName(),
                     PDF.DOC_INFO_CREATED.getName(),
                     Photoshop.DATE_CREATED.getName(),
                     XMP.CREATE_DATE.getName())),
             Pair.of(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_ORGANIZATION, List.of(
                     "Company",
                     DublinCore.PUBLISHER.getName(),
                     IPTC.ORGANISATION_NAME.getName(),
                     OfficeOpenXMLExtended.COMPANY.getName())),
             Pair.of(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_OWNER, List.of(
                     "Author",
                     TikaCoreProperties.CREATOR.getName(),
                     DublinCore.CREATOR.getName(),
                     Office.INITIAL_AUTHOR.getName(),
                     Office.AUTHOR.getName(),
                     Photoshop.AUTHORS_POSITION.getName(),
                     PDF.DOC_INFO_CREATOR.getName())),
             Pair.of(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_PROG_NAME, List.of(
                     "Application-Name",
                     "Producer",
                     OfficeOpenXMLExtended.APPLICATION.getName(),
                     org.apache.tika.metadata.RTFMetadata.EMB_APP_VERSION.getName())),
             Pair.of(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_LAST_PRINTED_DATETIME, List.of(
                     "Last-Printed",
                     OfficeOpenXMLCore.LAST_PRINTED.getName())),
             Pair.of(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DESCRIPTION, List.of(
                     "Title",
                     DublinCore.TITLE.getName(),
                     IPTC.TITLE.getName(),
                     PDF.DOC_INFO_TITLE.getName())),
             Pair.of(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_VERSION, List.of(
                     PDF.PDF_VERSION.getName(),
                     OfficeOpenXMLCore.VERSION.getName())))
             // Expand every (type, keys) pair into one (key, type, rank) triple per key.
             .flatMap(typeWithKeys -> IntStream.range(0, typeWithKeys.getValue().size())
                     .mapToObj(rank -> Triple.of(typeWithKeys.getValue().get(rank), typeWithKeys.getKey(), rank)))
             // Index by key; a duplicate key keeps whichever entry has the strictly lower rank.
             .collect(Collectors.toMap(
                     Triple::getLeft,
                     ranked -> Pair.of(ranked.getMiddle(), ranked.getRight()),
                     (earlier, later) -> earlier.getRight() < later.getRight() ? earlier : later));


     /**
      * Prefix shared by all image MIME types; used in this class to recognise
      * image files when applying the limited-OCR rules.
      */
     private static final String IMAGE_MIME_TYPE_PREFIX = "image/";

     // documents where OCR is performed
     // (PDF plus the legacy and OpenXML Word, PowerPoint and Excel MIME types;
     // files with these types always qualify as limited-OCR files)
     private static final ImmutableSet<String> OCR_DOCUMENTS = ImmutableSet.of(
             "application/pdf",
             "application/msword",
             "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
             "application/vnd.ms-powerpoint",
             "application/vnd.openxmlformats-officedocument.presentationml.presentation",
             "application/vnd.ms-excel",
             "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
     );


     /**
      * Names of the string-extraction options. The constant names themselves
      * (via {@code toString()}) are used as keys into the string-extraction
      * option map read during module start-up.
      */
     enum StringsExtractOptions {
         /** Option key controlling UTF-16 string extraction. */
         EXTRACT_UTF16,
         /** Option key controlling UTF-8 string extraction. */
         EXTRACT_UTF8
     }


     // Class-wide logger.
     private static final Logger logger = Logger.getLogger(KeywordSearchIngestModule.class.getName());
     // Ingest services singleton; used here to post ingest inbox messages.
     private final IngestServices services = IngestServices.getInstance();
     // Assigned from Ingester.getDefault() during startUp().
     private Ingester ingester = null;
     // Created during startUp(); used by process() to obtain each file's MIME type.
     private FileTypeDetector fileTypeDetector;
     // NOTE(review): the two comments below do not describe any field declared
     // here; they look like leftovers from a removed declaration - confirm and
     // delete if so.
 //only search images from current ingest, not images previously ingested/indexed
     //accessed read-only by searcher thread

     // Lookup holding the StringsConfig built in startUp(); reset to null by cleanup().
     private Lookup stringsExtractionContext;
     // Per-job settings supplied to the constructor.
     private final KeywordSearchJobSettings settings;
     // Set to true only at the very end of a successful startUp(); cleared by cleanup().
     private boolean initialized = false;
     // Ingest job id, taken from the job context in startUp().
     private long jobId;
     private static final AtomicInteger instanceCount = new AtomicInteger(0); //just used for logging
     // This instance's sequence number, drawn from instanceCount in the constructor.
     private int instanceNum = 0;
     // Counts module instances per ingest job so that first-instance checks and
     // last-instance summary work run once per job.
     private static final IngestModuleReferenceCounter refCounter = new IngestModuleReferenceCounter();
     // Ingest job context, stored in startUp(); null until then.
     private IngestJobContext context;


     /**
      * Per-file outcome recorded by this module. The outcomes are tallied per
      * job when the index summary is posted at shutdown.
      */
     private enum IngestStatus {
         TEXT_INGESTED,
         STRINGS_INGESTED,
         METADATA_INGESTED,
         /** Also recorded when a file arrives while the module is not initialized. */
         SKIPPED_ERROR_INDEXING,
         SKIPPED_ERROR_TEXTEXTRACT,
         SKIPPED_ERROR_IO
     }

     /**
      * Recorded outcomes, keyed first by ingest job id and then by file id.
      * Shared by all module instances; every access must synchronize on the
      * map itself.
      */
     private static final Map<Long, Map<Long, IngestStatus>> ingestStatus = new HashMap<>(); //guarded by itself


     /**
      * Records the outcome for one file of one ingest job, creating the
      * per-job map on first use. A later status for the same file replaces
      * the earlier one.
      *
      * @param ingestJobId id of the ingest job the file belongs to.
      * @param fileId      object id of the file.
      * @param status      outcome to record.
      */
     private static void putIngestStatus(long ingestJobId, long fileId, IngestStatus status) {
         synchronized (ingestStatus) {
             ingestStatus.computeIfAbsent(ingestJobId, unusedJobId -> new HashMap<>())
                     .put(fileId, status);
         }
     }


     /**
      * Creates a module instance bound to the given job settings and assigns
      * it the next instance number (used for logging).
      *
      * @param settings keyword search settings for the ingest job.
      */
     KeywordSearchIngestModule(KeywordSearchJobSettings settings) {
         this.instanceNum = instanceCount.getAndIncrement();
         this.settings = settings;
     }


     /**
      * Prepares this module instance for an ingest job.
      * <p>
      * When Solr indexing is enabled, the text index must be open and
      * compatible with the current schema version. A file type detector and
      * the default ingester are then obtained. The first instance started for
      * a job additionally verifies the Solr connection: the remote server for
      * multi-user cases, the local one for single-user cases. In the
      * single-user case it also posts a warning when indexing is disabled or
      * no enabled keyword list contains keywords. Finally the string
      * extraction options are captured and the instance is marked initialized.
      *
      * @param context context of the ingest job being started.
      *
      * @throws IngestModuleException if the index is not open or not
      *                               compatible, file type detection cannot
      *                               be set up, there is no open case, or
      *                               the Solr server cannot be reached.
      */
     @Messages({
         "KeywordSearchIngestModule.startupMessage.failedToGetIndexSchema=Failed to get schema version for text index.",
         "# {0} - Solr version number", "KeywordSearchIngestModule.startupException.indexSolrVersionNotSupported=Adding text no longer supported for Solr version {0} of the text index.",
         "# {0} - schema version number", "KeywordSearchIngestModule.startupException.indexSchemaNotSupported=Adding text no longer supported for schema version {0} of the text index.",
         "KeywordSearchIngestModule.noOpenCase.errMsg=No open case available."
     })
     @Override
     public void startUp(IngestJobContext context) throws IngestModuleException {
         initialized = false;
         jobId = context.getJobId();

         // The server handle stays null when indexing to Solr is disabled.
         Server server = null;
         if (settings.isIndexToSolrEnabled()) {
             server = KeywordSearch.getServer();
             if (!server.coreIsOpen()) {
                 throw new IngestModuleException(Bundle.KeywordSearchIngestModule_startUp_noOpenCore_msg());
             }

             try {
                 final Index textIndex = server.getIndexInfo();
                 if (!textIndex.isCompatible(IndexFinder.getCurrentSchemaVersion())) {
                     throw new IngestModuleException(Bundle.KeywordSearchIngestModule_startupException_indexSchemaNotSupported(textIndex.getSchemaVersion()));
                 }
             } catch (NoOpenCoreException ex) {
                 throw new IngestModuleException(Bundle.KeywordSearchIngestModule_startupMessage_failedToGetIndexSchema(), ex);
             }
         }

         try {
             fileTypeDetector = new FileTypeDetector();
         } catch (FileTypeDetector.FileTypeDetectorInitException ex) {
             throw new IngestModuleException(Bundle.CannotRunFileTypeDetection(), ex);
         }

         ingester = Ingester.getDefault();
         this.context = context;

         Case openCase;
         try {
             openCase = Case.getCurrentCaseThrows();
         } catch (NoCurrentCaseException ex) {
             throw new IngestModuleException(Bundle.KeywordSearchIngestModule_noOpenCase_errMsg(), ex);
         }

         // Register this instance with the job; only the first instance of a
         // job checks the server and the existence of keywords.
         final boolean firstInstanceForJob = refCounter.incrementAndGet(jobId) == 1;
         if (firstInstanceForJob && openCase.getCaseType() == Case.CaseType.MULTI_USER_CASE) {
             // Multi-user case: verify the connection to the remote Solr server.
             KeywordSearchService kwsService = new SolrSearchService();
             Server.IndexingServerProperties properties = Server.getMultiUserServerProperties(openCase.getCaseDirectory());
             int port;
             try {
                 port = Integer.parseInt(properties.getPort());
             } catch (NumberFormatException ex) {
                 // The configured port is not a number.
                 throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_badInitMsg() + " " + Bundle.SolrConnectionCheck_Port(), ex);
             }
             try {
                 kwsService.tryConnect(properties.getHost(), port);
             } catch (KeywordSearchServiceException ex) {
                 throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_badInitMsg(), ex);
             }
         } else if (firstInstanceForJob) {
             // Single-user case: verify the local Solr service, unless
             // indexing is disabled (server is null in that case).
             if (server != null) {
                 try {
                     if (!server.isLocalSolrRunning()) {
                         throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_tryStopSolrMsg(Bundle.KeywordSearchIngestModule_init_badInitMsg()));
                     }
                 } catch (KeywordSearchModuleException ex) {
                     // Solr is not properly initialized.
                     throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_tryStopSolrMsg(Bundle.KeywordSearchIngestModule_init_badInitMsg()), ex);
                 }
                 try {
                     // Issue a real query to confirm the server responds; a
                     // healthy status has been seen alongside a 404 on connect.
                     server.queryNumIndexedDocuments();
                 } catch (KeywordSearchModuleException | NoOpenCoreException ex) {
                     throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_exception_errConnToSolr_msg(ex.getMessage()), ex);
                 }
             }

             // Does any enabled keyword list actually contain keywords?
             final boolean hasKeywordsForSearch = XmlKeywordSearchList.getCurrent().getListsL().stream()
                     .anyMatch(keywordList -> settings.keywordListIsEnabled(keywordList.getName())
                     && !keywordList.getKeywords().isEmpty());

             if (!settings.isIndexToSolrEnabled()) {
                 services.postMessage(IngestMessage.createWarningMessage(KeywordSearchModuleFactory.getModuleName(), NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.init.SolrIndexingDisabled"),
                         NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.init.indexingDisabled")));
             } else if (!hasKeywordsForSearch) {
                 services.postMessage(IngestMessage.createWarningMessage(KeywordSearchModuleFactory.getModuleName(), NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.init.noKwInLstMsg"),
                         NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.init.onlyIdxKwSkipMsg")));
             }
         }

         // Capture the string extraction options for this job.
         final StringsConfig stringsConfig = new StringsConfig();
         final Map<String, String> extractOptions = KeywordSearchSettings.getStringExtractOptions();
         final String utf8Flag = extractOptions.get(StringsExtractOptions.EXTRACT_UTF8.toString());
         stringsConfig.setExtractUTF8(Boolean.parseBoolean(utf8Flag));
         final String utf16Flag = extractOptions.get(StringsExtractOptions.EXTRACT_UTF16.toString());
         stringsConfig.setExtractUTF16(Boolean.parseBoolean(utf16Flag));
         stringsConfig.setLanguageScripts(KeywordSearchSettings.getStringExtractScripts());

         stringsExtractionContext = Lookups.fixed(stringsConfig);

         initialized = true;
     }


     /**
      * Indexes and searches a single file.
      * <p>
      * Files are skipped (still returning {@code OK}) when the module is not
      * initialized, when the file is a virtual directory, when the OCR
      * settings exclude the file, or when file ingest has been cancelled.
      * Files marked as known are searched without their content when the
      * "skip known" setting is on; all other files are searched with content.
      *
      * @param abstractFile the file to process.
      *
      * @return always {@code ProcessResult.OK}.
      */
     @Override
     public ProcessResult process(AbstractFile abstractFile) {
         if (!initialized) {
             // Error initializing indexing/Solr; record the file as skipped.
             logger.log(Level.SEVERE, "Skipping processing, module not initialized, file: {0}", abstractFile.getName());  //NON-NLS
             putIngestStatus(jobId, abstractFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING);
             return ProcessResult.OK;
         }

         if (abstractFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.VIRTUAL_DIR)) {
             //skip indexing of virtual dirs (no content, no real name) - will index children files
             return ProcessResult.OK;
         }

         Optional<TextExtractor> extractorOpt = getExtractor(abstractFile);

         // Lower-case with a fixed locale: the default-locale variant maps
         // 'I' to a dotless i under e.g. a Turkish locale, which would make
         // "IMAGE/..." fail the "image/" prefix and MIME set comparisons.
         String mimeType = fileTypeDetector.getMIMEType(abstractFile).trim().toLowerCase(US);

         if (settings.isOCREnabled()) {
             // OCR-only mode: skip files for which no extractor exists or
             // whose extractor will not perform OCR.
             if (settings.isOCROnly() && (!extractorOpt.isPresent() || !extractorOpt.get().willUseOCR())) {
                 return ProcessResult.OK;
             }

             // Limited OCR mode: skip files that would be OCR'd but do not
             // qualify as limited-OCR files.
             if (settings.isLimitedOCREnabled() && extractorOpt.isPresent()
                     && extractorOpt.get().willUseOCR() && !isLimitedOCRFile(abstractFile, mimeType)) {
                 return ProcessResult.OK;
             }
         }

         // Known files are indexed as meta-data only when "skip known" is on;
         // everything else is indexed with content (if the content is supported).
         final boolean indexContent = !(KeywordSearchSettings.getSkipKnown()
                 && abstractFile.getKnown().equals(FileKnown.KNOWN));

         if (context.fileIngestIsCancelled()) {
             return ProcessResult.OK;
         }
         searchFile(extractorOpt, abstractFile, mimeType, indexContent);

         return ProcessResult.OK;
     }


     /**
      * Shuts this module instance down.
      * <p>
      * Nothing is done if the instance never finished start-up. Otherwise the
      * instance releases its reference on the job. When ingest was cancelled,
      * the last instance only discards the job's recorded statuses. When
      * ingest completed, the last instance creates the search artifacts,
      * commits the index, logs index counts, posts the index summary and
      * then discards the job's recorded statuses.
      */
     @Override
     public void shutDown() {
         logger.log(Level.INFO, "Keyword search ingest module instance {0} shutting down", instanceNum); //NON-NLS

         if (!initialized || context == null) {
             return;
         }

         if (context.fileIngestIsCancelled()) {
             logger.log(Level.INFO, "Keyword search ingest module instance {0} stopping due to ingest cancellation", instanceNum); //NON-NLS
             // Release this instance's reference even when cancelled;
             // otherwise the per-job entry in the static status map (and the
             // job's reference count) would never be cleared for a cancelled
             // job. No summary is posted in this case.
             if (refCounter.decrementAndGet(jobId) == 0) {
                 synchronized (ingestStatus) {
                     ingestStatus.remove(jobId);
                 }
             }
             cleanup();
             return;
         }

         // We only need to post the summary msg from the last module per job
         if (refCounter.decrementAndGet(jobId) == 0) {

             try {
                 InlineSearcher.makeArtifacts(context);
                 InlineSearcher.cleanup(context);
                 Ingester.getDefault().commit();
             } catch (TskException ex) {
                 logger.log(Level.SEVERE, String.format("Failed to create search ingest artifacts for job %d", context.getJobId()), ex);
             }

             try {
                 final int numIndexedFiles = KeywordSearch.getServer().queryNumIndexedFiles();
                 logger.log(Level.INFO, "Indexed files count: {0}", numIndexedFiles); //NON-NLS
                 final int numIndexedChunks = KeywordSearch.getServer().queryNumIndexedChunks();
                 logger.log(Level.INFO, "Indexed file chunks count: {0}", numIndexedChunks); //NON-NLS
             } catch (NoOpenCoreException | KeywordSearchModuleException ex) {
                 logger.log(Level.SEVERE, "Error executing Solr queries to check number of indexed files and file chunks", ex); //NON-NLS
             }
             postIndexSummary();
             synchronized (ingestStatus) {
                 ingestStatus.remove(jobId);
             }
         }

         cleanup();
     }


     /**
      * Marks this instance as no longer initialized and drops the string
      * extraction lookup built during start-up.
      */
     private void cleanup() {
         initialized = false;
         stringsExtractionContext = null;
     }


     /**
      * Decides whether a file qualifies for OCR when limited OCR is enabled.
      * A file qualifies if its MIME type is one of the OCR document types, or
      * if it is an image that is either larger than the limited-OCR size
      * threshold or a derived file.
      *
      * @param aFile    the file being considered.
      * @param mimeType the file's MIME type, as compared against the
      *                 lower-case type constants of this class.
      *
      * @return true if the file qualifies for limited OCR, false otherwise.
      */
     private boolean isLimitedOCRFile(AbstractFile aFile, String mimeType) {
         return OCR_DOCUMENTS.contains(mimeType)
                 || (mimeType.startsWith(IMAGE_MIME_TYPE_PREFIX)
                 && (aFile.getSize() > LIMITED_OCR_SIZE_MIN
                 || aFile.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.DERIVED));
     }


     /**
      * Posts a summary of this job's indexing outcomes to the ingest inbox.
      * <p>
      * The recorded per-file statuses of the job are tallied, rendered as an
      * HTML table, logged and posted as an info message. An error
      * notification is raised when any indexing errors were counted;
      * otherwise a warning is raised when any I/O or text extraction errors
      * were counted. Nothing happens if no statuses were recorded for the job.
      */
     private void postIndexSummary() {
         int textIngested = 0;
         int stringsIngested = 0;
         int metadataIngested = 0;
         int indexErrors = 0;
         int textErrors = 0;
         int ioErrors = 0;

         synchronized (ingestStatus) {
             final Map<Long, IngestStatus> statusesForJob = ingestStatus.get(jobId);
             if (statusesForJob == null) {
                 return;
             }
             for (IngestStatus fileStatus : statusesForJob.values()) {
                 switch (fileStatus) {
                     case TEXT_INGESTED:
                         textIngested++;
                         break;
                     case STRINGS_INGESTED:
                         stringsIngested++;
                         break;
                     case METADATA_INGESTED:
                         metadataIngested++;
                         break;
                     case SKIPPED_ERROR_INDEXING:
                         indexErrors++;
                         break;
                     case SKIPPED_ERROR_TEXTEXTRACT:
                         textErrors++;
                         break;
                     case SKIPPED_ERROR_IO:
                         ioErrors++;
                         break;
                 }
             }
         }

         // One table row per counter, in display order.
         final String[] rowLabelKeys = {
             "KeywordSearchIngestModule.postIndexSummary.knowFileHeaderLbl",
             "KeywordSearchIngestModule.postIndexSummary.fileGenStringsHead",
             "KeywordSearchIngestModule.postIndexSummary.mdOnlyLbl",
             "KeywordSearchIngestModule.postIndexSummary.idxErrLbl",
             "KeywordSearchIngestModule.postIndexSummary.errTxtLbl",
             "KeywordSearchIngestModule.postIndexSummary.errIoLbl"
         };
         final int[] rowCounts = {textIngested, stringsIngested, metadataIngested, indexErrors, textErrors, ioErrors};

         final StringBuilder table = new StringBuilder();
         for (int row = 0; row < rowLabelKeys.length; row++) {
             // The first row also opens the table.
             table.append(row == 0 ? "<table border=0><tr><td>" : "<tr><td>") //NON-NLS
                     .append(NbBundle.getMessage(this.getClass(), rowLabelKeys[row]))
                     .append("</td><td>") //NON-NLS
                     .append(rowCounts[row])
                     .append("</td></tr>"); //NON-NLS
         }
         table.append("</table>"); //NON-NLS

         final String indexStats = table.toString();
         logger.log(Level.INFO, "Keyword Indexing Completed: {0}", indexStats); //NON-NLS
         services.postMessage(IngestMessage.createMessage(MessageType.INFO, KeywordSearchModuleFactory.getModuleName(), NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.postIndexSummary.kwIdxResultsLbl"), indexStats));

         if (indexErrors > 0) {
             MessageNotifyUtil.Notify.error(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.postIndexSummary.kwIdxErrsTitle"),
                     NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.postIndexSummary.kwIdxErrMsgFiles", indexErrors));
         } else if (ioErrors + textErrors > 0) {
             MessageNotifyUtil.Notify.warn(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.postIndexSummary.kwIdxWarnMsgTitle"),
                     NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.postIndexSummary.idxErrReadFilesMsg"));
         }
     }


     /**
      * Looks up a text extractor for the given file. The extraction context
      * handed to the factory carries the job's OCR-enabled setting and a
      * terminator that reports whether file ingest has been cancelled.
      *
      * @param abstractFile the file to find an extractor for.
      *
      * @return the extractor, or an empty Optional when the factory finds
      *         none or returns null.
      */
     private Optional<TextExtractor> getExtractor(AbstractFile abstractFile) {
         final ImageConfig ocrConfig = new ImageConfig();
         ocrConfig.setOCREnabled(settings.isOCREnabled());
         final ProcessTerminator cancellationCheck = () -> context.fileIngestIsCancelled();
         final Lookup lookup = Lookups.fixed(ocrConfig, cancellationCheck);

         TextExtractor found;
         try {
             found = TextExtractorFactory.getExtractor(abstractFile, lookup);
         } catch (TextExtractorFactory.NoTextExtractorFound ex) {
             found = null;
         }
         return Optional.ofNullable(found);
     }


     /**
      * Extracts the text of a file with the supplied extractor and hands it
      * to the default ingester for searching (and indexing, when indexing to
      * Solr is enabled).
      *
      * @param extractorOptional the extractor to use, if one was found.
      * @param aFile             the file to extract text from.
      * @param extractedMetadata receives any metadata the extractor reports.
      *
      * @return true if the text was handed to the ingester without error;
      *         false if there is no extractor, the reader could not be
      *         initialized, or any other exception occurred (which is
      *         logged).
      *
      * @throws IngesterException declared for callers; all exceptions raised
      *                           inside are handled within this method.
      */
     private boolean extractTextAndSearch(Optional<TextExtractor> extractorOptional, AbstractFile aFile,
             Map<String, String> extractedMetadata) throws IngesterException {

         if (!extractorOptional.isPresent()) {
             return false;
         }

         boolean searched;
         try {
             //divide into chunks and index
             final Ingester defaultIngester = Ingester.getDefault();
             final Reader textWithMetadata = getTikaOrTextExtractor(extractorOptional, aFile, extractedMetadata);
             defaultIngester.search(textWithMetadata, aFile.getId(), aFile.getName(), aFile, context, true,
                     settings.isIndexToSolrEnabled(), settings.getNamesOfEnabledKeyWordLists());
             searched = true;
         } catch (TextExtractor.InitReaderException ex) {
             // The extractor could not open the file; report failure quietly.
             searched = false;
         } catch (Exception ex) {
             logger.log(Level.WARNING, String.format("Failed to search file %s [id=%d]",
                     aFile.getName(), aFile.getId()), ex);
             searched = false;
         }
         return searched;
     }


     /**
      * Builds the reader that is handed to the ingester: the extractor's file
      * text followed by the formatted metadata. Any metadata found is also
      * copied into {@code extractedMetadata} so the caller can create the
      * metadata artifact after indexing.
      *
      * @param extractorOptional the extractor to use; must be present.
      * @param aFile             the file being read (used for logging).
      * @param extractedMetadata receives the extractor's metadata, if any.
      *
      * @return a reader over the file text plus metadata, or over the file
      *         text alone if the metadata could not be appended.
      *
      * @throws TextExtractor.InitReaderException if the extractor's reader
      *                                           cannot be created.
      */
     private Reader getTikaOrTextExtractor(Optional<TextExtractor> extractorOptional, AbstractFile aFile,
             Map<String, String> extractedMetadata) throws TextExtractor.InitReaderException {

         final TextExtractor textExtractor = extractorOptional.get();
         final Reader contentReader = textExtractor.getReader();
         try {
             final Map<String, String> fileMetadata = textExtractor.getMetadata();
             if (!fileMetadata.isEmpty()) {
                 // Creating the metadata artifact here causes occasional
                 // problems when indexing the text, so the metadata is only
                 // saved for use once this method has completed.
                 extractedMetadata.putAll(fileMetadata);
             }
             final CharSource metadataSource = getMetaDataCharSource(fileMetadata);
             // Adapter so the already-open content reader can be concatenated.
             final CharSource contentSource = new CharSource() {
                 @Override
                 public Reader openStream() throws IOException {
                     return contentReader;
                 }
             };
             // File text first, metadata appended at the end.
             return CharSource.concat(contentSource, metadataSource).openStream();
         } catch (IOException ex) {
             logger.log(Level.WARNING, String.format("Could not format extracted metadata for file %s [id=%d]",
                     aFile.getName(), aFile.getId()), ex);
             //Just send file text.
             return contentReader;
         }
     }


     /**
      * Creates a TSK_METADATA data artifact on the given file from extracted
      * metadata and posts it to the blackboard.
      *
      * Each metadata key is looked up in METADATA_TYPES_MAP, which yields a
      * blackboard attribute type and an integer. Entries with a null value or
      * without a complete mapping are ignored. When several keys map to the
      * same attribute type, the value whose mapping carries the lowest integer
      * is kept (presumably a priority rank; confirm against the map's
      * definition). Values are then converted by checkAttribute(), and nothing
      * is created if no attribute survives.
      *
      * Failures to create or post the artifact are logged and otherwise
      * ignored so that processing of the file can continue.
      *
      * @param aFile    The file the artifact is attached to.
      * @param metadata Metadata key/value pairs extracted from the file.
      */
     private void createMetadataArtifact(AbstractFile aFile, Map<String, String> metadata) {

         String moduleName = KeywordSearchIngestModule.class.getName();

         // Attribute type -> (integer from the mapping, chosen metadata value).
         Map<BlackboardAttribute.ATTRIBUTE_TYPE, Pair<Integer, String>> intermediateMapping = new HashMap<>();
         for (Map.Entry<String, String> entry : metadata.entrySet()) {
             if (entry.getValue() != null) {
                 Pair<BlackboardAttribute.ATTRIBUTE_TYPE, Integer> attrPair = METADATA_TYPES_MAP.get(entry.getKey());
                 if (attrPair != null && attrPair.getKey() != null && attrPair.getValue() != null) {
                     intermediateMapping.compute(attrPair.getKey(), (k, v) -> {
                         // Replace the stored value only when the new mapping
                         // has a strictly lower integer than the stored one.
                         if (v == null || v.getKey() > attrPair.getValue()) {
                             return Pair.of(attrPair.getValue(), entry.getValue());
                         } else {
                             return v;
                         }
                     });
                 }
             }
         }

         Collection<BlackboardAttribute> attributes = new ArrayList<>();
         for (Entry<BlackboardAttribute.ATTRIBUTE_TYPE, Pair<Integer, String>> interEntry : intermediateMapping.entrySet()) {
             BlackboardAttribute attribute = checkAttribute(interEntry.getKey(), interEntry.getValue().getValue());
             if (attribute != null) {
                 attributes.add(attribute);
             }
         }

         if (attributes.isEmpty()) {
             return;
         }

         Collection<BlackboardArtifact> bbartifacts = new ArrayList<>();
         try {
             BlackboardArtifact bbart = aFile.newDataArtifact(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_METADATA), attributes);
             bbartifacts.add(bbart);
         } catch (TskCoreException ex) {
             // Log error and return to continue processing
             logger.log(Level.WARNING, String.format("Error creating or adding metadata artifact for file %s.", aFile.getParentPath() + aFile.getName()), ex); //NON-NLS
             return;
         }

         try {
             Case.getCurrentCaseThrows().getSleuthkitCase().getBlackboard().postArtifacts(bbartifacts, moduleName, jobId);
         } catch (NoCurrentCaseException | Blackboard.BlackboardException ex) {
             // Log error and continue processing. The placeholder must be %s;
             // "$s" is not a format specifier and would drop the file path.
             logger.log(Level.WARNING, String.format("Unable to post blackboard artifacts for file %s.", aFile.getParentPath() + aFile.getName()), ex); //NON-NLS
         }
     }


     /**
      * Converts a raw metadata value into a blackboard attribute of the given
      * type.
      *
      * For attribute types whose value type is DATETIME, the value is parsed
      * as "yyyy-MM-dd HH:mm:ss" after every 'T' is replaced by a space and
      * every 'Z' is removed, and is stored as seconds since the epoch. All
      * other attribute types store the value as a string.
      *
      * @param attrType The attribute type to create; may be null.
      * @param value    The raw metadata value; may be null.
      *
      * @return The new attribute, or null if attrType is null, if value is
      *         null, empty or starts with a space, or if a date/time value
      *         cannot be parsed.
      */
     private BlackboardAttribute checkAttribute(BlackboardAttribute.ATTRIBUTE_TYPE attrType, String value) {
         if (attrType == null || value == null || value.isEmpty() || value.charAt(0) == ' ') {
             return null;
         }

         String moduleName = KeywordSearchIngestModule.class.getName();

         if (attrType.getValueType() != BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.DATETIME) {
             return new BlackboardAttribute(attrType, moduleName, value);
         }

         // A new formatter per call: SimpleDateFormat is not thread-safe.
         // NOTE(review): the 'Z' (UTC) marker is discarded and no time zone is
         // set on the formatter, so parsing uses the JVM default zone -
         // confirm this is intended.
         SimpleDateFormat metadataDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", US);
         try {
             // Literal replacement; no regular expression is needed here.
             String metadataDate = value.replace("T", " ").replace("Z", "");
             Date usedDate = metadataDateFormat.parse(metadataDate);
             long metadataDateTime = usedDate.getTime() / 1000;
             return new BlackboardAttribute(attrType, moduleName, metadataDateTime);
         } catch (ParseException ex) {
             // Log the value that could not be parsed and carry on without it.
             logger.log(Level.WARNING, String.format("Failed to parse date/time %s for metadata attribute %s.", value, attrType.name()), ex); //NON-NLS
             return null;
         }
     }


     /**
      * Renders metadata as a text section: a banner line containing the
      * localized metadata title, followed by one "key: value" line per entry,
      * ordered by key.
      *
      * @param metadata The metadata entries to render.
      *
      * @return A CharSource over the rendered section.
      */
     @NbBundle.Messages({
         "KeywordSearchIngestModule.metadataTitle=METADATA"
     })
     static CharSource getMetaDataCharSource(Map<String, String> metadata) {
         final String sectionHeader = String.format(
                 "\n\n------------------------------%s------------------------------\n\n",
                 Bundle.KeywordSearchIngestModule_metadataTitle());

         // The header is supplied as the joining prefix, so it is emitted even
         // when there are no entries.
         final String renderedSection = metadata.entrySet().stream()
                 .sorted(Map.Entry.comparingByKey())
                 .map(pair -> pair.getKey() + ": " + pair.getValue())
                 .collect(Collectors.joining("\n", sectionHeader, ""));

         return CharSource.wrap(renderedSection);
     }


     /**
      * Runs strings extraction on a file and passes the resulting reader to
      * the ingester, recording STRINGS_INGESTED for the file on success.
      *
      * If file ingest has been cancelled, nothing is done and no status is
      * recorded. Any exception is logged and recorded as
      * SKIPPED_ERROR_INDEXING.
      *
      * @param aFile The file to process.
      *
      * @return False if an exception occurred, true otherwise (including the
      *         cancelled case).
      */
     private boolean extractStringsAndIndex(AbstractFile aFile) {
         try {
             if (!context.fileIngestIsCancelled()) {
                 Reader stringsReader = KeywordSearchUtil.getReader(aFile, stringsExtractionContext);
                 Ingester.getDefault().search(stringsReader, aFile.getId(), aFile.getName(), aFile, context, false, settings.isIndexToSolrEnabled(), settings.getNamesOfEnabledKeyWordLists());
                 putIngestStatus(jobId, aFile.getId(), IngestStatus.STRINGS_INGESTED);
             }
             return true;
         } catch (Exception ex) {
             logger.log(Level.WARNING, "Failed to extract strings and ingest, file '" + aFile.getName() + "' (id: " + aFile.getId() + ").", ex);  //NON-NLS
             putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING);
             return false;
         }
     }


     /**
      * Chooses how a file is processed and records the resulting ingest
      * status. In order:
      * <ul>
      * <li>Unallocated blocks, unused blocks and carved files with a "txt"
      * extension go straight to strings extraction.</li>
      * <li>If content is not to be indexed, or the file is a directory or has
      * size zero, or its MIME type is in ARCHIVE_MIME_TYPES, only the file's
      * metadata is indexed.</li>
      * <li>Files with the octet-stream MIME type go to strings
      * extraction.</li>
      * <li>Everything else is given to extractTextAndSearch(); if that does
      * not add text, non-carved "txt" files are tried with searchTextFile(),
      * and strings extraction is the final fallback.</li>
      * </ul>
      * Metadata collected during text extraction is turned into a metadata
      * artifact once indexing has finished. Cancellation of file ingest is
      * checked before each stage and ends processing of the file.
      *
      * @param extractor    The text extractor to use for the file, if any.
      * @param aFile        The file to process.
      * @param mimeType     The MIME type of the file.
      * @param indexContent False to index only the file's metadata.
      */
     private void searchFile(Optional<TextExtractor> extractor, AbstractFile aFile, String mimeType, boolean indexContent) {

         TskData.TSK_DB_FILES_TYPE_ENUM aType = aFile.getType();

         if ((aType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS)
                 || aType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS))
                 || (aType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.CARVED) && aFile.getNameExtension().equalsIgnoreCase("txt"))) {
             if (context.fileIngestIsCancelled()) {
                 return;
             }
             extractStringsAndIndex(aFile);
             return;
         }

         final long size = aFile.getSize();

         // If not to index content, or a dir, or 0 content, index metadata only.
         if (!indexContent || aFile.isDir() || size == 0) {
             ingestMetadataOnly(aFile);
             return;
         }

         if (context.fileIngestIsCancelled()) {
             return;
         }

         // We skip archive formats that are opened by the archive module.
         // @@@ We could have a check here to see if the archive module was enabled though...
         if (ARCHIVE_MIME_TYPES.contains(mimeType)) {
             ingestMetadataOnly(aFile);
             return;
         }

         boolean wasTextAdded = false;
         Map<String, String> extractedMetadata = new HashMap<>();

         // Extract text with one of the extractors and pass it on for
         // chunking, indexing and searching.
         try {
             if (context.fileIngestIsCancelled()) {
                 return;
             }
             if (MimeTypes.OCTET_STREAM.equals(mimeType)) {
                 extractStringsAndIndex(aFile);
                 return;
             }
             if (!extractTextAndSearch(extractor, aFile, extractedMetadata)) {
                 // extractTextAndSearch() reported failure; the fallbacks
                 // below will be tried.
                 putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT);
             } else {
                 putIngestStatus(jobId, aFile.getId(), IngestStatus.TEXT_INGESTED);
                 wasTextAdded = true;
             }

         } catch (IngesterException e) {
             logger.log(Level.INFO, "Could not extract text with Tika, " + aFile.getId() + ", " //NON-NLS
                     + aFile.getName(), e);
             putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING);
         } catch (Exception e) {
             logger.log(Level.WARNING, "Error extracting text with Tika, " + aFile.getId() + ", " //NON-NLS
                     + aFile.getName(), e);
             putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT);
         }

         if (!wasTextAdded && aFile.getNameExtension().equalsIgnoreCase("txt")
                 && !aFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.CARVED)) {
             // Carved files should be the only type of unallocated files capable of a txt extension and
             // should be ignored by the TextFileExtractor because they may contain more than one text encoding.
             wasTextAdded = searchTextFile(aFile);
         }

         // If it wasn't supported or had an error, default to strings.
         if (!wasTextAdded) {
             extractStringsAndIndex(aFile);
         }

         // Now that the indexing is complete, create the metadata artifact (if applicable).
         // It is unclear why creating it during extractTextAndSearch() generates
         // errors.
         if (!extractedMetadata.isEmpty()) {
             createMetadataArtifact(aFile, extractedMetadata);
         }
     }

     /**
      * Indexes only the metadata of a file and records METADATA_INGESTED for
      * it, or SKIPPED_ERROR_INDEXING if the ingester fails. Does nothing if
      * file ingest has been cancelled.
      *
      * @param aFile The file whose metadata is to be indexed.
      */
     private void ingestMetadataOnly(AbstractFile aFile) {
         if (context.fileIngestIsCancelled()) {
             return;
         }
         try {
             ingester.indexMetaDataOnly(aFile);
             putIngestStatus(jobId, aFile.getId(), IngestStatus.METADATA_INGESTED);
         } catch (IngesterException ex) {
             putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING);
             logger.log(Level.WARNING, "Unable to index meta-data for file: " + aFile.getId(), ex); //NON-NLS
         }
     }


     /**
      * Reads a file through TextFileExtractor and passes the text to the
      * ingester, recording TEXT_INGESTED for the file on success.
      *
      * The reader is managed by try-with-resources so that it is closed even
      * when the ingester throws; a null reader is tolerated by that construct.
      *
      * @param aFile The text file to process.
      *
      * @return True if the text was handed to the ingester without error;
      *         false if no reader was available or an exception occurred
      *         (the exception is logged).
      */
     private boolean searchTextFile(AbstractFile aFile) {
         try (Reader textReader = new TextFileExtractor(aFile).getReader()) {
             if (textReader == null) {
                 logger.log(Level.INFO, "Unable to extract with TextFileExtractor, Reader was null for file: {0}", aFile.getName());
                 return false;
             }
             Ingester.getDefault().search(textReader, aFile.getId(), aFile.getName(), aFile, context, true, settings.isIndexToSolrEnabled(), settings.getNamesOfEnabledKeyWordLists());
             putIngestStatus(jobId, aFile.getId(), IngestStatus.TEXT_INGESTED);
             return true;
         } catch (Exception ex) {
             logger.log(Level.WARNING, "Unable to index " + aFile.getName(), ex);
             return false;
         }
     }


 }

org::sleuthkit.autopsy.textextractors.TextExtractorFactory
Definition: TextExtractorFactory.java:37

org::sleuthkit.autopsy.keywordsearchservice.KeywordSearchService
Definition: KeywordSearchService.java:41

org::sleuthkit

org::sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.checkAttribute
BlackboardAttribute checkAttribute(BlackboardAttribute.ATTRIBUTE_TYPE attrType, String value)
Definition: KeywordSearchIngestModule.java:752

org::sleuthkit.autopsy.textextractors.TextExtractor.InitReaderException
Definition: TextExtractor.java:91

org::sleuthkit.autopsy.keywordsearch.Server.queryNumIndexedFiles
int queryNumIndexedFiles()
Definition: Server.java:1572

org::sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.postIndexSummary
void postIndexSummary()
Definition: KeywordSearchIngestModule.java:541

org::sleuthkit.autopsy.keywordsearch.KeywordSearchJobSettings.isOCREnabled
boolean isOCREnabled()
Definition: KeywordSearchJobSettings.java:92

org::sleuthkit.autopsy.casemodule.Case
Definition: Case.java:168

org::sleuthkit::datamodel::AbstractFile::getSize
long getSize()

org::sleuthkit.autopsy.textextractors.TextExtractor.getReader
Reader getReader()

org::sleuthkit::datamodel::BlackboardAttribute::ATTRIBUTE_TYPE::TSK_USER_ID
TSK_USER_ID

org::sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.fileTypeDetector
FileTypeDetector fileTypeDetector
Definition: KeywordSearchIngestModule.java:241

org::sleuthkit.autopsy.ingest.IngestModuleReferenceCounter.decrementAndGet
synchronized long decrementAndGet(long jobId)
Definition: IngestModuleReferenceCounter.java:46

org::sleuthkit.autopsy.textextractors.TextExtractorFactory.NoTextExtractorFound
Definition: TextExtractorFactory.java:146

org::sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.cleanup
void cleanup()
Definition: KeywordSearchIngestModule.java:510

org::sleuthkit.autopsy.ingest.IngestMessage.MessageType
Definition: IngestMessage.java:37

org::sleuthkit.autopsy.keywordsearch.Server.queryNumIndexedChunks
int queryNumIndexedChunks()
Definition: Server.java:1598

org::sleuthkit::datamodel::BlackboardAttribute::ATTRIBUTE_TYPE::TSK_LAST_PRINTED_DATETIME
TSK_LAST_PRINTED_DATETIME

org::sleuthkit::datamodel::TskData

org::sleuthkit::datamodel::BlackboardAttribute

org::sleuthkit::datamodel::SleuthkitCase::getBlackboard
Blackboard getBlackboard()

org::sleuthkit::datamodel::AbstractContent::getName
String getName()

org::sleuthkit::datamodel::Blackboard::postArtifacts
void postArtifacts(Collection< BlackboardArtifact > artifacts, String moduleName)

org::sleuthkit.autopsy.textextractors.TextFileExtractor
Definition: TextFileExtractor.java:39

org::sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.getTikaOrTextExtractor
Reader getTikaOrTextExtractor(Optional< TextExtractor > extractorOptional, AbstractFile aFile, Map< String, String > extractedMetadata)
Definition: KeywordSearchIngestModule.java:652

org::sleuthkit.autopsy.keywordsearch.Server.IndexingServerProperties
Definition: Server.java:1504

org::sleuthkit::datamodel::BlackboardAttribute::TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE::DATETIME
DATETIME

org::sleuthkit.autopsy.textextractors.configs.ImageConfig.setOCREnabled
void setOCREnabled(boolean enabled)
Definition: ImageConfig.java:47

org::sleuthkit.autopsy.keywordsearchservice.KeywordSearchService.tryConnect
void tryConnect(String host, int port)

org::sleuthkit.autopsy.keywordsearch.Server.getMultiUserServerProperties
static IndexingServerProperties getMultiUserServerProperties(String caseDirectory)
Definition: Server.java:1371

org::sleuthkit::datamodel::AbstractFile::getType
TskData.TSK_DB_FILES_TYPE_ENUM getType()

org::sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.IngestStatus.METADATA_INGESTED
METADATA_INGESTED
No content, so only the metadata was ingested.
Definition: KeywordSearchIngestModule.java:258

org::sleuthkit.autopsy.casemodule.Case.getCaseDirectory
String getCaseDirectory()
Definition: Case.java:1540

org

org::sleuthkit.autopsy.keywordsearch.NoOpenCoreException
Definition: NoOpenCoreException.java:27

org::sleuthkit::datamodel::BlackboardAttribute::ATTRIBUTE_TYPE::TSK_DESCRIPTION
TSK_DESCRIPTION

com

org::sleuthkit::datamodel::Blackboard

org::sleuthkit::datamodel::BlackboardAttribute::ATTRIBUTE_TYPE::TSK_PROG_NAME
TSK_PROG_NAME

org::sleuthkit.autopsy.casemodule
Definition: AddImageAction.java:19

org::sleuthkit.autopsy.textextractors.configs.StringsConfig.setExtractUTF8
void setExtractUTF8(boolean enabled)
Definition: StringsConfig.java:48

org::sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.searchTextFile
boolean searchTextFile(AbstractFile aFile)
Definition: KeywordSearchIngestModule.java:944

org::sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.startUp
void startUp(IngestJobContext context)
Definition: KeywordSearchIngestModule.java:302

org::sleuthkit::datamodel::AbstractFile::getNameExtension
String getNameExtension()

org::sleuthkit.autopsy.keywordsearch.KeywordSearch.getServer
static synchronized Server getServer()
Definition: KeywordSearch.java:59

org::sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.searchFile
void searchFile(Optional< TextExtractor > extractor, AbstractFile aFile, String mimeType, boolean indexContent)
Definition: KeywordSearchIngestModule.java:830

org::sleuthkit.autopsy.modules.filetypeid.FileTypeDetector
Definition: FileTypeDetector.java:46

org::sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.createMetadataArtifact
void createMetadataArtifact(AbstractFile aFile, Map< String, String > metadata)
Definition: KeywordSearchIngestModule.java:686

org::sleuthkit.autopsy.ingest.IngestModuleReferenceCounter.incrementAndGet
synchronized long incrementAndGet(long jobId)
Definition: IngestModuleReferenceCounter.java:39

org::sleuthkit.autopsy.ingest.IngestMessage.createMessage
static IngestMessage createMessage(MessageType messageType, String source, String subject, String detailsHtml)
Definition: IngestMessage.java:183

org::sleuthkit::datamodel::BlackboardArtifact

org::sleuthkit.autopsy.ingest.IngestModule.ProcessResult
Definition: IngestModule.java:119

org::sleuthkit::datamodel::BlackboardArtifact::Type

org::sleuthkit.autopsy.casemodule.Case.CaseType
Definition: Case.java:221

org::sleuthkit::datamodel::TskException

org::sleuthkit.autopsy.modules.filetypeid.FileTypeDetector.getMIMEType
String getMIMEType(AbstractFile file)
Definition: FileTypeDetector.java:182

org::sleuthkit.autopsy.coreutils
Definition: AppSQLiteDB.java:19

org::sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.settings
final KeywordSearchJobSettings settings
Definition: KeywordSearchIngestModule.java:246

org::sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.IngestStatus.SKIPPED_ERROR_INDEXING
SKIPPED_ERROR_INDEXING
File was skipped because index engine had problems.
Definition: KeywordSearchIngestModule.java:259

org::sleuthkit.autopsy.coreutils.Logger
Definition: Logger.java:36

org::sleuthkit::datamodel::TskData::TSK_DB_FILES_TYPE_ENUM::CARVED
CARVED

org::sleuthkit.autopsy.textextractors.TextExtractor.getMetadata
default Map< String, String > getMetadata()
Definition: TextExtractor.java:74

org::sleuthkit::datamodel::BlackboardAttribute::TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE

org::sleuthkit::datamodel::TskData::FileKnown

org::sleuthkit::datamodel::AbstractFile::getParentPath
String getParentPath()

org::sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.extractTextAndSearch
boolean extractTextAndSearch(Optional< TextExtractor > extractorOptional, AbstractFile aFile, Map< String, String > extractedMetadata)
Definition: KeywordSearchIngestModule.java:631

org::sleuthkit.autopsy.keywordsearch.KeywordSearchJobSettings
Definition: KeywordSearchJobSettings.java:29

org::sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.jobId
long jobId
Definition: KeywordSearchIngestModule.java:248

org::sleuthkit::datamodel::AbstractFile::newDataArtifact
DataArtifact newDataArtifact(BlackboardArtifact.Type artifactType, Collection< BlackboardAttribute > attributesList)

org::sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.isLimitedOCRFile
boolean isLimitedOCRFile(AbstractFile aFile, String mimeType)
Definition: KeywordSearchIngestModule.java:525

org::sleuthkit.autopsy.textextractors.TextExtractorFactory.getExtractor
static TextExtractor getExtractor(Content content, Lookup context)
Definition: TextExtractorFactory.java:57

org::sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.context
IngestJobContext context
Definition: KeywordSearchIngestModule.java:252

org::sleuthkit::datamodel::AbstractFile::getKnown
TskData.FileKnown getKnown()

org::sleuthkit.autopsy.keywordsearch.Ingester.IngesterException
Definition: Ingester.java:614

org::sleuthkit.autopsy.ingest
Definition: AnalysisResultIngestModule.java:19

org::sleuthkit.autopsy.coreutils.ExecUtil
Definition: ExecUtil.java:37

org::sleuthkit.autopsy.casemodule.Case.getCaseType
CaseType getCaseType()
Definition: Case.java:1459

org::sleuthkit.autopsy.ingest.IngestModule.IngestModuleException
Definition: IngestModule.java:98

org::sleuthkit::datamodel::TskCoreException

org::sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule
Definition: KeywordSearchIngestModule.java:105

org::sleuthkit::datamodel::Blackboard::BlackboardException

org::sleuthkit.autopsy.keywordsearch.Server.queryNumIndexedDocuments
int queryNumIndexedDocuments()
Definition: Server.java:1624

org::sleuthkit.autopsy.ingest.IngestMessage
Definition: IngestMessage.java:32

org::sleuthkit.autopsy.textextractors.TextExtractor
Definition: TextExtractor.java:33

org::sleuthkit.autopsy.modules.filetypeid.FileTypeDetector.FileTypeDetectorInitException
Definition: FileTypeDetector.java:404

org::sleuthkit.autopsy.keywordsearchservice
Definition: KeywordSearchService.java:19

org::sleuthkit::datamodel::BlackboardAttribute::ATTRIBUTE_TYPE::TSK_VERSION
TSK_VERSION

org::sleuthkit.autopsy.coreutils.ExecUtil.ProcessTerminator
Definition: ExecUtil.java:54

org::sleuthkit.autopsy.ingest.IngestServices.postMessage
void postMessage(final IngestMessage message)
Definition: IngestServices.java:100

org::sleuthkit::datamodel::TskData::TSK_DB_FILES_TYPE_ENUM::DERIVED
DERIVED

org::sleuthkit.autopsy.textextractors.configs.ImageConfig
Definition: ImageConfig.java:33

org::sleuthkit.autopsy.casemodule.Case.getSleuthkitCase
SleuthkitCase getSleuthkitCase()
Definition: Case.java:1441

org::sleuthkit.autopsy.textextractors.configs.StringsConfig
Definition: StringsConfig.java:37

org::sleuthkit.autopsy.ingest.IngestMessage.MessageType.INFO
INFO
Definition: IngestMessage.java:39

org::sleuthkit.autopsy.keywordsearch.Server
Definition: Server.java:110

org::sleuthkit.autopsy.coreutils.MessageNotifyUtil
Definition: MessageNotifyUtil.java:47

org::sleuthkit.autopsy.textextractors.configs.StringsConfig.setExtractUTF16
void setExtractUTF16(boolean enabled)
Definition: StringsConfig.java:57

org::sleuthkit.autopsy.ingest.IngestJobContext
Definition: IngestJobContext.java:29

org::sleuthkit.autopsy.ingest.IngestJobContext.fileIngestIsCancelled
boolean fileIngestIsCancelled()
Definition: IngestJobContext.java:103

org::sleuthkit.autopsy.keywordsearch.KeywordSearch
Definition: KeywordSearch.java:37

org::sleuthkit::datamodel::BlackboardAttribute::ATTRIBUTE_TYPE::TSK_ORGANIZATION
TSK_ORGANIZATION

org::sleuthkit.autopsy.modules

org::sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.IngestStatus.SKIPPED_ERROR_TEXTEXTRACT
SKIPPED_ERROR_TEXTEXTRACT
File was skipped because of text extraction issues.
Definition: KeywordSearchIngestModule.java:260

org::sleuthkit::datamodel::AbstractContent::getId
long getId()

org::sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.putIngestStatus
static void putIngestStatus(long ingestJobId, long fileId, IngestStatus status)
Definition: KeywordSearchIngestModule.java:273

org::sleuthkit.autopsy.keywordsearch.SolrSearchService
Definition: SolrSearchService.java:65

org::sleuthkit.autopsy.ingest.IngestServices
Definition: IngestServices.java:34

org::sleuthkit::datamodel::TskData::TSK_DB_FILES_TYPE_ENUM::UNALLOC_BLOCKS
UNALLOC_BLOCKS

org::sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.IngestStatus
Definition: KeywordSearchIngestModule.java:254

org::sleuthkit::datamodel::AbstractFile

org::sleuthkit.autopsy.keywordsearch.KeywordSearchModuleFactory
Definition: KeywordSearchModuleFactory.java:41

org::sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.process
ProcessResult process(AbstractFile abstractFile)
Definition: KeywordSearchIngestModule.java:411

org::sleuthkit.autopsy.keywordsearch.Ingester
Definition: Ingester.java:63

org::sleuthkit.autopsy.ingest.IngestModuleReferenceCounter
Definition: IngestModuleReferenceCounter.java:29

org::sleuthkit.autopsy.coreutils.MessageNotifyUtil.Notify.error
static void error(String title, String message)
Definition: MessageNotifyUtil.java:227

org::sleuthkit.autopsy.ingest.FileIngestModule
Definition: FileIngestModule.java:27

org::sleuthkit::datamodel

org::sleuthkit.autopsy.coreutils.Logger.getLogger
synchronized static Logger getLogger(String name)
Definition: Logger.java:124

org::sleuthkit.autopsy.casemodule.Case.getCurrentCaseThrows
static Case getCurrentCaseThrows()
Definition: Case.java:910

org::sleuthkit::datamodel::BlackboardAttribute::ATTRIBUTE_TYPE::TSK_OWNER
TSK_OWNER

org::sleuthkit.autopsy.ingest.IngestMessage.createWarningMessage
static IngestMessage createWarningMessage(String source, String subject, String detailsHtml)
Definition: IngestMessage.java:236

org::sleuthkit::datamodel::TskData::FileKnown::KNOWN
KNOWN

org::sleuthkit.autopsy

org::sleuthkit.autopsy.casemodule.Case.CaseType.MULTI_USER_CASE
MULTI_USER_CASE
Definition: Case.java:224

org::sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.IngestStatus.TEXT_INGESTED
TEXT_INGESTED
Definition: KeywordSearchIngestModule.java:256

org::sleuthkit.autopsy.keywordsearch
Definition: AccountsText.java:19

org::sleuthkit::datamodel::TskData::TSK_DB_FILES_TYPE_ENUM::VIRTUAL_DIR
VIRTUAL_DIR

org::sleuthkit::datamodel::BlackboardAttribute::ATTRIBUTE_TYPE::TSK_DATETIME_CREATED
TSK_DATETIME_CREATED

org::sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.getExtractor
Optional< TextExtractor > getExtractor(AbstractFile abstractFile)
Definition: KeywordSearchIngestModule.java:600

org::sleuthkit.autopsy.coreutils.MessageNotifyUtil.Notify
Definition: MessageNotifyUtil.java:160

org::sleuthkit::datamodel::AbstractFile::isDir
boolean isDir()

org::sleuthkit.autopsy.keywordsearch.KeywordSearchModuleException
Definition: KeywordSearchModuleException.java:24

org::sleuthkit.autopsy.keywordsearchservice.KeywordSearchServiceException
Definition: KeywordSearchServiceException.java:24

org::sleuthkit::datamodel::BlackboardAttribute::ATTRIBUTE_TYPE

org::sleuthkit::datamodel::BlackboardAttribute::ATTRIBUTE_TYPE::TSK_DATETIME_MODIFIED
TSK_DATETIME_MODIFIED

org::sleuthkit.autopsy.ingest.IngestModule.ProcessResult.OK
OK
Definition: IngestModule.java:121

org::sleuthkit.autopsy.casemodule.NoCurrentCaseException
Definition: NoCurrentCaseException.java:26

org::sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.stringsExtractionContext
Lookup stringsExtractionContext
Definition: KeywordSearchIngestModule.java:245

org::sleuthkit.autopsy.textextractors.configs.StringsConfig.setLanguageScripts
void setLanguageScripts(List< SCRIPT > scripts)
Definition: StringsConfig.java:87

org::sleuthkit.autopsy.textextractors
Definition: ArtifactTextExtractor.java:19

org::sleuthkit.autopsy.textextractors.configs
Definition: ImageConfig.java:19

org::sleuthkit.autopsy.keywordsearch.KeywordList
Definition: KeywordList.java:30

org::sleuthkit.autopsy.textextractors.TextFileExtractor.getReader
Reader getReader()
Definition: TextFileExtractor.java:57

org::sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.shutDown
void shutDown()
Definition: KeywordSearchIngestModule.java:466

org::sleuthkit.autopsy.ingest.IngestJobContext.getJobId
long getJobId()
Definition: IngestJobContext.java:66

org::sleuthkit.autopsy.coreutils.MessageNotifyUtil.Notify.warn
static void warn(String title, String message)
Definition: MessageNotifyUtil.java:237

org::sleuthkit::datamodel::BlackboardArtifact::ARTIFACT_TYPE

org::sleuthkit::datamodel::BlackboardArtifact::ARTIFACT_TYPE::TSK_METADATA
TSK_METADATA

org::sleuthkit.autopsy.modules.filetypeid
Definition: AddFileTypeDialog.java:19

org::sleuthkit::datamodel::TskData::TSK_DB_FILES_TYPE_ENUM::UNUSED_BLOCKS
UNUSED_BLOCKS

org::sleuthkit::datamodel::TskData::TSK_DB_FILES_TYPE_ENUM

org::sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.extractStringsAndIndex
boolean extractStringsAndIndex(AbstractFile aFile)
Definition: KeywordSearchIngestModule.java:804

org::sleuthkit.autopsy.ingest.IngestServices.getInstance
static synchronized IngestServices getInstance()
Definition: IngestServices.java:54

org::sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.IngestStatus.STRINGS_INGESTED
STRINGS_INGESTED
Text was extracted based on the known file type and then ingested.
Definition: KeywordSearchIngestModule.java:257