19package org.sleuthkit.autopsy.keywordsearch;
21import com.google.common.collect.ImmutableList;
22import com.google.common.collect.ImmutableSet;
23import com.google.common.io.CharSource;
24import java.io.IOException;
26import java.text.ParseException;
27import java.text.SimpleDateFormat;
28import java.util.ArrayList;
29import java.util.Collection;
31import java.util.HashMap;
33import static java.util.Locale.US;
35import java.util.Map.Entry;
36import java.util.Optional;
37import java.util.concurrent.atomic.AtomicInteger;
38import java.util.logging.Level;
39import java.util.stream.Collectors;
40import java.util.stream.IntStream;
41import java.util.stream.Stream;
42import org.apache.commons.lang3.tuple.Pair;
43import org.apache.commons.lang3.tuple.Triple;
44import org.apache.tika.metadata.DublinCore;
45import org.apache.tika.metadata.FileSystem;
46import org.apache.tika.metadata.IPTC;
47import org.apache.tika.metadata.Office;
48import org.apache.tika.metadata.OfficeOpenXMLCore;
49import org.apache.tika.metadata.OfficeOpenXMLExtended;
50import org.apache.tika.metadata.PDF;
51import org.apache.tika.metadata.Photoshop;
52import org.apache.tika.metadata.TikaCoreProperties;
53import org.apache.tika.metadata.XMP;
54import org.apache.tika.metadata.XMPDM;
55import org.apache.tika.mime.MimeTypes;
56import org.openide.util.Lookup;
57import org.openide.util.NbBundle;
58import org.openide.util.NbBundle.Messages;
59import org.openide.util.lookup.Lookups;
60import org.sleuthkit.autopsy.casemodule.Case;
61import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException;
62import org.sleuthkit.autopsy.coreutils.ExecUtil.ProcessTerminator;
63import org.sleuthkit.autopsy.coreutils.Logger;
64import org.sleuthkit.autopsy.coreutils.MessageNotifyUtil;
65import org.sleuthkit.autopsy.ingest.FileIngestModule;
66import org.sleuthkit.autopsy.ingest.IngestJobContext;
67import org.sleuthkit.autopsy.ingest.IngestMessage;
68import org.sleuthkit.autopsy.ingest.IngestMessage.MessageType;
69import org.sleuthkit.autopsy.ingest.IngestModuleReferenceCounter;
70import org.sleuthkit.autopsy.ingest.IngestServices;
71import org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException;
72import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchService;
73import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchServiceException;
74import org.sleuthkit.autopsy.modules.filetypeid.FileTypeDetector;
75import org.sleuthkit.autopsy.textextractors.TextExtractor;
76import org.sleuthkit.autopsy.textextractors.TextExtractorFactory;
77import org.sleuthkit.autopsy.textextractors.TextFileExtractor;
78import org.sleuthkit.autopsy.textextractors.configs.ImageConfig;
79import org.sleuthkit.autopsy.textextractors.configs.StringsConfig;
80import org.sleuthkit.datamodel.AbstractFile;
81import org.sleuthkit.datamodel.Blackboard;
82import org.sleuthkit.datamodel.BlackboardArtifact;
83import org.sleuthkit.datamodel.BlackboardAttribute;
84import org.sleuthkit.datamodel.TskCoreException;
85import org.sleuthkit.datamodel.TskData;
86import org.sleuthkit.datamodel.TskData.FileKnown;
87import org.sleuthkit.datamodel.TskException;
98 "# {0} - Reason for not starting Solr",
"KeywordSearchIngestModule.init.tryStopSolrMsg={0}<br />Please try stopping Java Solr processes if any exist and restart the application.",
99 "KeywordSearchIngestModule.init.badInitMsg=Keyword search server was not properly initialized, cannot run keyword search ingest.",
100 "SolrConnectionCheck.Port=Invalid port number.",
101 "# {0} - Reason for not connecting to Solr",
"KeywordSearchIngestModule.init.exception.errConnToSolr.msg=Error connecting to SOLR server: {0}.",
102 "KeywordSearchIngestModule.startUp.noOpenCore.msg=The index could not be opened or does not exist.",
103 "CannotRunFileTypeDetection=Unable to run file type detection."
113 static final List<String> ARCHIVE_MIME_TYPES
116 "application/x-7z-compressed",
117 "application/x-ace-compressed",
118 "application/x-alz-compressed",
120 "application/vnd.ms-cab-compressed",
121 "application/x-cfs-compressed",
122 "application/x-dgc-compressed",
123 "application/x-apple-diskimage",
124 "application/x-gca-compressed",
128 "application/x-rar-compressed",
129 "application/x-stuffit",
130 "application/x-stuffitx",
131 "application/x-gtar",
132 "application/x-archive",
133 "application/x-executable",
134 "application/x-gzip",
137 "application/x-cpio",
138 "application/x-shar",
140 "application/x-bzip",
141 "application/x-bzip2",
142 "application/x-lzip",
143 "application/x-lzma",
144 "application/x-lzop",
146 "application/x-compress");
153 private static final Map<String, Pair<BlackboardAttribute.ATTRIBUTE_TYPE, Integer>>
METADATA_TYPES_MAP = Stream.of(
154 Pair.of(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DATETIME_MODIFIED, List.of(
156 TikaCoreProperties.MODIFIED.getName(),
157 FileSystem.MODIFIED.getName(),
158 DublinCore.MODIFIED.getName(),
159 PDF.DOC_INFO_MODIFICATION_DATE.getName(),
160 PDF.PDFVT_MODIFIED.getName(),
161 XMP.MODIFY_DATE.getName(),
162 XMPDM.AUDIO_MOD_DATE.getName(),
163 XMPDM.METADATA_MOD_DATE.getName(),
164 XMPDM.VIDEO_MOD_DATE.getName())),
165 Pair.of(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_USER_ID, List.of(
167 Office.LAST_AUTHOR.getName(),
168 TikaCoreProperties.MODIFIER.getName())),
169 Pair.of(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DATETIME_CREATED, List.of(
171 TikaCoreProperties.CREATED.getName(),
172 FileSystem.CREATED.getName(),
173 DublinCore.CREATED.getName(),
174 IPTC.DATE_CREATED.getName(),
175 Office.CREATION_DATE.getName(),
176 PDF.DOC_INFO_CREATED.getName(),
177 Photoshop.DATE_CREATED.getName(),
178 XMP.CREATE_DATE.getName())),
179 Pair.of(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_ORGANIZATION, List.of(
181 DublinCore.PUBLISHER.getName(),
182 IPTC.ORGANISATION_NAME.getName(),
183 OfficeOpenXMLExtended.COMPANY.getName())),
184 Pair.of(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_OWNER, List.of(
186 TikaCoreProperties.CREATOR.getName(),
187 DublinCore.CREATOR.getName(),
188 Office.INITIAL_AUTHOR.getName(),
189 Office.AUTHOR.getName(),
190 Photoshop.AUTHORS_POSITION.getName(),
191 PDF.DOC_INFO_CREATOR.getName())),
192 Pair.of(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_PROG_NAME, List.of(
195 OfficeOpenXMLExtended.APPLICATION.getName(),
196 org.apache.tika.metadata.RTFMetadata.EMB_APP_VERSION.getName())),
197 Pair.of(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_LAST_PRINTED_DATETIME, List.of(
199 OfficeOpenXMLCore.LAST_PRINTED.getName())),
200 Pair.of(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DESCRIPTION, List.of(
202 DublinCore.TITLE.getName(),
203 IPTC.TITLE.getName(),
204 PDF.DOC_INFO_TITLE.getName())),
205 Pair.of(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_VERSION, List.of(
206 PDF.PDF_VERSION.getName(),
207 OfficeOpenXMLCore.VERSION.getName())))
209 BlackboardAttribute.ATTRIBUTE_TYPE attrType = pr.getKey();
210 List<String> keys = pr.getValue();
211 return IntStream.range(0, keys.size())
212 .mapToObj(idx -> Triple.of(keys.get(idx), attrType, idx));
214 .collect(Collectors.toMap(Triple::getLeft, trip -> Pair.of(trip.getMiddle(), trip.getRight()), (v1, v2) -> v1.getRight() < v2.getRight() ? v1 : v2));
222 "application/msword",
223 "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
224 "application/vnd.ms-powerpoint",
225 "application/vnd.openxmlformats-officedocument.presentationml.presentation",
226 "application/vnd.ms-excel",
227 "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    // Per-ingest-job bookkeeping: maps an ingest job ID to a map of
    // (file object ID -> IngestStatus) recording how each file was handled,
    // so a per-job indexing summary can be compiled when the job ends.
    // NOTE(review): shared across ingest threads — confirm that all access
    // goes through synchronized accessors.
    private static final Map<Long, Map<Long, IngestStatus>> ingestStatus = new HashMap<>();
275 Map<Long, IngestStatus> ingestStatusForJob =
ingestStatus.get(ingestJobId);
276 if (ingestStatusForJob ==
null) {
277 ingestStatusForJob =
new HashMap<>();
280 ingestStatusForJob.put(fileId, status);
286 this.settings = settings;
287 instanceNum = instanceCount.getAndIncrement();
296 "KeywordSearchIngestModule.startupMessage.failedToGetIndexSchema=Failed to get schema version for text index.",
297 "# {0} - Solr version number",
"KeywordSearchIngestModule.startupException.indexSolrVersionNotSupported=Adding text no longer supported for Solr version {0} of the text index.",
298 "# {0} - schema version number",
"KeywordSearchIngestModule.startupException.indexSchemaNotSupported=Adding text no longer supported for schema version {0} of the text index.",
299 "KeywordSearchIngestModule.noOpenCase.errMsg=No open case available."
307 if (
settings.isIndexToSolrEnabled()) {
309 if (server.coreIsOpen() ==
false) {
314 Index indexInfo = server.getIndexInfo();
315 if (!indexInfo.isCompatible(IndexFinder.getCurrentSchemaVersion())) {
316 throw new IngestModuleException(Bundle.KeywordSearchIngestModule_startupException_indexSchemaNotSupported(indexInfo.getSchemaVersion()));
319 throw new IngestModuleException(Bundle.KeywordSearchIngestModule_startupMessage_failedToGetIndexSchema(), ex);
347 port = Integer.parseInt(properties.getPort());
348 }
catch (NumberFormatException ex) {
350 throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_badInitMsg() +
" " + Bundle.SolrConnectionCheck_Port(), ex);
353 kwsService.
tryConnect(properties.getHost(), port);
360 if (server !=
null) {
362 if (!server.isLocalSolrRunning()) {
363 throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_tryStopSolrMsg(Bundle.KeywordSearchIngestModule_init_badInitMsg()));
367 throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_tryStopSolrMsg(Bundle.KeywordSearchIngestModule_init_badInitMsg()), ex);
374 throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_exception_errConnToSolr_msg(ex.getMessage()), ex);
378 List<KeywordList> keywordLists = XmlKeywordSearchList.getCurrent().getListsL();
379 boolean hasKeywordsForSearch =
false;
381 if (
settings.keywordListIsEnabled(keywordList.getName()) && !keywordList.getKeywords().isEmpty()) {
382 hasKeywordsForSearch =
true;
387 if (!
settings.isIndexToSolrEnabled()) {
389 NbBundle.getMessage(
this.getClass(),
"KeywordSearchIngestModule.init.indexingDisabled")));
391 if (!hasKeywordsForSearch) {
393 NbBundle.getMessage(
this.getClass(),
"KeywordSearchIngestModule.init.onlyIdxKwSkipMsg")));
400 Map<String, String> stringsOptions = KeywordSearchSettings.getStringExtractOptions();
414 logger.log(Level.SEVERE,
"Skipping processing, module not initialized, file: {0}", abstractFile.getName());
419 if (abstractFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.VIRTUAL_DIR)) {
425 Optional<TextExtractor> extractorOpt =
getExtractor(abstractFile);
427 String mimeType =
fileTypeDetector.getMIMEType(abstractFile).trim().toLowerCase();
431 if (
settings.isOCROnly() && (!extractorOpt.isPresent() || !extractorOpt.get().willUseOCR())) {
437 if (
settings.isLimitedOCREnabled() && extractorOpt.isPresent()
438 && extractorOpt.get().willUseOCR() && !
isLimitedOCRFile(abstractFile, mimeType)) {
443 if (KeywordSearchSettings.getSkipKnown() && abstractFile.getKnown().equals(FileKnown.KNOWN)) {
445 if (
context.fileIngestIsCancelled()) {
448 searchFile(extractorOpt, abstractFile, mimeType,
false);
453 if (
context.fileIngestIsCancelled()) {
456 searchFile(extractorOpt, abstractFile, mimeType,
true);
467 logger.log(Level.INFO,
"Keyword search ingest module instance {0} shutting down",
instanceNum);
473 if (
context.fileIngestIsCancelled()) {
474 logger.log(Level.INFO,
"Keyword search ingest module instance {0} stopping due to ingest cancellation",
instanceNum);
483 InlineSearcher.makeArtifacts(
context);
484 InlineSearcher.cleanup(
context);
485 Ingester.getDefault().commit();
486 }
catch (TskException ex) {
487 logger.log(Level.SEVERE, String.format(
"Failed to create search ingest artifacts for job %d",
context.getJobId()), ex);
492 logger.log(Level.INFO,
"Indexed files count: {0}", numIndexedFiles);
494 logger.log(Level.INFO,
"Indexed file chunks count: {0}", numIndexedChunks);
496 logger.log(Level.SEVERE,
"Error executing Solr queries to check number of indexed files and file chunks", ex);
532 || aFile.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.DERIVED;
542 int text_ingested = 0;
543 int metadata_ingested = 0;
544 int strings_ingested = 0;
551 if (ingestStatusForJob ==
null) {
559 case METADATA_INGESTED:
562 case STRINGS_INGESTED:
565 case SKIPPED_ERROR_TEXTEXTRACT:
568 case SKIPPED_ERROR_INDEXING:
571 case SKIPPED_ERROR_IO:
580 StringBuilder msg =
new StringBuilder();
581 msg.append(
"<table border=0><tr><td>").append(NbBundle.getMessage(
this.getClass(),
"KeywordSearchIngestModule.postIndexSummary.knowFileHeaderLbl")).append(
"</td><td>").append(text_ingested).append(
"</td></tr>");
582 msg.append(
"<tr><td>").append(NbBundle.getMessage(
this.getClass(),
"KeywordSearchIngestModule.postIndexSummary.fileGenStringsHead")).append(
"</td><td>").append(strings_ingested).append(
"</td></tr>");
583 msg.append(
"<tr><td>").append(NbBundle.getMessage(
this.getClass(),
"KeywordSearchIngestModule.postIndexSummary.mdOnlyLbl")).append(
"</td><td>").append(metadata_ingested).append(
"</td></tr>");
584 msg.append(
"<tr><td>").append(NbBundle.getMessage(
this.getClass(),
"KeywordSearchIngestModule.postIndexSummary.idxErrLbl")).append(
"</td><td>").append(error_index).append(
"</td></tr>");
585 msg.append(
"<tr><td>").append(NbBundle.getMessage(
this.getClass(),
"KeywordSearchIngestModule.postIndexSummary.errTxtLbl")).append(
"</td><td>").append(error_text).append(
"</td></tr>");
586 msg.append(
"<tr><td>").append(NbBundle.getMessage(
this.getClass(),
"KeywordSearchIngestModule.postIndexSummary.errIoLbl")).append(
"</td><td>").append(error_io).append(
"</td></tr>");
587 msg.append(
"</table>");
588 String indexStats = msg.toString();
589 logger.log(Level.INFO,
"Keyword Indexing Completed: {0}", indexStats);
591 if (error_index > 0) {
593 NbBundle.getMessage(
this.getClass(),
"KeywordSearchIngestModule.postIndexSummary.kwIdxErrMsgFiles", error_index));
594 }
else if (error_io + error_text > 0) {
595 MessageNotifyUtil.
Notify.
warn(NbBundle.getMessage(
this.getClass(),
"KeywordSearchIngestModule.postIndexSummary.kwIdxWarnMsgTitle"),
596 NbBundle.getMessage(
this.getClass(),
"KeywordSearchIngestModule.postIndexSummary.idxErrReadFilesMsg"));
600 private Optional<TextExtractor>
getExtractor(AbstractFile abstractFile) {
604 Lookup extractionContext = Lookups.fixed(imageConfig, terminator);
608 return Optional.empty();
632 Map<String, String> extractedMetadata)
throws IngesterException {
635 if (!extractorOptional.isPresent()) {
639 Ingester.getDefault().search(
getTikaOrTextExtractor(extractorOptional, aFile, extractedMetadata), aFile.getId(), aFile.getName(), aFile,
context,
true,
settings.isIndexToSolrEnabled(),
settings.getNamesOfEnabledKeyWordLists());
643 }
catch(Exception ex) {
644 logger.log(Level.WARNING, String.format(
"Failed to search file %s [id=%d]",
645 aFile.getName(), aFile.getId()), ex);
659 Map<String, String> metadata = extractor.
getMetadata();
660 if (!metadata.isEmpty()) {
664 extractedMetadata.putAll(metadata);
666 CharSource formattedMetadata = getMetaDataCharSource(metadata);
668 finalReader = CharSource.concat(
new CharSource() {
671 public Reader openStream()
throws IOException {
674 }, formattedMetadata).openStream();
675 }
catch (IOException ex) {
676 logger.log(Level.WARNING, String.format(
"Could not format extracted metadata for file %s [id=%d]",
677 aFile.getName(), aFile.getId()), ex);
679 finalReader = fileText;
688 String moduleName = KeywordSearchIngestModule.class.getName();
690 Collection<BlackboardAttribute> attributes =
new ArrayList<>();
691 Collection<BlackboardArtifact> bbartifacts =
new ArrayList<>();
701 Map<BlackboardAttribute.ATTRIBUTE_TYPE, Pair<Integer, String>> intermediateMapping =
new HashMap<>();
702 for (Map.Entry<String, String> entry : metadata.entrySet()) {
703 if (entry.getValue() !=
null) {
704 Pair<BlackboardAttribute.ATTRIBUTE_TYPE, Integer> attrPair =
METADATA_TYPES_MAP.get(entry.getKey());
705 if (attrPair !=
null && attrPair.getKey() !=
null && attrPair.getValue() !=
null) {
706 intermediateMapping.compute(attrPair.getKey(), (k, v) -> {
707 if (v == null || v.getKey() > attrPair.getValue()) {
708 return Pair.of(attrPair.getValue(), entry.getValue());
717 for (Entry<BlackboardAttribute.ATTRIBUTE_TYPE, Pair<Integer, String>> interEntry: intermediateMapping.entrySet()) {
718 BlackboardAttribute attribute = checkAttribute(interEntry.getKey(), interEntry.getValue().getValue());
719 if (attribute !=
null) {
720 attributes.add(attribute);
724 if (!attributes.isEmpty()) {
726 BlackboardArtifact bbart = aFile.newDataArtifact(
new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_METADATA), attributes);
727 bbartifacts.add(bbart);
728 }
catch (TskCoreException ex) {
730 logger.log(Level.WARNING, String.format(
"Error creating or adding metadata artifact for file %s.", aFile.getParentPath() + aFile.getName()), ex);
733 if (!bbartifacts.isEmpty()) {
735 Case.getCurrentCaseThrows().getSleuthkitCase().getBlackboard().postArtifacts(bbartifacts, moduleName, jobId);
738 logger.log(Level.WARNING, String.format(
"Unable to post blackboard artifacts for file $s.", aFile.getParentPath() + aFile.getName()), ex);
752 private BlackboardAttribute
checkAttribute(BlackboardAttribute.ATTRIBUTE_TYPE attrType, String value) {
753 String moduleName = KeywordSearchIngestModule.class.getName();
754 if (attrType !=
null && !value.isEmpty() && value.charAt(0) !=
' ') {
755 if (attrType.getValueType() == BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.DATETIME) {
756 SimpleDateFormat metadataDateFormat =
new SimpleDateFormat(
"yyyy-MM-dd HH:mm:ss", US);
757 Long metadataDateTime = Long.valueOf(0);
759 String metadataDate = value.replaceAll(
"T",
" ").replaceAll(
"Z",
"");
760 Date usedDate = metadataDateFormat.parse(metadataDate);
761 metadataDateTime = usedDate.getTime() / 1000;
762 return new BlackboardAttribute(attrType, moduleName, metadataDateTime);
763 }
catch (ParseException ex) {
765 logger.log(Level.WARNING, String.format(
"Failed to parse date/time %s for metadata attribute %s.", value, attrType ==
null ?
"<null>" : attrType.name()), ex);
769 return new BlackboardAttribute(attrType, moduleName, value);
785 "KeywordSearchIngestModule.metadataTitle=METADATA"
787 static CharSource getMetaDataCharSource(Map<String, String> metadata) {
788 return CharSource.wrap(
new StringBuilder(
789 String.format(
"\n\n------------------------------%s------------------------------\n\n",
790 Bundle.KeywordSearchIngestModule_metadataTitle()))
791 .append(metadata.entrySet().stream().sorted(Map.Entry.comparingByKey())
792 .map(entry -> entry.getKey() +
": " + entry.getValue())
793 .collect(Collectors.joining(
"\n"))
806 if (
context.fileIngestIsCancelled()) {
810 Ingester.getDefault().search(extractedTextReader, aFile.getId(), aFile.getName(), aFile, KeywordSearchIngestModule.this.context,
false,
settings.isIndexToSolrEnabled(),
settings.getNamesOfEnabledKeyWordLists());
812 }
catch (Exception ex) {
813 logger.log(Level.WARNING,
"Failed to extract strings and ingest, file '" + aFile.getName() +
"' (id: " + aFile.getId() +
").", ex);
830 private void searchFile(Optional<TextExtractor> extractor, AbstractFile aFile, String mimeType,
boolean indexContent) {
833 TskData.TSK_DB_FILES_TYPE_ENUM aType = aFile.getType();
841 if ((aType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS)
842 || aType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS))
843 || (aType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.CARVED) && aFile.getNameExtension().equalsIgnoreCase(
"txt"))) {
844 if (
context.fileIngestIsCancelled()) {
851 final long size = aFile.getSize();
854 if ((indexContent ==
false || aFile.isDir() || size == 0)) {
856 if (
context.fileIngestIsCancelled()) {
861 }
catch (IngesterException ex) {
863 logger.log(Level.WARNING,
"Unable to index meta-data for file: " + aFile.getId(), ex);
868 if (
context.fileIngestIsCancelled()) {
874 if (ARCHIVE_MIME_TYPES.contains(mimeType)) {
876 if (
context.fileIngestIsCancelled()) {
881 }
catch (IngesterException ex) {
883 logger.log(Level.WARNING,
"Unable to index meta-data for file: " + aFile.getId(), ex);
888 boolean wasTextAdded =
false;
889 Map<String, String> extractedMetadata =
new HashMap<>();
894 if (
context.fileIngestIsCancelled()) {
897 if (MimeTypes.OCTET_STREAM.equals(mimeType)) {
909 }
catch (IngesterException e) {
910 logger.log(Level.INFO,
"Could not extract text with Tika, " + aFile.getId() +
", "
911 + aFile.getName(), e);
913 }
catch (Exception e) {
914 logger.log(Level.WARNING,
"Error extracting text with Tika, " + aFile.getId() +
", "
915 + aFile.getName(), e);
919 if ((wasTextAdded ==
false) && (aFile.getNameExtension().equalsIgnoreCase(
"txt") && !(aFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.CARVED)))) {
926 if (wasTextAdded ==
false) {
933 if (!extractedMetadata.isEmpty()) {
947 Reader textReader = textFileExtractor.
getReader();
948 if (textReader ==
null) {
949 logger.log(Level.INFO,
"Unable to extract with TextFileExtractor, Reader was null for file: {0}", aFile.getName());
951 Ingester.getDefault().search(textReader, aFile.getId(), aFile.getName(), aFile,
context,
true,
settings.isIndexToSolrEnabled(),
settings.getNamesOfEnabledKeyWordLists());
956 }
catch (Exception ex) {
957 logger.log(Level.WARNING,
"Unable to index " + aFile.getName(), ex);
static Case getCurrentCaseThrows()
String getCaseDirectory()
synchronized static Logger getLogger(String name)
static void warn(String title, String message)
static void error(String title, String message)
static IngestMessage createMessage(MessageType messageType, String source, String subject, String detailsHtml)
static IngestMessage createWarningMessage(String source, String subject, String detailsHtml)
static synchronized IngestServices getInstance()
static synchronized Server getServer()
final IngestServices services
boolean isLimitedOCRFile(AbstractFile aFile, String mimeType)
static final IngestModuleReferenceCounter refCounter
void startUp(IngestJobContext context)
static final Map< String, Pair< BlackboardAttribute.ATTRIBUTE_TYPE, Integer > > METADATA_TYPES_MAP
Lookup stringsExtractionContext
Reader getTikaOrTextExtractor(Optional< TextExtractor > extractorOptional, AbstractFile aFile, Map< String, String > extractedMetadata)
boolean extractStringsAndIndex(AbstractFile aFile)
boolean searchTextFile(AbstractFile aFile)
Optional< TextExtractor > getExtractor(AbstractFile abstractFile)
static final AtomicInteger instanceCount
boolean extractTextAndSearch(Optional< TextExtractor > extractorOptional, AbstractFile aFile, Map< String, String > extractedMetadata)
FileTypeDetector fileTypeDetector
final KeywordSearchJobSettings settings
static final String IMAGE_MIME_TYPE_PREFIX
static final ImmutableSet< String > OCR_DOCUMENTS
static final Logger logger
static final Map< Long, Map< Long, IngestStatus > > ingestStatus
void createMetadataArtifact(AbstractFile aFile, Map< String, String > metadata)
BlackboardAttribute checkAttribute(BlackboardAttribute.ATTRIBUTE_TYPE attrType, String value)
static void putIngestStatus(long ingestJobId, long fileId, IngestStatus status)
ProcessResult process(AbstractFile abstractFile)
static final int LIMITED_OCR_SIZE_MIN
void searchFile(Optional< TextExtractor > extractor, AbstractFile aFile, String mimeType, boolean indexContent)
int queryNumIndexedFiles()
static IndexingServerProperties getMultiUserServerProperties(String caseDirectory)
int queryNumIndexedChunks()
int queryNumIndexedDocuments()
SKIPPED_ERROR_IO
File was skipped because of IO issues reading it.
SKIPPED_ERROR_TEXTEXTRACT
File was skipped because of text extraction issues.
SKIPPED_ERROR_INDEXING
File was skipped because index engine had problems.
STRINGS_INGESTED
Text was extracted using an extractor chosen from the detected file type, and the text was ingested.
METADATA_INGESTED
No file content was available, so only the metadata was ingested.
void tryConnect(String host, int port)