19package org.sleuthkit.autopsy.keywordsearch;
21import java.util.ArrayList;
22import java.util.Collection;
24import java.util.Locale;
26import java.util.logging.Level;
27import org.apache.commons.lang3.StringUtils;
28import org.apache.commons.lang3.math.NumberUtils;
29import org.apache.solr.client.solrj.SolrQuery;
30import org.apache.solr.client.solrj.SolrRequest;
31import org.apache.solr.client.solrj.SolrRequest.METHOD;
32import org.apache.solr.client.solrj.response.QueryResponse;
33import org.apache.solr.common.SolrDocument;
34import org.apache.solr.common.SolrDocumentList;
35import org.apache.solr.common.params.CursorMarkParams;
36import org.sleuthkit.autopsy.coreutils.EscapeUtil;
37import org.sleuthkit.autopsy.coreutils.Logger;
38import org.sleuthkit.autopsy.coreutils.Version;
39import org.sleuthkit.datamodel.BlackboardArtifact;
40import org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
41import org.sleuthkit.datamodel.BlackboardAttribute;
42import org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
43import org.sleuthkit.datamodel.Content;
44import org.sleuthkit.datamodel.Score;
45import org.sleuthkit.datamodel.TskCoreException;
46import org.sleuthkit.datamodel.TskException;
// Logger for this class.
54 private static final Logger logger = Logger.getLogger(LuceneQuery.class.getName());
// Working copy of the search term: starts as the raw term and may later be Lucene-escaped or given a trailing "*".
55 private String keywordStringEscaped;
// NOTE(review): presumably records whether keywordStringEscaped has been escaped - confirm against its writers.
56 private boolean isEscaped;
// The keyword this query was created for.
57 private final Keyword originalKeyword;
// The keyword list supplied at construction time.
58 private final KeywordList keywordList;
// Filters registered through addFilter(); each one is added to the Solr query as a filter query.
59 private final List<KeywordQueryFilter> filters =
new ArrayList<>();
// Optional Solr field name; when applied it is prepended to the query string as "field:query".
60 private String field =
null;
// Number of rows requested from Solr per query, i.e. the page size while walking the cursor.
61 private static final int MAX_RESULTS_PER_CURSOR_MARK = 512;
// Fragment size handed to the Solr highlighter.
62 static final int SNIPPET_LENGTH = 50;
// Name of the Solr text field used for highlighting.
63 static final String HIGHLIGHT_FIELD = Server.Schema.TEXT.toString();
// True when running a development build; passed to SolrQuery.setShowDebugInfo().
65 private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
/**
 * Creates a query for a single keyword.
 *
 * @param keywordList the keyword list stored with this query
 * @param keyword     the keyword to search for; its search term becomes the
 *                    initial (not yet escaped) query string
 */
72 LuceneQuery(KeywordList keywordList, Keyword keyword) {
73 this.keywordList = keywordList;
74 this.originalKeyword = keyword;
// Start from the raw search term; escaping and wildcarding are applied later by other methods.
75 this.keywordStringEscaped = this.originalKeyword.getSearchTerm();
/**
 * Registers a filter for this query. Registered filters are later added to
 * the Solr query as filter queries.
 *
 * @param filter the filter to add
 */
79 public void addFilter(KeywordQueryFilter filter) {
80 this.filters.add(filter);
// NOTE(review): presumably stores the Solr field name that is later prefixed to the query string - confirm.
84 public void setField(String field) {
// Appends a trailing "*" wildcard to the working query string.
92 keywordStringEscaped +=
"*";
// Rebuilds the working query string by Lucene-escaping the original search term.
97 keywordStringEscaped = KeywordSearchUtil.escapeLuceneQuery(originalKeyword.getSearchTerm());
// Reports whether the original keyword's search term is a literal.
108 return originalKeyword.searchTermIsLiteral();
// Returns the working query string (the escaped and/or wildcarded form, if those were applied).
113 return this.keywordStringEscaped;
// Returns the original, unmodified search term.
118 return this.originalKeyword.getSearchTerm();
/**
 * Runs this keyword query against the Solr server, paging through the
 * result set with a cursor mark and collecting a KeywordHit for each
 * matching document.
 *
 * @return presumably the QueryResults object populated at the end of this
 *         method - TODO confirm
 *
 * @throws KeywordSearchModuleException wraps a TskException raised while
 *                                      processing results
 * @throws NoOpenCoreException          declared by the Solr server accessors
 *                                      used here
 */
127 public QueryResults
performQuery() throws KeywordSearchModuleException, NoOpenCoreException {
129 final Server solrServer = KeywordSearch.getServer();
// The schema version is compared numerically below to pick version-specific behaviour.
130 double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion());
132 SolrQuery solrQuery = createAndConfigureSolrQuery(KeywordSearchSettings.getShowSnippets());
// Strip surrounding double quotes so the term can be located in raw content with indexOf().
134 final String strippedQueryString = StringUtils.strip(
getQueryString(),
"\"");
136 String cursorMark = CursorMarkParams.CURSOR_MARK_START;
137 boolean allResultsProcessed =
false;
138 List<KeywordHit> matches =
new ArrayList<>();
139 LanguageSpecificContentQueryHelper.QueryResults languageSpecificQueryResults =
new LanguageSpecificContentQueryHelper.QueryResults();
// Fetch one page per iteration until Solr hands back the same cursor mark.
140 while (!allResultsProcessed) {
141 solrQuery.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark);
142 QueryResponse response = solrServer.query(solrQuery, SolrRequest.METHOD.POST);
143 SolrDocumentList resultList = response.getResults();
145 Map<String, Map<String, List<String>>> highlightResponse = response.getHighlighting();
// Schema 2.2 and newer: accumulate highlighting for the language-specific merge step at the end.
147 if (2.2 <= indexSchemaVersion) {
148 languageSpecificQueryResults.highlighting.putAll(response.getHighlighting());
151 for (SolrDocument resultDoc : resultList) {
152 if (2.2 <= indexSchemaVersion) {
// Documents carrying a language value are handed to the language-specific helper.
153 Object language = resultDoc.getFieldValue(Server.Schema.LANGUAGE.toString());
154 if (language !=
null) {
155 LanguageSpecificContentQueryHelper.updateQueryResults(languageSpecificQueryResults, resultDoc);
166 final String docId = resultDoc.getFieldValue(Server.Schema.ID.toString()).toString();
167 final Integer chunkSize = (Integer) resultDoc.getFieldValue(Server.Schema.CHUNK_SIZE.toString());
168 final Collection<Object> content = resultDoc.getFieldValues(Server.Schema.CONTENT_STR.toString());
171 if (resultDoc.containsKey(Server.Schema.LANGUAGE.toString())) {
// Indexes older than schema 2.0: a hit is recorded for the document directly.
175 if (indexSchemaVersion < 2.0) {
177 matches.add(createKeywordtHit(highlightResponse, docId));
180 for (Object content_obj : content) {
181 String content_str = (String) content_obj;
182 if (content_str ==
null) {
// Case-insensitive position of the (unquoted) query string within this content value.
186 int firstOccurence = strippedQueryString ==
null ? -1
187 : content_str.toLowerCase(Locale.ROOT).indexOf(strippedQueryString.toLowerCase(Locale.ROOT));
// Accept the hit when no chunk size is recorded, the chunk size is 0, or the first
// occurrence starts within the first chunkSize characters of the content.
189 if (chunkSize ==
null || chunkSize == 0 || (firstOccurence > -1 && firstOccurence < chunkSize)) {
190 matches.add(createKeywordtHit(highlightResponse, docId));
194 }
catch (TskException ex) {
195 throw new KeywordSearchModuleException(ex);
// An unchanged cursor mark means there are no further results to fetch.
198 String nextCursorMark = response.getNextCursorMark();
199 if (cursorMark.equals(nextCursorMark)) {
200 allResultsProcessed =
true;
202 cursorMark = nextCursorMark;
// Schema 2.2 and newer: merge the collected hits with the language-specific results.
205 List<KeywordHit> mergedMatches;
206 if (2.2 <= indexSchemaVersion) {
207 mergedMatches = LanguageSpecificContentQueryHelper.mergeKeywordHits(matches, originalKeyword, languageSpecificQueryResults);
209 mergedMatches = matches;
212 QueryResults results =
new QueryResults(
this);
// The hits are stored under a new Keyword built from the original term, with both boolean flags set to true.
214 results.addResult(
new Keyword(originalKeyword.getSearchTerm(),
true,
true, originalKeyword.getListName(), originalKeyword.getOriginalTerm()), mergedMatches);
221 return StringUtils.isNotBlank(originalKeyword.getSearchTerm());
/**
 * Creates a keyword hit artifact by delegating to the static overload,
 * passing this query's original keyword as the originating keyword.
 *
 * @param content      the content the artifact is created on
 * @param foundKeyword the keyword that was found
 * @param hit          the keyword hit
 * @param snippet      preview text for the hit; may be null
 * @param listName     name of the keyword list; may be blank
 * @param ingestJobId  the ingest job ID, forwarded unchanged
 *
 * @return the result of the static overload
 */
241 public BlackboardArtifact createKeywordHitArtifact(Content content, Keyword foundKeyword, KeywordHit hit, String snippet, String listName, Long ingestJobId) {
242 return createKeywordHitArtifact(content, originalKeyword, foundKeyword, hit, snippet, listName, ingestJobId);
/**
 * Builds the attribute set for a keyword hit and creates a TSK_KEYWORD_HIT
 * analysis result on the given content, scored SCORE_LIKELY_NOTABLE.
 *
 * @param content      the content the analysis result is created on
 * @param originalKW   the keyword that was searched for; may be null, in
 *                     which case the attribute-type and search-type
 *                     attributes are not added
 * @param foundKeyword the keyword that was found
 * @param hit          the keyword hit; its artifact ID, if present, is
 *                     recorded as an associated artifact
 * @param snippet      preview text for the hit; skipped when null
 * @param listName     name of the keyword list; skipped when blank
 * @param ingestJobId  the ingest job ID (NOTE(review): its use is not
 *                     visible in this method's shown statements - confirm)
 *
 * @return the created analysis result; NOTE(review): a TskCoreException is
 *         logged as a warning - confirm what is returned in that case
 */
245 public static BlackboardArtifact createKeywordHitArtifact(Content content, Keyword originalKW, Keyword foundKeyword, KeywordHit hit, String snippet, String listName, Long ingestJobId) {
246 final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName();
248 Collection<BlackboardAttribute> attributes =
new ArrayList<>();
249 if (snippet !=
null) {
250 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW, MODULE_NAME, snippet));
// The keyword attribute stores the found term lower-cased.
// NOTE(review): toLowerCase() uses the default locale here, unlike the Locale.ROOT calls elsewhere in this class.
252 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, foundKeyword.getSearchTerm().toLowerCase()));
253 if (StringUtils.isNotBlank(listName)) {
254 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SET_NAME, MODULE_NAME, listName));
257 if (originalKW !=
null) {
// If the original keyword maps to a specific attribute type, also record the found term under that type.
258 BlackboardAttribute.ATTRIBUTE_TYPE selType = originalKW.getArtifactAttributeType();
259 if (selType !=
null) {
260 attributes.add(
new BlackboardAttribute(selType, MODULE_NAME, foundKeyword.getSearchTerm()));
// Record the search type: LITERAL for whole-word keywords; the SUBSTRING attribute below is
// presumably the alternative branch - confirm.
// NOTE(review): the enum ordinal() is what gets persisted, so reordering QueryType would change stored values.
263 if (originalKW.searchTermIsWholeWord()) {
264 attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.LITERAL.ordinal()));
266 attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.SUBSTRING.ordinal()));
// When the hit carries an artifact ID, link it as the associated artifact.
270 hit.getArtifactID().ifPresent(artifactID
271 -> attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, artifactID))
275 return content.newAnalysisResult(
276 BlackboardArtifact.Type.TSK_KEYWORD_HIT, Score.SCORE_LIKELY_NOTABLE,
277 null, listName,
null,
279 .getAnalysisResult();
280 }
catch (TskCoreException e) {
281 logger.log(Level.WARNING,
"Error adding bb artifact for keyword hit", e);
/**
 * Builds the SolrQuery for this keyword: query string, page size, sort
 * order, returned fields, filter queries and highlighting settings.
 *
 * @param snippets whether snippets are wanted (NOTE(review): presumably
 *                 gates the highlighting configuration at the end - confirm)
 *
 * @return presumably the configured query object built here - TODO confirm
 *
 * @throws NoOpenCoreException          declared by the Solr server accessors
 *                                      used here
 * @throws KeywordSearchModuleException declared by the helpers used here
 */
294 private SolrQuery createAndConfigureSolrQuery(
boolean snippets)
throws NoOpenCoreException, KeywordSearchModuleException {
295 double indexSchemaVersion = NumberUtils.toDouble(KeywordSearch.getServer().getIndexInfo().getSchemaVersion());
297 SolrQuery q =
new SolrQuery();
298 q.setShowDebugInfo(DEBUG);
// Literal search terms are wrapped in quotes; other terms are used as they are.
300 String queryStr = originalKeyword.searchTermIsLiteral()
301 ? KeywordSearchUtil.quoteQuery(keywordStringEscaped) : keywordStringEscaped;
// Field-restricted form of the query: "field:query".
306 queryStr = field +
":" + queryStr;
307 q.setQuery(queryStr);
308 }
// Schema 2.2 and newer with a literal term: expand the query through the language-specific helper.
else if (2.2 <= indexSchemaVersion && originalKeyword.searchTermIsLiteral()) {
309 q.setQuery(LanguageSpecificContentQueryHelper.expandQueryString(queryStr));
311 q.setQuery(queryStr);
// Page size for each request made while walking the cursor.
313 q.setRows(MAX_RESULTS_PER_CURSOR_MARK);
// Results are sorted by document ID ascending, giving the cursor-based paging a fixed order.
315 q.setSort(SolrQuery.SortClause.asc(Server.Schema.ID.toString()));
// Only the fields needed to build hits are requested.
317 q.setFields(Server.Schema.ID.toString(),
318 Server.Schema.CHUNK_SIZE.toString(),
319 Server.Schema.CONTENT_STR.toString());
// Schema 2.2 and newer with a literal term: also request the language field and configure the term-frequency query.
321 if (2.2 <= indexSchemaVersion && originalKeyword.searchTermIsLiteral()) {
322 q.addField(Server.Schema.LANGUAGE.toString());
323 LanguageSpecificContentQueryHelper.configureTermfreqQuery(q, keywordStringEscaped);
// Apply every registered filter as a Solr filter query.
326 for (KeywordQueryFilter filter : filters) {
327 q.addFilterQuery(filter.toString());
331 configurwQueryForHighlighting(q);
/**
 * Configures highlighting on the given query: which fields are highlighted,
 * how many snippets are returned, the fragment size, and the highlighter
 * tuning parameters.
 *
 * @param q the query to configure; its query string must already be set,
 *          because its length is used for hl.fragCharSize
 *
 * @throws NoOpenCoreException declared by the Solr server accessors used here
 */
343 private static void configurwQueryForHighlighting(SolrQuery q)
throws NoOpenCoreException {
344 double indexSchemaVersion = NumberUtils.toDouble(KeywordSearch.getServer().getIndexInfo().getSchemaVersion());
// Schema 2.2 and newer: highlight every language-specific query field.
345 if (2.2 <= indexSchemaVersion) {
346 for (Server.Schema field : LanguageSpecificContentQueryHelper.getQueryFields()) {
347 q.addHighlightField(field.toString());
// NOTE(review): presumably the branch for older schemas, highlighting only the plain text field - confirm.
350 q.addHighlightField(HIGHLIGHT_FIELD);
// A single snippet of SNIPPET_LENGTH is requested.
353 q.setHighlightSnippets(1);
354 q.setHighlightFragsize(SNIPPET_LENGTH);
// Highlighter tuning parameters.
357 q.setParam(
"hl.useFastVectorHighlighter",
"on");
// NOTE(review): hl.tag.pre and hl.tag.post are set to the same marker string - confirm that
// the code consuming snippets expects identical opening and closing markers.
358 q.setParam(
"hl.tag.pre",
"«");
359 q.setParam(
"hl.tag.post",
"«");
360 q.setParam(
"hl.fragListBuilder",
"simple");
// hl.fragCharSize is set to the length of the query string.
363 q.setParam(
"hl.fragCharSize", Integer.toString(q.getQuery().length()));
// Lift the limit on how many characters the highlighter analyzes.
367 q.setParam(
"hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED);
/**
 * Creates a KeywordHit for a Solr document. When snippets are enabled in the
 * keyword search settings, the first highlight for the document's text field
 * is HTML-unescaped, trimmed and used as the snippet.
 *
 * @param highlightResponse highlighting map from the Solr response, keyed by
 *                          document ID and then by field name
 * @param docId             the Solr document ID
 *
 * @return a hit carrying the document ID, the snippet and the original
 *         search term
 *
 * @throws TskException declared; presumably raised by the KeywordHit
 *                      constructor - confirm
 */
370 private KeywordHit createKeywordtHit(Map<String, Map<String, List<String>>> highlightResponse, String docId)
throws TskException {
376 if (KeywordSearchSettings.getShowSnippets()) {
// NOTE(review): highlightResponse.get(docId) is dereferenced without a null check - confirm
// that every returned document has a highlighting entry.
377 List<String> snippetList = highlightResponse.get(docId).get(Server.Schema.TEXT.toString());
// No entry for the text field means there is no snippet for this document.
379 if (snippetList !=
null) {
380 snippet = EscapeUtil.unEscapeHtml(snippetList.get(0)).trim();
384 return new KeywordHit(docId, snippet, originalKeyword.getSearchTerm());
/**
 * Convenience overload that requests a snippet with chunk ID 0. In the
 * five-argument overload, chunk ID 0 makes the Solr document ID the object
 * ID on its own rather than a chunk ID string.
 *
 * @param query        the query string to highlight
 * @param solrObjectId the Solr object ID of the content
 * @param isRegex      forwarded unchanged to the five-argument overload
 * @param group        forwarded unchanged to the five-argument overload
 *
 * @return the result of the five-argument overload
 *
 * @throws NoOpenCoreException declared by the five-argument overload
 */
401 static String querySnippet(String query,
long solrObjectId,
boolean isRegex,
boolean group)
throws NoOpenCoreException {
402 return querySnippet(query, solrObjectId, 0, isRegex, group);
/**
 * Queries Solr for a highlighted snippet of the given query within a single
 * document (or chunk of a document) and returns the first highlight,
 * HTML-unescaped and trimmed.
 *
 * @param query        the query string to highlight
 * @param solrObjectId the Solr object ID of the content
 * @param chunkID      the chunk to search; 0 means the document ID is the
 *                     object ID alone, otherwise a chunk ID string is built
 *                     from the object ID and chunk ID
 * @param isRegex      NOTE(review): presumably selects between the
 *                     field-qualified and the plain quoted query form -
 *                     confirm
 * @param group        when true, the query is quoted in the field-qualified
 *                     form
 *
 * @return the first highlight for the document; NOTE(review): confirm what
 *         is returned when no highlight is found or a logged exception occurs
 *
 * @throws NoOpenCoreException declared by the Solr server accessors used here
 */
420 static String querySnippet(String query,
long solrObjectId,
int chunkID,
boolean isRegex,
boolean group)
throws NoOpenCoreException {
421 SolrQuery q =
new SolrQuery();
422 q.setShowDebugInfo(DEBUG);
// Field-qualified form: the highlight field name followed by the (optionally quoted) query.
426 queryStr = HIGHLIGHT_FIELD +
":"
427 + (group ? KeywordSearchUtil.quoteQuery(query)
// Plain form: the quoted query with no field qualifier.
434 queryStr = KeywordSearchUtil.quoteQuery(query);
436 q.setQuery(queryStr);
// Chunk ID 0 addresses the document by object ID alone; any other value addresses a specific chunk.
438 String contentIDStr = (chunkID == 0)
439 ? Long.toString(solrObjectId)
440 : Server.getChunkIdString(solrObjectId, chunkID);
// Restrict the search to that one document via a filter query on the escaped ID.
441 String idQuery = Server.Schema.ID.toString() +
":" + KeywordSearchUtil.escapeLuceneQuery(contentIDStr);
442 q.addFilterQuery(idQuery);
444 configurwQueryForHighlighting(q);
446 Server solrServer = KeywordSearch.getServer();
449 QueryResponse response = solrServer.query(q, METHOD.POST);
// Highlighting is keyed by document ID; look up the entry for the requested document.
450 Map<String, Map<String, List<String>>> responseHighlight = response.getHighlighting();
451 Map<String, List<String>> responseHighlightID = responseHighlight.get(contentIDStr);
452 if (responseHighlightID ==
null) {
// Schema 2.2 and newer: take the highlights via the language-specific helper.
455 double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion());
456 List<String> contentHighlights;
457 if (2.2 <= indexSchemaVersion) {
458 contentHighlights = LanguageSpecificContentQueryHelper.getHighlights(responseHighlightID).orElse(
null);
// NOTE(review): presumably the branch for older schemas, reading the plain highlight field - confirm.
460 contentHighlights = responseHighlightID.get(LuceneQuery.HIGHLIGHT_FIELD);
462 if (contentHighlights ==
null) {
// Only the first highlight is used.
466 return EscapeUtil.unEscapeHtml(contentHighlights.get(0)).trim();
468 }
// Both failure types are logged at SEVERE with the query, document ID and chunk ID.
catch (NoOpenCoreException ex) {
469 logger.log(Level.SEVERE,
"Error executing Lucene Solr Query: " + query +
". Solr doc id " + solrObjectId +
", chunkID " + chunkID, ex);
471 }
catch (KeywordSearchModuleException ex) {
472 logger.log(Level.SEVERE,
"Error executing Lucene Solr Query: " + query +
". Solr doc id " + solrObjectId +
", chunkID " + chunkID, ex);
KeywordList getKeywordList()
QueryResults performQuery()
String getEscapedQueryString()