Autopsy 4.22.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
LuceneQuery.java
Go to the documentation of this file.
1/*
2 * Autopsy Forensic Browser
3 *
4 * Copyright 2011-2017 Basis Technology Corp.
5 * Contact: carrier <at> sleuthkit <dot> org
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19package org.sleuthkit.autopsy.keywordsearch;
20
21import java.util.ArrayList;
22import java.util.Collection;
23import java.util.List;
24import java.util.Locale;
25import java.util.Map;
26import java.util.logging.Level;
27import org.apache.commons.lang3.StringUtils;
28import org.apache.commons.lang3.math.NumberUtils;
29import org.apache.solr.client.solrj.SolrQuery;
30import org.apache.solr.client.solrj.SolrRequest;
31import org.apache.solr.client.solrj.SolrRequest.METHOD;
32import org.apache.solr.client.solrj.response.QueryResponse;
33import org.apache.solr.common.SolrDocument;
34import org.apache.solr.common.SolrDocumentList;
35import org.apache.solr.common.params.CursorMarkParams;
36import org.sleuthkit.autopsy.coreutils.EscapeUtil;
37import org.sleuthkit.autopsy.coreutils.Logger;
38import org.sleuthkit.autopsy.coreutils.Version;
39import org.sleuthkit.datamodel.BlackboardArtifact;
40import org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
41import org.sleuthkit.datamodel.BlackboardAttribute;
42import org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
43import org.sleuthkit.datamodel.Content;
44import org.sleuthkit.datamodel.Score;
45import org.sleuthkit.datamodel.TskCoreException;
46import org.sleuthkit.datamodel.TskException;
47
52class LuceneQuery implements KeywordSearchQuery {
53
54 private static final Logger logger = Logger.getLogger(LuceneQuery.class.getName());
55 private String keywordStringEscaped;
56 private boolean isEscaped;
57 private final Keyword originalKeyword;
58 private final KeywordList keywordList;
59 private final List<KeywordQueryFilter> filters = new ArrayList<>();
60 private String field = null;
61 private static final int MAX_RESULTS_PER_CURSOR_MARK = 512;
62 static final int SNIPPET_LENGTH = 50;
63 static final String HIGHLIGHT_FIELD = Server.Schema.TEXT.toString();
64
65 private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
66
72 LuceneQuery(KeywordList keywordList, Keyword keyword) {
73 this.keywordList = keywordList;
74 this.originalKeyword = keyword;
75 this.keywordStringEscaped = this.originalKeyword.getSearchTerm();
76 }
77
78 @Override
79 public void addFilter(KeywordQueryFilter filter) {
80 this.filters.add(filter);
81 }
82
83 @Override
84 public void setField(String field) {
85 this.field = field;
86 }
87
88 @Override
89 public void setSubstringQuery() {
90 // Note that this is not a full substring search. Normally substring
91 // searches will be done with TermComponentQuery objects instead.
92 keywordStringEscaped += "*";
93 }
94
95 @Override
96 public void escape() {
97 keywordStringEscaped = KeywordSearchUtil.escapeLuceneQuery(originalKeyword.getSearchTerm());
98 isEscaped = true;
99 }
100
101 @Override
102 public boolean isEscaped() {
103 return isEscaped;
104 }
105
106 @Override
107 public boolean isLiteral() {
108 return originalKeyword.searchTermIsLiteral();
109 }
110
111 @Override
112 public String getEscapedQueryString() {
113 return this.keywordStringEscaped;
114 }
115
116 @Override
117 public String getQueryString() {
118 return this.originalKeyword.getSearchTerm();
119 }
120
121 @Override
122 public KeywordList getKeywordList() {
123 return keywordList;
124 }
125
126 @Override
127 public QueryResults performQuery() throws KeywordSearchModuleException, NoOpenCoreException {
128
129 final Server solrServer = KeywordSearch.getServer();
130 double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion());
131
132 SolrQuery solrQuery = createAndConfigureSolrQuery(KeywordSearchSettings.getShowSnippets());
133
134 final String strippedQueryString = StringUtils.strip(getQueryString(), "\"");
135
136 String cursorMark = CursorMarkParams.CURSOR_MARK_START;
137 boolean allResultsProcessed = false;
138 List<KeywordHit> matches = new ArrayList<>();
139 LanguageSpecificContentQueryHelper.QueryResults languageSpecificQueryResults = new LanguageSpecificContentQueryHelper.QueryResults();
140 while (!allResultsProcessed) {
141 solrQuery.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark);
142 QueryResponse response = solrServer.query(solrQuery, SolrRequest.METHOD.POST);
143 SolrDocumentList resultList = response.getResults();
144 // objectId_chunk -> "text" -> List of previews
145 Map<String, Map<String, List<String>>> highlightResponse = response.getHighlighting();
146
147 if (2.2 <= indexSchemaVersion) {
148 languageSpecificQueryResults.highlighting.putAll(response.getHighlighting());
149 }
150
151 for (SolrDocument resultDoc : resultList) {
152 if (2.2 <= indexSchemaVersion) {
153 Object language = resultDoc.getFieldValue(Server.Schema.LANGUAGE.toString());
154 if (language != null) {
155 LanguageSpecificContentQueryHelper.updateQueryResults(languageSpecificQueryResults, resultDoc);
156 }
157 }
158
159 try {
160 /*
161 * for each result doc, check that the first occurence of
162 * that term is before the window. if all the ocurences
163 * start within the window, don't record them for this
164 * chunk, they will get picked up in the next one.
165 */
166 final String docId = resultDoc.getFieldValue(Server.Schema.ID.toString()).toString();
167 final Integer chunkSize = (Integer) resultDoc.getFieldValue(Server.Schema.CHUNK_SIZE.toString());
168 final Collection<Object> content = resultDoc.getFieldValues(Server.Schema.CONTENT_STR.toString());
169
170 // if the document has language, it should be hit in language specific content fields. So skip here.
171 if (resultDoc.containsKey(Server.Schema.LANGUAGE.toString())) {
172 continue;
173 }
174
175 if (indexSchemaVersion < 2.0) {
176 //old schema versions don't support chunk_size or the content_str fields, so just accept hits
177 matches.add(createKeywordtHit(highlightResponse, docId));
178 } else {
179 //check against file name and actual content seperately.
180 for (Object content_obj : content) {
181 String content_str = (String) content_obj;
182 if (content_str == null) {
183 continue;
184 }
185 //for new schemas, check that the hit is before the chunk/window boundary.
186 int firstOccurence = strippedQueryString == null ? -1
187 : content_str.toLowerCase(Locale.ROOT).indexOf(strippedQueryString.toLowerCase(Locale.ROOT));
188 //there is no chunksize field for "parent" entries in the index
189 if (chunkSize == null || chunkSize == 0 || (firstOccurence > -1 && firstOccurence < chunkSize)) {
190 matches.add(createKeywordtHit(highlightResponse, docId));
191 }
192 }
193 }
194 } catch (TskException ex) {
195 throw new KeywordSearchModuleException(ex);
196 }
197 }
198 String nextCursorMark = response.getNextCursorMark();
199 if (cursorMark.equals(nextCursorMark)) {
200 allResultsProcessed = true;
201 }
202 cursorMark = nextCursorMark;
203 }
204
205 List<KeywordHit> mergedMatches;
206 if (2.2 <= indexSchemaVersion) {
207 mergedMatches = LanguageSpecificContentQueryHelper.mergeKeywordHits(matches, originalKeyword, languageSpecificQueryResults);
208 } else {
209 mergedMatches = matches;
210 }
211
212 QueryResults results = new QueryResults(this);
213 //in case of single term literal query there is only 1 term
214 results.addResult(new Keyword(originalKeyword.getSearchTerm(), true, true, originalKeyword.getListName(), originalKeyword.getOriginalTerm()), mergedMatches);
215
216 return results;
217 }
218
219 @Override
220 public boolean validate() {
221 return StringUtils.isNotBlank(originalKeyword.getSearchTerm());
222 }
223
240 @Override
241 public BlackboardArtifact createKeywordHitArtifact(Content content, Keyword foundKeyword, KeywordHit hit, String snippet, String listName, Long ingestJobId) {
242 return createKeywordHitArtifact(content, originalKeyword, foundKeyword, hit, snippet, listName, ingestJobId);
243 }
244
245 public static BlackboardArtifact createKeywordHitArtifact(Content content, Keyword originalKW, Keyword foundKeyword, KeywordHit hit, String snippet, String listName, Long ingestJobId) {
246 final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName();
247
248 Collection<BlackboardAttribute> attributes = new ArrayList<>();
249 if (snippet != null) {
250 attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW, MODULE_NAME, snippet));
251 }
252 attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, foundKeyword.getSearchTerm().toLowerCase()));
253 if (StringUtils.isNotBlank(listName)) {
254 attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SET_NAME, MODULE_NAME, listName));
255 }
256
257 if (originalKW != null) {
258 BlackboardAttribute.ATTRIBUTE_TYPE selType = originalKW.getArtifactAttributeType();
259 if (selType != null) {
260 attributes.add(new BlackboardAttribute(selType, MODULE_NAME, foundKeyword.getSearchTerm()));
261 }
262
263 if (originalKW.searchTermIsWholeWord()) {
264 attributes.add(new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.LITERAL.ordinal()));
265 } else {
266 attributes.add(new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.SUBSTRING.ordinal()));
267 }
268 }
269
270 hit.getArtifactID().ifPresent(artifactID
271 -> attributes.add(new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, artifactID))
272 );
273
274 try {
275 return content.newAnalysisResult(
276 BlackboardArtifact.Type.TSK_KEYWORD_HIT, Score.SCORE_LIKELY_NOTABLE,
277 null, listName, null,
278 attributes)
279 .getAnalysisResult();
280 } catch (TskCoreException e) {
281 logger.log(Level.WARNING, "Error adding bb artifact for keyword hit", e); //NON-NLS
282 return null;
283 }
284 }
285
286
287 /*
288 * Create the query object for the stored keyword
289 *
290 * @param snippets True if query should request snippets
291 *
292 * @return
293 */
294 private SolrQuery createAndConfigureSolrQuery(boolean snippets) throws NoOpenCoreException, KeywordSearchModuleException {
295 double indexSchemaVersion = NumberUtils.toDouble(KeywordSearch.getServer().getIndexInfo().getSchemaVersion());
296
297 SolrQuery q = new SolrQuery();
298 q.setShowDebugInfo(DEBUG); //debug
299 // Wrap the query string in quotes if this is a literal search term.
300 String queryStr = originalKeyword.searchTermIsLiteral()
301 ? KeywordSearchUtil.quoteQuery(keywordStringEscaped) : keywordStringEscaped;
302
303 // Run the query against an optional alternative field.
304 if (field != null) {
305 //use the optional field
306 queryStr = field + ":" + queryStr;
307 q.setQuery(queryStr);
308 } else if (2.2 <= indexSchemaVersion && originalKeyword.searchTermIsLiteral()) {
309 q.setQuery(LanguageSpecificContentQueryHelper.expandQueryString(queryStr));
310 } else {
311 q.setQuery(queryStr);
312 }
313 q.setRows(MAX_RESULTS_PER_CURSOR_MARK);
314 // Setting the sort order is necessary for cursor based paging to work.
315 q.setSort(SolrQuery.SortClause.asc(Server.Schema.ID.toString()));
316
317 q.setFields(Server.Schema.ID.toString(),
318 Server.Schema.CHUNK_SIZE.toString(),
319 Server.Schema.CONTENT_STR.toString());
320
321 if (2.2 <= indexSchemaVersion && originalKeyword.searchTermIsLiteral()) {
322 q.addField(Server.Schema.LANGUAGE.toString());
323 LanguageSpecificContentQueryHelper.configureTermfreqQuery(q, keywordStringEscaped);
324 }
325
326 for (KeywordQueryFilter filter : filters) {
327 q.addFilterQuery(filter.toString());
328 }
329
330 if (snippets) {
331 configurwQueryForHighlighting(q);
332 }
333
334 return q;
335 }
336
343 private static void configurwQueryForHighlighting(SolrQuery q) throws NoOpenCoreException {
344 double indexSchemaVersion = NumberUtils.toDouble(KeywordSearch.getServer().getIndexInfo().getSchemaVersion());
345 if (2.2 <= indexSchemaVersion) {
346 for (Server.Schema field : LanguageSpecificContentQueryHelper.getQueryFields()) {
347 q.addHighlightField(field.toString());
348 }
349 } else {
350 q.addHighlightField(HIGHLIGHT_FIELD);
351 }
352
353 q.setHighlightSnippets(1);
354 q.setHighlightFragsize(SNIPPET_LENGTH);
355
356 //tune the highlighter
357 q.setParam("hl.useFastVectorHighlighter", "on"); //fast highlighter scales better than standard one NON-NLS
358 q.setParam("hl.tag.pre", "&laquo;"); //makes sense for FastVectorHighlighter only NON-NLS
359 q.setParam("hl.tag.post", "&laquo;"); //makes sense for FastVectorHighlighter only NON-NLS
360 q.setParam("hl.fragListBuilder", "simple"); //makes sense for FastVectorHighlighter only NON-NLS
361
362 //Solr bug if fragCharSize is smaller than Query string, StringIndexOutOfBoundsException is thrown.
363 q.setParam("hl.fragCharSize", Integer.toString(q.getQuery().length())); //makes sense for FastVectorHighlighter only NON-NLS
364
365 //docs says makes sense for the original Highlighter only, but not really
366 //analyze all content SLOW! consider lowering
367 q.setParam("hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED); //NON-NLS
368 }
369
370 private KeywordHit createKeywordtHit(Map<String, Map<String, List<String>>> highlightResponse, String docId) throws TskException {
375 String snippet = "";
376 if (KeywordSearchSettings.getShowSnippets()) {
377 List<String> snippetList = highlightResponse.get(docId).get(Server.Schema.TEXT.toString());
378 // list is null if there wasn't a snippet
379 if (snippetList != null) {
380 snippet = EscapeUtil.unEscapeHtml(snippetList.get(0)).trim();
381 }
382 }
383
384 return new KeywordHit(docId, snippet, originalKeyword.getSearchTerm());
385 }
386
401 static String querySnippet(String query, long solrObjectId, boolean isRegex, boolean group) throws NoOpenCoreException {
402 return querySnippet(query, solrObjectId, 0, isRegex, group);
403 }
404
420 static String querySnippet(String query, long solrObjectId, int chunkID, boolean isRegex, boolean group) throws NoOpenCoreException {
421 SolrQuery q = new SolrQuery();
422 q.setShowDebugInfo(DEBUG); //debug
423
424 String queryStr;
425 if (isRegex) {
426 queryStr = HIGHLIGHT_FIELD + ":"
427 + (group ? KeywordSearchUtil.quoteQuery(query)
428 : query);
429 } else {
430 /*
431 * simplify query/escaping and use default field always force
432 * grouping/quotes
433 */
434 queryStr = KeywordSearchUtil.quoteQuery(query);
435 }
436 q.setQuery(queryStr);
437
438 String contentIDStr = (chunkID == 0)
439 ? Long.toString(solrObjectId)
440 : Server.getChunkIdString(solrObjectId, chunkID);
441 String idQuery = Server.Schema.ID.toString() + ":" + KeywordSearchUtil.escapeLuceneQuery(contentIDStr);
442 q.addFilterQuery(idQuery);
443
444 configurwQueryForHighlighting(q);
445
446 Server solrServer = KeywordSearch.getServer();
447
448 try {
449 QueryResponse response = solrServer.query(q, METHOD.POST);
450 Map<String, Map<String, List<String>>> responseHighlight = response.getHighlighting();
451 Map<String, List<String>> responseHighlightID = responseHighlight.get(contentIDStr);
452 if (responseHighlightID == null) {
453 return "";
454 }
455 double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion());
456 List<String> contentHighlights;
457 if (2.2 <= indexSchemaVersion) {
458 contentHighlights = LanguageSpecificContentQueryHelper.getHighlights(responseHighlightID).orElse(null);
459 } else {
460 contentHighlights = responseHighlightID.get(LuceneQuery.HIGHLIGHT_FIELD);
461 }
462 if (contentHighlights == null) {
463 return "";
464 } else {
465 // extracted content is HTML-escaped, but snippet goes in a plain text field
466 return EscapeUtil.unEscapeHtml(contentHighlights.get(0)).trim();
467 }
468 } catch (NoOpenCoreException ex) {
469 logger.log(Level.SEVERE, "Error executing Lucene Solr Query: " + query + ". Solr doc id " + solrObjectId + ", chunkID " + chunkID, ex); //NON-NLS
470 throw ex;
471 } catch (KeywordSearchModuleException ex) {
472 logger.log(Level.SEVERE, "Error executing Lucene Solr Query: " + query + ". Solr doc id " + solrObjectId + ", chunkID " + chunkID, ex); //NON-NLS
473 return "";
474 }
475 }
476}

Copyright © 2012-2024 Sleuth Kit Labs.
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.