Autopsy  4.19.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
LuceneQuery.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2017 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.keywordsearch;
20 
21 import java.util.ArrayList;
22 import java.util.Collection;
23 import java.util.List;
24 import java.util.Map;
25 import java.util.logging.Level;
26 import org.apache.commons.lang3.StringUtils;
27 import org.apache.commons.lang3.math.NumberUtils;
28 import org.apache.solr.client.solrj.SolrQuery;
29 import org.apache.solr.client.solrj.SolrRequest;
30 import org.apache.solr.client.solrj.SolrRequest.METHOD;
31 import org.apache.solr.client.solrj.response.QueryResponse;
32 import org.apache.solr.common.SolrDocument;
33 import org.apache.solr.common.SolrDocumentList;
34 import org.apache.solr.common.params.CursorMarkParams;
38 import org.sleuthkit.datamodel.BlackboardArtifact;
39 import org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
40 import org.sleuthkit.datamodel.BlackboardAttribute;
41 import org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
42 import org.sleuthkit.datamodel.Content;
43 import org.sleuthkit.datamodel.Score;
44 import org.sleuthkit.datamodel.TskCoreException;
45 import org.sleuthkit.datamodel.TskException;
46 
51 class LuceneQuery implements KeywordSearchQuery {
52 
53  private static final Logger logger = Logger.getLogger(LuceneQuery.class.getName());
54  private String keywordStringEscaped;
55  private boolean isEscaped;
56  private final Keyword originalKeyword;
57  private final KeywordList keywordList;
58  private final List<KeywordQueryFilter> filters = new ArrayList<>();
59  private String field = null;
60  private static final int MAX_RESULTS_PER_CURSOR_MARK = 512;
61  static final int SNIPPET_LENGTH = 50;
62  static final String HIGHLIGHT_FIELD = Server.Schema.TEXT.toString();
63 
64  private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
65 
/**
 * Creates a query for a single keyword.
 *
 * The query string that will be sent to Solr is initialised to the keyword's
 * raw search term; call escape() to replace it with an escaped version.
 *
 * @param keywordList The keyword list the keyword belongs to.
 * @param keyword     The keyword to search for.
 */
LuceneQuery(KeywordList keywordList, Keyword keyword) {
    this.keywordList = keywordList;
    originalKeyword = keyword;
    keywordStringEscaped = originalKeyword.getSearchTerm();
}
76 
77  @Override
78  public void addFilter(KeywordQueryFilter filter) {
79  this.filters.add(filter);
80  }
81 
82  @Override
83  public void setField(String field) {
84  this.field = field;
85  }
86 
87  @Override
88  public void setSubstringQuery() {
89  // Note that this is not a full substring search. Normally substring
90  // searches will be done with TermComponentQuery objects instead.
91  keywordStringEscaped += "*";
92  }
93 
94  @Override
95  public void escape() {
96  keywordStringEscaped = KeywordSearchUtil.escapeLuceneQuery(originalKeyword.getSearchTerm());
97  isEscaped = true;
98  }
99 
100  @Override
101  public boolean isEscaped() {
102  return isEscaped;
103  }
104 
105  @Override
106  public boolean isLiteral() {
107  return originalKeyword.searchTermIsLiteral();
108  }
109 
110  @Override
111  public String getEscapedQueryString() {
112  return this.keywordStringEscaped;
113  }
114 
115  @Override
116  public String getQueryString() {
117  return this.originalKeyword.getSearchTerm();
118  }
119 
120  @Override
121  public KeywordList getKeywordList() {
122  return keywordList;
123  }
124 
125  @Override
126  public QueryResults performQuery() throws KeywordSearchModuleException, NoOpenCoreException {
127 
128  final Server solrServer = KeywordSearch.getServer();
129  double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion());
130 
131  SolrQuery solrQuery = createAndConfigureSolrQuery(KeywordSearchSettings.getShowSnippets());
132 
133  final String strippedQueryString = StringUtils.strip(getQueryString(), "\"");
134 
135  String cursorMark = CursorMarkParams.CURSOR_MARK_START;
136  boolean allResultsProcessed = false;
137  List<KeywordHit> matches = new ArrayList<>();
138  LanguageSpecificContentQueryHelper.QueryResults languageSpecificQueryResults = new LanguageSpecificContentQueryHelper.QueryResults();
139  while (!allResultsProcessed) {
140  solrQuery.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark);
141  QueryResponse response = solrServer.query(solrQuery, SolrRequest.METHOD.POST);
142  SolrDocumentList resultList = response.getResults();
143  // objectId_chunk -> "text" -> List of previews
144  Map<String, Map<String, List<String>>> highlightResponse = response.getHighlighting();
145 
146  if (2.2 <= indexSchemaVersion) {
147  languageSpecificQueryResults.highlighting.putAll(response.getHighlighting());
148  }
149 
150  for (SolrDocument resultDoc : resultList) {
151  if (2.2 <= indexSchemaVersion) {
152  Object language = resultDoc.getFieldValue(Server.Schema.LANGUAGE.toString());
153  if (language != null) {
154  LanguageSpecificContentQueryHelper.updateQueryResults(languageSpecificQueryResults, resultDoc);
155  }
156  }
157 
158  try {
159  /*
160  * for each result doc, check that the first occurence of
161  * that term is before the window. if all the ocurences
162  * start within the window, don't record them for this
163  * chunk, they will get picked up in the next one.
164  */
165  final String docId = resultDoc.getFieldValue(Server.Schema.ID.toString()).toString();
166  final Integer chunkSize = (Integer) resultDoc.getFieldValue(Server.Schema.CHUNK_SIZE.toString());
167  final Collection<Object> content = resultDoc.getFieldValues(Server.Schema.CONTENT_STR.toString());
168 
169  // if the document has language, it should be hit in language specific content fields. So skip here.
170  if (resultDoc.containsKey(Server.Schema.LANGUAGE.toString())) {
171  continue;
172  }
173 
174  if (indexSchemaVersion < 2.0) {
175  //old schema versions don't support chunk_size or the content_str fields, so just accept hits
176  matches.add(createKeywordtHit(highlightResponse, docId));
177  } else {
178  //check against file name and actual content seperately.
179  for (Object content_obj : content) {
180  String content_str = (String) content_obj;
181  //for new schemas, check that the hit is before the chunk/window boundary.
182  int firstOccurence = StringUtils.indexOfIgnoreCase(content_str, strippedQueryString);
183  //there is no chunksize field for "parent" entries in the index
184  if (chunkSize == null || chunkSize == 0 || (firstOccurence > -1 && firstOccurence < chunkSize)) {
185  matches.add(createKeywordtHit(highlightResponse, docId));
186  }
187  }
188  }
189  } catch (TskException ex) {
190  throw new KeywordSearchModuleException(ex);
191  }
192  }
193  String nextCursorMark = response.getNextCursorMark();
194  if (cursorMark.equals(nextCursorMark)) {
195  allResultsProcessed = true;
196  }
197  cursorMark = nextCursorMark;
198  }
199 
200  List<KeywordHit> mergedMatches;
201  if (2.2 <= indexSchemaVersion) {
202  mergedMatches = LanguageSpecificContentQueryHelper.mergeKeywordHits(matches, originalKeyword, languageSpecificQueryResults);
203  } else {
204  mergedMatches = matches;
205  }
206 
207  QueryResults results = new QueryResults(this);
208  //in case of single term literal query there is only 1 term
209  results.addResult(new Keyword(originalKeyword.getSearchTerm(), true, true, originalKeyword.getListName(), originalKeyword.getOriginalTerm()), mergedMatches);
210 
211  return results;
212  }
213 
214  @Override
215  public boolean validate() {
216  return StringUtils.isNotBlank(originalKeyword.getSearchTerm());
217  }
218 
235  @Override
236  public BlackboardArtifact createKeywordHitArtifact(Content content, Keyword foundKeyword, KeywordHit hit, String snippet, String listName) {
237  final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName();
238 
239  Collection<BlackboardAttribute> attributes = new ArrayList<>();
240  if (snippet != null) {
241  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW, MODULE_NAME, snippet));
242  }
243  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, foundKeyword.getSearchTerm()));
244  if (StringUtils.isNotBlank(listName)) {
245  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SET_NAME, MODULE_NAME, listName));
246  }
247 
248  if (originalKeyword != null) {
249  BlackboardAttribute.ATTRIBUTE_TYPE selType = originalKeyword.getArtifactAttributeType();
250  if (selType != null) {
251  attributes.add(new BlackboardAttribute(selType, MODULE_NAME, foundKeyword.getSearchTerm()));
252  }
253 
254  if (originalKeyword.searchTermIsWholeWord()) {
255  attributes.add(new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.LITERAL.ordinal()));
256  } else {
257  attributes.add(new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.SUBSTRING.ordinal()));
258  }
259  }
260 
261  hit.getArtifactID().ifPresent(artifactID
262  -> attributes.add(new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, artifactID))
263  );
264 
265  try {
266  return content.newAnalysisResult(
267  BlackboardArtifact.Type.TSK_KEYWORD_HIT, Score.SCORE_LIKELY_NOTABLE,
268  null, listName, null,
269  attributes)
270  .getAnalysisResult();
271  } catch (TskCoreException e) {
272  logger.log(Level.WARNING, "Error adding bb artifact for keyword hit", e); //NON-NLS
273  return null;
274  }
275  }
276 
277 
278  /*
279  * Create the query object for the stored keyword
280  *
281  * @param snippets True if query should request snippets
282  *
283  * @return
284  */
285  private SolrQuery createAndConfigureSolrQuery(boolean snippets) throws NoOpenCoreException, KeywordSearchModuleException {
286  double indexSchemaVersion = NumberUtils.toDouble(KeywordSearch.getServer().getIndexInfo().getSchemaVersion());
287 
288  SolrQuery q = new SolrQuery();
289  q.setShowDebugInfo(DEBUG); //debug
290  // Wrap the query string in quotes if this is a literal search term.
291  String queryStr = originalKeyword.searchTermIsLiteral()
292  ? KeywordSearchUtil.quoteQuery(keywordStringEscaped) : keywordStringEscaped;
293 
294  // Run the query against an optional alternative field.
295  if (field != null) {
296  //use the optional field
297  queryStr = field + ":" + queryStr;
298  q.setQuery(queryStr);
299  } else if (2.2 <= indexSchemaVersion && originalKeyword.searchTermIsLiteral()) {
300  q.setQuery(LanguageSpecificContentQueryHelper.expandQueryString(queryStr));
301  } else {
302  q.setQuery(queryStr);
303  }
304  q.setRows(MAX_RESULTS_PER_CURSOR_MARK);
305  // Setting the sort order is necessary for cursor based paging to work.
306  q.setSort(SolrQuery.SortClause.asc(Server.Schema.ID.toString()));
307 
308  q.setFields(Server.Schema.ID.toString(),
309  Server.Schema.CHUNK_SIZE.toString(),
310  Server.Schema.CONTENT_STR.toString());
311 
312  if (2.2 <= indexSchemaVersion && originalKeyword.searchTermIsLiteral()) {
313  q.addField(Server.Schema.LANGUAGE.toString());
314  LanguageSpecificContentQueryHelper.configureTermfreqQuery(q, keywordStringEscaped);
315  }
316 
317  for (KeywordQueryFilter filter : filters) {
318  q.addFilterQuery(filter.toString());
319  }
320 
321  if (snippets) {
322  configurwQueryForHighlighting(q);
323  }
324 
325  return q;
326  }
327 
334  private static void configurwQueryForHighlighting(SolrQuery q) throws NoOpenCoreException {
335  double indexSchemaVersion = NumberUtils.toDouble(KeywordSearch.getServer().getIndexInfo().getSchemaVersion());
336  if (2.2 <= indexSchemaVersion) {
337  for (Server.Schema field : LanguageSpecificContentQueryHelper.getQueryFields()) {
338  q.addHighlightField(field.toString());
339  }
340  } else {
341  q.addHighlightField(HIGHLIGHT_FIELD);
342  }
343 
344  q.setHighlightSnippets(1);
345  q.setHighlightFragsize(SNIPPET_LENGTH);
346 
347  //tune the highlighter
348  q.setParam("hl.useFastVectorHighlighter", "on"); //fast highlighter scales better than standard one NON-NLS
349  q.setParam("hl.tag.pre", "&laquo;"); //makes sense for FastVectorHighlighter only NON-NLS
350  q.setParam("hl.tag.post", "&laquo;"); //makes sense for FastVectorHighlighter only NON-NLS
351  q.setParam("hl.fragListBuilder", "simple"); //makes sense for FastVectorHighlighter only NON-NLS
352 
353  //Solr bug if fragCharSize is smaller than Query string, StringIndexOutOfBoundsException is thrown.
354  q.setParam("hl.fragCharSize", Integer.toString(q.getQuery().length())); //makes sense for FastVectorHighlighter only NON-NLS
355 
356  //docs says makes sense for the original Highlighter only, but not really
357  //analyze all content SLOW! consider lowering
358  q.setParam("hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED); //NON-NLS
359  }
360 
361  private KeywordHit createKeywordtHit(Map<String, Map<String, List<String>>> highlightResponse, String docId) throws TskException {
366  String snippet = "";
367  if (KeywordSearchSettings.getShowSnippets()) {
368  List<String> snippetList = highlightResponse.get(docId).get(Server.Schema.TEXT.toString());
369  // list is null if there wasn't a snippet
370  if (snippetList != null) {
371  snippet = EscapeUtil.unEscapeHtml(snippetList.get(0)).trim();
372  }
373  }
374 
375  return new KeywordHit(docId, snippet, originalKeyword.getSearchTerm());
376  }
377 
392  static String querySnippet(String query, long solrObjectId, boolean isRegex, boolean group) throws NoOpenCoreException {
393  return querySnippet(query, solrObjectId, 0, isRegex, group);
394  }
395 
411  static String querySnippet(String query, long solrObjectId, int chunkID, boolean isRegex, boolean group) throws NoOpenCoreException {
412  SolrQuery q = new SolrQuery();
413  q.setShowDebugInfo(DEBUG); //debug
414 
415  String queryStr;
416  if (isRegex) {
417  queryStr = HIGHLIGHT_FIELD + ":"
418  + (group ? KeywordSearchUtil.quoteQuery(query)
419  : query);
420  } else {
421  /*
422  * simplify query/escaping and use default field always force
423  * grouping/quotes
424  */
425  queryStr = KeywordSearchUtil.quoteQuery(query);
426  }
427  q.setQuery(queryStr);
428 
429  String contentIDStr = (chunkID == 0)
430  ? Long.toString(solrObjectId)
431  : Server.getChunkIdString(solrObjectId, chunkID);
432  String idQuery = Server.Schema.ID.toString() + ":" + KeywordSearchUtil.escapeLuceneQuery(contentIDStr);
433  q.addFilterQuery(idQuery);
434 
435  configurwQueryForHighlighting(q);
436 
437  Server solrServer = KeywordSearch.getServer();
438 
439  try {
440  QueryResponse response = solrServer.query(q, METHOD.POST);
441  Map<String, Map<String, List<String>>> responseHighlight = response.getHighlighting();
442  Map<String, List<String>> responseHighlightID = responseHighlight.get(contentIDStr);
443  if (responseHighlightID == null) {
444  return "";
445  }
446  double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion());
447  List<String> contentHighlights;
448  if (2.2 <= indexSchemaVersion) {
449  contentHighlights = LanguageSpecificContentQueryHelper.getHighlights(responseHighlightID).orElse(null);
450  } else {
451  contentHighlights = responseHighlightID.get(LuceneQuery.HIGHLIGHT_FIELD);
452  }
453  if (contentHighlights == null) {
454  return "";
455  } else {
456  // extracted content is HTML-escaped, but snippet goes in a plain text field
457  return EscapeUtil.unEscapeHtml(contentHighlights.get(0)).trim();
458  }
459  } catch (NoOpenCoreException ex) {
460  logger.log(Level.SEVERE, "Error executing Lucene Solr Query: " + query + ". Solr doc id " + solrObjectId + ", chunkID " + chunkID, ex); //NON-NLS
461  throw ex;
462  } catch (KeywordSearchModuleException ex) {
463  logger.log(Level.SEVERE, "Error executing Lucene Solr Query: " + query + ". Solr doc id " + solrObjectId + ", chunkID " + chunkID, ex); //NON-NLS
464  return "";
465  }
466  }
467 }

Copyright © 2012-2021 Basis Technology. Generated on: Thu Sep 30 2021
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.