Autopsy  4.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
LuceneQuery.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2015 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.keywordsearch;
20 
21 import java.util.ArrayList;
22 import java.util.Collection;
23 import java.util.Collections;
24 import java.util.Comparator;
25 import java.util.List;
26 import java.util.Map;
27 import java.util.Set;
28 import java.util.TreeSet;
29 import java.util.logging.Level;
30 import org.apache.solr.client.solrj.SolrQuery;
31 import org.apache.solr.client.solrj.SolrRequest.METHOD;
32 import org.apache.solr.client.solrj.response.QueryResponse;
33 import org.apache.solr.common.SolrDocument;
34 import org.apache.solr.common.SolrDocumentList;
39 import org.sleuthkit.datamodel.BlackboardArtifact;
40 import org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
41 import org.sleuthkit.datamodel.BlackboardAttribute;
42 import org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
43 import org.sleuthkit.datamodel.SleuthkitCase;
44 import org.sleuthkit.datamodel.TskException;
45 
50 class LuceneQuery implements KeywordSearchQuery {
51 
52  private static final Logger logger = Logger.getLogger(LuceneQuery.class.getName());
53  private final String keywordString; //original unescaped query
54  private String keywordStringEscaped;
55  private boolean isEscaped;
56  private Keyword keyword = null;
57  private KeywordList keywordList = null;
58  private final List<KeywordQueryFilter> filters = new ArrayList<>();
59  private String field = null;
60  private static final int MAX_RESULTS = 20000;
61  static final int SNIPPET_LENGTH = 50;
62  //can use different highlight schema fields for regex and literal search
63  static final String HIGHLIGHT_FIELD_LITERAL = Server.Schema.TEXT.toString();
64  static final String HIGHLIGHT_FIELD_REGEX = Server.Schema.TEXT.toString();
65  //TODO use content_ws stored="true" in solr schema for perfect highlight hits
66  //static final String HIGHLIGHT_FIELD_REGEX = Server.Schema.CONTENT_WS.toString()
67 
68  private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
69 
75  public LuceneQuery(KeywordList keywordList, Keyword keyword) {
76  this.keywordList = keywordList;
77  this.keyword = keyword;
78 
79  // @@@ BC: Long-term, we should try to get rid of this string and use only the
80  // keyword object. Refactoring did not make its way through this yet.
81  this.keywordString = keyword.getSearchTerm();
82  this.keywordStringEscaped = this.keywordString;
83  }
84 
85  @Override
86  public void addFilter(KeywordQueryFilter filter) {
87  this.filters.add(filter);
88  }
89 
90  @Override
91  public void setField(String field) {
92  this.field = field;
93  }
94 
95  @Override
96  public void setSubstringQuery() {
97  // Note that this is not a full substring search. Normally substring
98  // searches will be done with TermComponentQuery objects instead.
99  keywordStringEscaped = keywordStringEscaped + "*";
100  }
101 
102  @Override
103  public void escape() {
104  keywordStringEscaped = KeywordSearchUtil.escapeLuceneQuery(keywordString);
105  isEscaped = true;
106  }
107 
108  @Override
109  public boolean isEscaped() {
110  return isEscaped;
111  }
112 
113  @Override
114  public boolean isLiteral() {
115  return true;
116  }
117 
118  @Override
119  public String getEscapedQueryString() {
120  return this.keywordStringEscaped;
121  }
122 
123  @Override
124  public String getQueryString() {
125  return this.keywordString;
126  }
127 
128  @Override
129  public QueryResults performQuery() throws KeywordSearchModuleException, NoOpenCoreException {
130  QueryResults results = new QueryResults(this, keywordList);
131  //in case of single term literal query there is only 1 term
132  boolean showSnippets = KeywordSearchSettings.getShowSnippets();
133  results.addResult(new Keyword(keywordString, true), performLuceneQuery(showSnippets));
134 
135  return results;
136  }
137 
138  @Override
139  public boolean validate() {
140  return keywordString != null && !keywordString.equals("");
141  }
142 
143  @Override
144  public KeywordCachedArtifact writeSingleFileHitsToBlackBoard(String termHit, KeywordHit hit, String snippet, String listName) {
145  final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName();
146 
147  Collection<BlackboardAttribute> attributes = new ArrayList<>();
148  BlackboardArtifact bba;
149  KeywordCachedArtifact writeResult;
150  try {
151  bba = hit.getContent().newArtifact(ARTIFACT_TYPE.TSK_KEYWORD_HIT);
152  writeResult = new KeywordCachedArtifact(bba);
153  } catch (Exception e) {
154  logger.log(Level.WARNING, "Error adding bb artifact for keyword hit", e); //NON-NLS
155  return null;
156  }
157 
158  if (snippet != null) {
159  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW, MODULE_NAME, snippet));
160  }
161  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, termHit));
162  if ((listName != null) && (listName.equals("") == false)) {
163  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SET_NAME, MODULE_NAME, listName));
164  }
165 
166  //bogus - workaround the dir tree table issue
167  //attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP.getTypeID(), MODULE_NAME, "", ""));
168  //selector
169  if (keyword != null) {
170  BlackboardAttribute.ATTRIBUTE_TYPE selType = keyword.getArtifactAttributeType();
171  if (selType != null) {
172  attributes.add(new BlackboardAttribute(selType, MODULE_NAME, termHit));
173  }
174  }
175 
176  if (hit.isArtifactHit()) {
177  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, hit.getArtifact().getArtifactID()));
178  }
179 
180  try {
181  bba.addAttributes(attributes); //write out to bb
182  writeResult.add(attributes);
183  return writeResult;
184  } catch (TskException e) {
185  logger.log(Level.WARNING, "Error adding bb attributes to artifact", e); //NON-NLS
186  }
187  return null;
188  }
189 
200  private List<KeywordHit> performLuceneQuery(boolean snippets) throws KeywordSearchModuleException, NoOpenCoreException {
201  List<KeywordHit> matches = new ArrayList<>();
202  boolean allMatchesFetched = false;
203  final Server solrServer = KeywordSearch.getServer();
204 
205  SolrQuery q = createAndConfigureSolrQuery(snippets);
206  QueryResponse response;
207  SolrDocumentList resultList;
208  Map<String, Map<String, List<String>>> highlightResponse;
209 
210  response = solrServer.query(q, METHOD.POST);
211 
212  resultList = response.getResults();
213 
214  // objectId_chunk -> "text" -> List of previews
215  highlightResponse = response.getHighlighting();
216 
217  // cycle through results in sets of MAX_RESULTS
218  for (int start = 0; !allMatchesFetched; start = start + MAX_RESULTS) {
219  q.setStart(start);
220 
221  allMatchesFetched = start + MAX_RESULTS >= resultList.getNumFound();
222 
223  SleuthkitCase sleuthkitCase;
224  try {
225  sleuthkitCase = Case.getCurrentCase().getSleuthkitCase();
226  } catch (IllegalStateException ex) {
227  //no case open, must be just closed
228  return matches;
229  }
230  for (SolrDocument resultDoc : resultList) {
231  KeywordHit contentHit;
232  try {
233  contentHit = createKeywordtHit(resultDoc, highlightResponse, sleuthkitCase);
234  } catch (TskException ex) {
235  return matches;
236  }
237  matches.add(contentHit);
238  }
239  }
240  return matches;
241  }
242 
250  private SolrQuery createAndConfigureSolrQuery(boolean snippets) {
251  SolrQuery q = new SolrQuery();
252  q.setShowDebugInfo(DEBUG); //debug
253  //set query, force quotes/grouping around all literal queries
254  final String groupedQuery = KeywordSearchUtil.quoteQuery(keywordStringEscaped);
255  String theQueryStr = groupedQuery;
256  if (field != null) {
257  //use the optional field
258  StringBuilder sb = new StringBuilder();
259  sb.append(field).append(":").append(groupedQuery);
260  theQueryStr = sb.toString();
261  }
262  q.setQuery(theQueryStr);
263  q.setRows(MAX_RESULTS);
264 
265  q.setFields(Server.Schema.ID.toString());
266  q.addSort(Server.Schema.ID.toString(), SolrQuery.ORDER.asc);
267  for (KeywordQueryFilter filter : filters) {
268  q.addFilterQuery(filter.toString());
269  }
270 
271  if (snippets) {
272  q.addHighlightField(Server.Schema.TEXT.toString());
273  //q.setHighlightSimplePre("&laquo;"); //original highlighter only
274  //q.setHighlightSimplePost("&raquo;"); //original highlighter only
275  q.setHighlightSnippets(1);
276  q.setHighlightFragsize(SNIPPET_LENGTH);
277 
278  //tune the highlighter
279  q.setParam("hl.useFastVectorHighlighter", "on"); //fast highlighter scales better than standard one NON-NLS
280  q.setParam("hl.tag.pre", "&laquo;"); //makes sense for FastVectorHighlighter only NON-NLS
281  q.setParam("hl.tag.post", "&laquo;"); //makes sense for FastVectorHighlighter only NON-NLS
282  q.setParam("hl.fragListBuilder", "simple"); //makes sense for FastVectorHighlighter only NON-NLS
283 
284  //Solr bug if fragCharSize is smaller than Query string, StringIndexOutOfBoundsException is thrown.
285  q.setParam("hl.fragCharSize", Integer.toString(theQueryStr.length())); //makes sense for FastVectorHighlighter only NON-NLS
286 
287  //docs says makes sense for the original Highlighter only, but not really
288  //analyze all content SLOW! consider lowering
289  q.setParam("hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED); //NON-NLS
290  }
291 
292  return q;
293  }
294 
295  private KeywordHit createKeywordtHit(SolrDocument solrDoc, Map<String, Map<String, List<String>>> highlightResponse, SleuthkitCase caseDb) throws TskException {
300  final String docId = solrDoc.getFieldValue(Server.Schema.ID.toString()).toString();
301  String snippet = "";
302  if (KeywordSearchSettings.getShowSnippets()) {
303  List<String> snippetList = highlightResponse.get(docId).get(Server.Schema.TEXT.toString());
304  // list is null if there wasn't a snippet
305  if (snippetList != null) {
306  snippet = EscapeUtil.unEscapeHtml(snippetList.get(0)).trim();
307  }
308  }
309  return new KeywordHit(docId, snippet);
310  }
311 
326  public static String querySnippet(String query, long solrObjectId, boolean isRegex, boolean group) throws NoOpenCoreException {
327  return querySnippet(query, solrObjectId, 0, isRegex, group);
328  }
329 
345  public static String querySnippet(String query, long solrObjectId, int chunkID, boolean isRegex, boolean group) throws NoOpenCoreException {
346  Server solrServer = KeywordSearch.getServer();
347 
348  String highlightField;
349  if (isRegex) {
350  highlightField = LuceneQuery.HIGHLIGHT_FIELD_REGEX;
351  } else {
352  highlightField = LuceneQuery.HIGHLIGHT_FIELD_LITERAL;
353  }
354 
355  SolrQuery q = new SolrQuery();
356 
357  String queryStr;
358 
359  if (isRegex) {
360  StringBuilder sb = new StringBuilder();
361  sb.append(highlightField).append(":");
362  if (group) {
363  sb.append("\"");
364  }
365  sb.append(query);
366  if (group) {
367  sb.append("\"");
368  }
369 
370  queryStr = sb.toString();
371  } else {
372  //simplify query/escaping and use default field
373  //always force grouping/quotes
374  queryStr = KeywordSearchUtil.quoteQuery(query);
375  }
376 
377  q.setQuery(queryStr);
378 
379  String contentIDStr;
380 
381  if (chunkID == 0) {
382  contentIDStr = Long.toString(solrObjectId);
383  } else {
384  contentIDStr = Server.getChunkIdString(solrObjectId, chunkID);
385  }
386 
387  String idQuery = Server.Schema.ID.toString() + ":" + KeywordSearchUtil.escapeLuceneQuery(contentIDStr);
388  q.setShowDebugInfo(DEBUG); //debug
389  q.addFilterQuery(idQuery);
390  q.addHighlightField(highlightField);
391  //q.setHighlightSimplePre("&laquo;"); //original highlighter only
392  //q.setHighlightSimplePost("&raquo;"); //original highlighter only
393  q.setHighlightSnippets(1);
394  q.setHighlightFragsize(SNIPPET_LENGTH);
395 
396  //tune the highlighter
397  q.setParam("hl.useFastVectorHighlighter", "on"); //fast highlighter scales better than standard one NON-NLS
398  q.setParam("hl.tag.pre", "&laquo;"); //makes sense for FastVectorHighlighter only NON-NLS
399  q.setParam("hl.tag.post", "&laquo;"); //makes sense for FastVectorHighlighter only NON-NLS
400  q.setParam("hl.fragListBuilder", "simple"); //makes sense for FastVectorHighlighter only NON-NLS
401 
402  //Solr bug if fragCharSize is smaller than Query string, StringIndexOutOfBoundsException is thrown.
403  q.setParam("hl.fragCharSize", Integer.toString(queryStr.length())); //makes sense for FastVectorHighlighter only NON-NLS
404 
405  //docs says makes sense for the original Highlighter only, but not really
406  //analyze all content SLOW! consider lowering
407  q.setParam("hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED); //NON-NLS
408 
409  try {
410  QueryResponse response = solrServer.query(q, METHOD.POST);
411  Map<String, Map<String, List<String>>> responseHighlight = response.getHighlighting();
412  Map<String, List<String>> responseHighlightID = responseHighlight.get(contentIDStr);
413  if (responseHighlightID == null) {
414  return "";
415  }
416  List<String> contentHighlights = responseHighlightID.get(highlightField);
417  if (contentHighlights == null) {
418  return "";
419  } else {
420  // extracted content is HTML-escaped, but snippet goes in a plain text field
421  return EscapeUtil.unEscapeHtml(contentHighlights.get(0)).trim();
422  }
423  } catch (NoOpenCoreException ex) {
424  logger.log(Level.WARNING, "Error executing Lucene Solr Query: " + query, ex); //NON-NLS
425  throw ex;
426  } catch (KeywordSearchModuleException ex) {
427  logger.log(Level.WARNING, "Error executing Lucene Solr Query: " + query, ex); //NON-NLS
428  return "";
429  }
430  }
431 
432  @Override
433  public KeywordList getKeywordList() {
434  return keywordList;
435  }
436 
441  private class SolrDocumentComparatorIgnoresChunkId implements Comparator<SolrDocument> {
442 
443  @Override
444  public int compare(SolrDocument left, SolrDocument right) {
445  // ID is in the form of ObjectId_Chunk
446 
447  final String idName = Server.Schema.ID.toString();
448 
449  // get object id of left doc
450  String leftID = left.getFieldValue(idName).toString();
451  int index = leftID.indexOf(Server.CHUNK_ID_SEPARATOR);
452  if (index != -1) {
453  leftID = leftID.substring(0, index);
454  }
455 
456  // get object id of right doc
457  String rightID = right.getFieldValue(idName).toString();
458  index = rightID.indexOf(Server.CHUNK_ID_SEPARATOR);
459  if (index != -1) {
460  rightID = rightID.substring(0, index);
461  }
462 
463  Long leftLong = new Long(leftID);
464  Long rightLong = new Long(rightID);
465  return leftLong.compareTo(rightLong);
466  }
467  }
468 
469 }

Copyright © 2012-2016 Basis Technology. Generated on: Mon Jan 2 2017
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.