19 package org.sleuthkit.autopsy.keywordsearch;
21 import java.util.ArrayList;
22 import java.util.Collection;
23 import java.util.Collections;
24 import java.util.Comparator;
25 import java.util.List;
28 import java.util.TreeSet;
29 import java.util.logging.Level;
30 import org.apache.solr.client.solrj.SolrQuery;
31 import org.apache.solr.client.solrj.SolrRequest.METHOD;
32 import org.apache.solr.client.solrj.response.QueryResponse;
33 import org.apache.solr.common.SolrDocument;
34 import org.apache.solr.common.SolrDocumentList;
40 import org.
sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
42 import org.
sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
/**
 * KeywordSearchQuery implementation that sends a single keyword to the Solr
 * server and turns the returned documents into keyword hits.
 *
 * NOTE(review): this listing is lossy (source lines are missing between the
 * numbered lines), so the comments here describe only what the visible
 * lines show.
 */
50 class LuceneQuery
implements KeywordSearchQuery {
52 private static final Logger logger = Logger.getLogger(LuceneQuery.class.getName());
// Raw search term, taken from the Keyword passed to the constructor.
53 private final String keywordString;
// Form of the term that is quoted and sent to Solr; starts as the raw term
// and is rewritten by escape() and setSubstringQuery().
54 private String keywordStringEscaped;
55 private boolean isEscaped;
56 private Keyword keyword = null;
57 private KeywordList keywordList = null;
// Each filter's toString() is added to the Solr query as a filter query.
58 private final List<KeywordQueryFilter> filters =
new ArrayList<>();
// Optional field name that is prefixed to the query as "field:query".
59 private String field = null;
// Row limit set on the Solr query and step size of the paging loop.
60 private static final int MAX_RESULTS = 20000;
// Passed to Solr as the highlight fragment size.
61 static final int SNIPPET_LENGTH = 50;
// Both highlight-field constants currently resolve to the same schema field.
63 static final String HIGHLIGHT_FIELD_LITERAL = Server.Schema.TEXT.toString();
64 static final String HIGHLIGHT_FIELD_REGEX = Server.Schema.TEXT.toString();
// Enables Solr debug info on queries for development builds only.
68 private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
/**
 * Creates a query for a single keyword.
 *
 * @param keywordList list the keyword belongs to; stored as-is
 * @param keyword     keyword to search for; its search term becomes the
 *                    query string. It is dereferenced on the visible lines,
 *                    so a null keyword would fail here.
 */
75 public LuceneQuery(KeywordList keywordList, Keyword keyword) {
76 this.keywordList = keywordList;
77 this.keyword = keyword;
// The escaped form starts out identical to the raw term.
81 this.keywordString = keyword.getSearchTerm();
82 this.keywordStringEscaped = this.keywordString;
/**
 * Registers a filter for this query.
 *
 * @param filter filter to append to the list of filters kept by this query
 */
86 public void addFilter(KeywordQueryFilter filter) {
87 this.filters.add(filter);
91 public void setField(String field) {
/**
 * Appends "*" to the escaped query string.
 *
 * NOTE(review): on the visible lines nothing prevents a second call from
 * appending another "*" — confirm against the full source.
 */
96 public void setSubstringQuery() {
99 keywordStringEscaped = keywordStringEscaped +
"*";
/**
 * Replaces the escaped query string with the Lucene-escaped form of the raw
 * search term.
 *
 * NOTE(review): the value is recomputed from keywordString, not from the
 * current keywordStringEscaped, so a "*" appended earlier by
 * setSubstringQuery() is dropped if escape() is called afterwards.
 */
103 public void escape() {
104 keywordStringEscaped = KeywordSearchUtil.escapeLuceneQuery(keywordString);
109 public boolean isEscaped() {
114 public boolean isLiteral() {
/**
 * @return the current escaped query string (the form used to build the
 *         Solr query)
 */
119 public String getEscapedQueryString() {
120 return this.keywordStringEscaped;
/**
 * @return the raw, unescaped search term this query was created with
 */
124 public String getQueryString() {
125 return this.keywordString;
/**
 * Runs the query and collects the hits in a QueryResults object.
 *
 * All hits are stored under a single new Keyword built from the raw search
 * term. Whether snippets are requested is read from KeywordSearchSettings.
 *
 * NOTE(review): the return statement is not visible in this listing;
 * presumably the QueryResults built here is returned — confirm.
 *
 * @throws KeywordSearchModuleException declared; propagated from
 *                                      performLuceneQuery()
 * @throws NoOpenCoreException          declared; propagated from
 *                                      performLuceneQuery()
 */
129 public QueryResults performQuery() throws KeywordSearchModuleException, NoOpenCoreException {
130 QueryResults results =
new QueryResults(
this, keywordList);
132 boolean showSnippets = KeywordSearchSettings.getShowSnippets();
133 results.addResult(
new Keyword(keywordString,
true), performLuceneQuery(showSnippets));
/**
 * Checks that there is something to search for.
 *
 * @return true when the raw search term is neither null nor the empty
 *         string; a whitespace-only term still counts as valid
 */
139 public boolean validate() {
140 return keywordString != null && !keywordString.equals(
"");
/**
 * Records one keyword hit on the blackboard: creates a TSK_KEYWORD_HIT
 * artifact on the hit's content, attaches the attributes built below, and
 * mirrors the same attributes into a KeywordCachedArtifact.
 *
 * NOTE(review): the try headers, the return statements and anything in the
 * catch blocks beyond the log calls are not visible in this listing, so the
 * value returned on success or failure cannot be stated from here.
 *
 * @param termHit  matched term; stored as TSK_KEYWORD and, when the keyword
 *                 has its own attribute type, under that type as well
 * @param hit      hit whose content receives the new artifact
 * @param snippet  preview text; stored as TSK_KEYWORD_PREVIEW when non-null
 * @param listName keyword list name; stored as TSK_SET_NAME when non-null
 *                 and non-empty
 */
144 public KeywordCachedArtifact writeSingleFileHitsToBlackBoard(String termHit, KeywordHit hit, String snippet, String listName) {
145 final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName();
147 Collection<BlackboardAttribute> attributes =
new ArrayList<>();
148 BlackboardArtifact bba;
149 KeywordCachedArtifact writeResult;
151 bba = hit.getContent().newArtifact(ARTIFACT_TYPE.TSK_KEYWORD_HIT);
152 writeResult =
new KeywordCachedArtifact(bba);
153 }
catch (Exception e) {
154 logger.log(Level.WARNING,
"Error adding bb artifact for keyword hit", e);
// Optional attributes are added only when a value is available.
158 if (snippet != null) {
159 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW, MODULE_NAME, snippet));
161 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, termHit));
162 if ((listName != null) && (listName.equals(
"") ==
false)) {
163 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SET_NAME, MODULE_NAME, listName));
// Also record the term under the keyword's own attribute type, if it has one.
169 if (keyword != null) {
170 BlackboardAttribute.ATTRIBUTE_TYPE selType = keyword.getArtifactAttributeType();
171 if (selType != null) {
172 attributes.add(
new BlackboardAttribute(selType, MODULE_NAME, termHit));
// Link back to the source artifact when the hit was found in an artifact.
176 if (hit.isArtifactHit()) {
177 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, hit.getArtifact().getArtifactID()));
// The same attribute collection goes to the artifact and to the cached copy.
181 bba.addAttributes(attributes);
182 writeResult.add(attributes);
184 }
catch (TskException e) {
185 logger.log(Level.WARNING,
"Error adding bb attributes to artifact", e);
/**
 * Executes the Solr query and converts each returned document into a
 * KeywordHit.
 *
 * NOTE(review): on the visible lines the query is executed once, before the
 * paging loop, and neither resultList nor highlightResponse is reassigned
 * inside the loop. If that also holds in the full source, every pass after
 * the first re-processes the same page of documents whenever more than
 * MAX_RESULTS documents match — confirm against the missing lines.
 * NOTE(review): the bodies of both catch blocks and the final return are
 * not visible in this listing.
 *
 * @param snippets forwarded to createAndConfigureSolrQuery()
 * @throws KeywordSearchModuleException declared by this method
 * @throws NoOpenCoreException          declared by this method
 */
200 private List<KeywordHit> performLuceneQuery(
boolean snippets)
throws KeywordSearchModuleException, NoOpenCoreException {
201 List<KeywordHit> matches =
new ArrayList<>();
202 boolean allMatchesFetched =
false;
203 final Server solrServer = KeywordSearch.getServer();
205 SolrQuery q = createAndConfigureSolrQuery(snippets);
206 QueryResponse response;
207 SolrDocumentList resultList;
208 Map<String, Map<String, List<String>>> highlightResponse;
210 response = solrServer.query(q, METHOD.POST);
212 resultList = response.getResults();
// Highlighting is keyed by document id, then by field name (see the
// lookups in createKeywordtHit()).
215 highlightResponse = response.getHighlighting();
// Step through the result set MAX_RESULTS documents at a time until the
// total number of matches reported by Solr has been covered.
218 for (
int start = 0; !allMatchesFetched; start = start + MAX_RESULTS) {
221 allMatchesFetched = start + MAX_RESULTS >= resultList.getNumFound();
223 SleuthkitCase sleuthkitCase;
// getCurrentCase() is guarded against IllegalStateException here.
225 sleuthkitCase = Case.getCurrentCase().getSleuthkitCase();
226 }
catch (IllegalStateException ex) {
230 for (SolrDocument resultDoc : resultList) {
231 KeywordHit contentHit;
233 contentHit = createKeywordtHit(resultDoc, highlightResponse, sleuthkitCase);
234 }
catch (TskException ex) {
237 matches.add(contentHit);
/**
 * Builds the SolrQuery for this keyword: quoted query string, row limit,
 * returned fields, sort order, filter queries and highlighting parameters.
 *
 * NOTE(review): the conditions guarding the field prefix and the
 * highlighting setup are not visible in this listing; presumably they
 * depend on "field" being set and on "snippets" — confirm. The return
 * statement is not visible either.
 *
 * @param snippets not used on any visible line; see the note above
 */
250 private SolrQuery createAndConfigureSolrQuery(
boolean snippets) {
251 SolrQuery q =
new SolrQuery();
252 q.setShowDebugInfo(DEBUG);
254 final String groupedQuery = KeywordSearchUtil.quoteQuery(keywordStringEscaped);
255 String theQueryStr = groupedQuery;
// Restrict the query to one field by prefixing "field:".
258 StringBuilder sb =
new StringBuilder();
259 sb.append(field).append(
":").append(groupedQuery);
260 theQueryStr = sb.toString();
262 q.setQuery(theQueryStr);
263 q.setRows(MAX_RESULTS);
// Only the document id is requested back; results are sorted by id ascending.
265 q.setFields(Server.Schema.ID.toString());
266 q.addSort(Server.Schema.ID.toString(), SolrQuery.ORDER.asc);
267 for (KeywordQueryFilter filter : filters) {
268 q.addFilterQuery(filter.toString());
// Highlighting: a single fragment of SNIPPET_LENGTH from the text field.
272 q.addHighlightField(Server.Schema.TEXT.toString());
275 q.setHighlightSnippets(1);
276 q.setHighlightFragsize(SNIPPET_LENGTH);
279 q.setParam(
"hl.useFastVectorHighlighter",
"on");
// NOTE(review): hl.tag.pre and hl.tag.post are set to the same string, so
// the start and end of a highlighted term cannot be told apart by the
// marker alone — confirm this is intended.
280 q.setParam(
"hl.tag.pre",
"«");
281 q.setParam(
"hl.tag.post",
"«");
282 q.setParam(
"hl.fragListBuilder",
"simple");
// The fragment character size is tied to the length of the final query string.
285 q.setParam(
"hl.fragCharSize", Integer.toString(theQueryStr.length()));
289 q.setParam(
"hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED);
/**
 * Builds a KeywordHit from one Solr result document, attaching the first
 * highlight snippet for the text field when snippets are enabled in
 * KeywordSearchSettings.
 *
 * NOTE(review): highlightResponse.get(docId) is dereferenced without a null
 * check, whereas querySnippet() does check the equivalent lookup for null;
 * a document without a highlighting entry would fail here — confirm.
 * NOTE(review): the declaration of "snippet" is not visible in this
 * listing, and caseDb is not used on any visible line.
 *
 * @param solrDoc           result document; its id field identifies the hit
 * @param highlightResponse highlighting keyed by document id, then field
 * @param caseDb            see the note above
 * @throws TskException declared by this method
 */
295 private KeywordHit createKeywordtHit(SolrDocument solrDoc, Map<String, Map<String, List<String>>> highlightResponse, SleuthkitCase caseDb)
throws TskException {
300 final String docId = solrDoc.getFieldValue(Server.Schema.ID.toString()).toString();
302 if (KeywordSearchSettings.getShowSnippets()) {
303 List<String> snippetList = highlightResponse.get(docId).get(Server.Schema.TEXT.toString());
// No entry for the text field is tolerated: the snippet is left as it was.
305 if (snippetList != null) {
306 snippet = EscapeUtil.unEscapeHtml(snippetList.get(0)).trim();
309 return new KeywordHit(docId, snippet);
/**
 * Convenience overload of querySnippet() that passes 0 as the chunk id.
 *
 * @param query        query string to highlight
 * @param solrObjectId object id of the content to search in
 * @param isRegex      forwarded unchanged to the five-argument overload
 * @param group        forwarded unchanged to the five-argument overload
 * @return whatever the five-argument overload returns
 * @throws NoOpenCoreException declared; propagated from the overload
 */
326 public static String querySnippet(String query,
long solrObjectId,
boolean isRegex,
boolean group)
throws NoOpenCoreException {
327 return querySnippet(query, solrObjectId, 0, isRegex, group);
/**
 * Queries Solr for a single document (or chunk) and returns the first
 * highlight snippet for the query, HTML-unescaped and trimmed.
 *
 * NOTE(review): the if/else headers that choose between the two
 * highlightField assignments, the two queryStr assignments and the two
 * contentIDStr assignments are not visible in this listing; presumably they
 * test isRegex, group and chunkID respectively — confirm. The values
 * returned when no highlight is found, and what the catch blocks do after
 * logging, are not visible either.
 *
 * @param query        query string to highlight
 * @param solrObjectId object id of the content to search in
 * @param chunkID      chunk number, combined with the object id by
 *                     Server.getChunkIdString() on one of the two branches
 * @param isRegex      see the note above
 * @param group        see the note above
 * @throws NoOpenCoreException declared by this method
 */
345 public static String querySnippet(String query,
long solrObjectId,
int chunkID,
boolean isRegex,
boolean group)
throws NoOpenCoreException {
346 Server solrServer = KeywordSearch.getServer();
// Both constants currently name the same schema field, so either branch
// yields the same highlight field.
348 String highlightField;
350 highlightField = LuceneQuery.HIGHLIGHT_FIELD_REGEX;
352 highlightField = LuceneQuery.HIGHLIGHT_FIELD_LITERAL;
355 SolrQuery q =
new SolrQuery();
360 StringBuilder sb =
new StringBuilder();
361 sb.append(highlightField).append(
":");
370 queryStr = sb.toString();
374 queryStr = KeywordSearchUtil.quoteQuery(query);
377 q.setQuery(queryStr);
// The id is either the bare object id or the object id combined with the chunk id.
382 contentIDStr = Long.toString(solrObjectId);
384 contentIDStr = Server.getChunkIdString(solrObjectId, chunkID);
// Restrict the search to that one document via a filter query on the id field.
387 String idQuery = Server.Schema.ID.toString() +
":" + KeywordSearchUtil.escapeLuceneQuery(contentIDStr);
388 q.setShowDebugInfo(DEBUG);
389 q.addFilterQuery(idQuery);
390 q.addHighlightField(highlightField);
// Same highlighting configuration as createAndConfigureSolrQuery().
393 q.setHighlightSnippets(1);
394 q.setHighlightFragsize(SNIPPET_LENGTH);
397 q.setParam(
"hl.useFastVectorHighlighter",
"on");
// NOTE(review): hl.tag.pre and hl.tag.post are set to the same string —
// confirm this is intended.
398 q.setParam(
"hl.tag.pre",
"«");
399 q.setParam(
"hl.tag.post",
"«");
400 q.setParam(
"hl.fragListBuilder",
"simple");
403 q.setParam(
"hl.fragCharSize", Integer.toString(queryStr.length()));
407 q.setParam(
"hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED);
410 QueryResponse response = solrServer.query(q, METHOD.POST);
// Highlighting is keyed by document id, then by field name; both lookups
// are checked for null before the first snippet is read.
411 Map<String, Map<String, List<String>>> responseHighlight = response.getHighlighting();
412 Map<String, List<String>> responseHighlightID = responseHighlight.get(contentIDStr);
413 if (responseHighlightID == null) {
416 List<String> contentHighlights = responseHighlightID.get(highlightField);
417 if (contentHighlights == null) {
421 return EscapeUtil.unEscapeHtml(contentHighlights.get(0)).trim();
423 }
catch (NoOpenCoreException ex) {
424 logger.log(Level.WARNING,
"Error executing Lucene Solr Query: " + query, ex);
426 }
catch (KeywordSearchModuleException ex) {
427 logger.log(Level.WARNING,
"Error executing Lucene Solr Query: " + query, ex);
433 public KeywordList getKeywordList() {
/**
 * Orders two Solr documents by the numeric value of their id field, after
 * cutting each id string down to its first "index" characters.
 *
 * NOTE(review): the definitions of idName and index, and the conditions
 * guarding the two substring calls, are not visible in this listing;
 * presumably index is the position of the chunk-id separator — confirm.
 * NOTE(review): new Long(String) is a deprecated boxing constructor and
 * throws NumberFormatException on a non-numeric id; Long.parseLong with
 * Long.compare would avoid the boxing.
 *
 * @param left  first document
 * @param right second document
 * @return the result of comparing the two ids as Long values
 */
444 public int compare(SolrDocument left, SolrDocument right) {
450 String leftID = left.getFieldValue(idName).toString();
453 leftID = leftID.substring(0, index);
457 String rightID = right.getFieldValue(idName).toString();
460 rightID = rightID.substring(0, index);
463 Long leftLong =
new Long(leftID);
464 Long rightLong =
new Long(rightID);
465 return leftLong.compareTo(rightLong);
static final String CHUNK_ID_SEPARATOR
int compare(SolrDocument left, SolrDocument right)