20 package org.sleuthkit.autopsy.keywordsearch;
22 import java.util.ArrayList;
23 import java.util.Collection;
24 import java.util.HashSet;
25 import java.util.List;
27 import java.util.logging.Level;
29 import java.util.regex.Pattern;
30 import java.util.regex.PatternSyntaxException;
31 import org.apache.solr.client.solrj.SolrQuery;
32 import org.apache.solr.client.solrj.response.TermsResponse;
33 import org.apache.solr.client.solrj.response.TermsResponse.Term;
37 import org.
sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
39 import org.
sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
/**
 * Keyword search query that treats the keyword as a regular expression.
 *
 * The regex is first matched against indexed terms through Solr's "/terms"
 * request handler (see createQuery / executeQuery). Each matching term is then
 * searched for individually with a LuceneQuery to collect the actual hits
 * (see performQuery).
 */
45 class TermComponentQuery
implements KeywordSearchQuery {
// Terms limit set by createQuery(); by its name it requests an unlimited
// number of terms. performQuery() later replaces it with MAX_TERMS_RESULTS.
47 private static final int TERMS_UNLIMITED = -1;
// Index field whose terms the regex is matched against.
49 private static final String TERMS_SEARCH_FIELD = Server.Schema.CONTENT_WS.toString();
// Solr request handler the terms query is sent to.
50 private static final String TERMS_HANDLER =
"/terms";
// Value handed to SolrQuery.setTimeAllowed(), which takes milliseconds (90 s).
51 private static final int TERMS_TIMEOUT = 90 * 1000;
52 private static final Logger logger = Logger.getLogger(TermComponentQuery.class.getName());
// Regex string actually sent to Solr. Starts as the raw keyword query and is
// rewritten by setSubstringQuery() and escape().
53 private String queryEscaped;
// Keyword list this query belongs to; passed on to results and sub-queries.
54 private final KeywordList keywordList;
// The keyword (regex) being searched for.
55 private final Keyword keyword;
// NOTE(review): presumably records whether escape() has been applied; no
// assignment to this flag is visible in this class body - confirm.
56 private boolean isEscaped;
// Terms returned by the most recent executeQuery() call in performQuery().
57 private List<Term> terms;
// Filters forwarded to every per-term LuceneQuery in performQuery().
58 private final List<KeywordQueryFilter> filters =
new ArrayList<>();
// Terms limit applied in performQuery(), overriding TERMS_UNLIMITED.
60 private static final int MAX_TERMS_RESULTS = 20000;
// True only for development builds; controls Solr debug info in performQuery().
62 private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
/**
 * Creates a terms query for the given keyword.
 *
 * The query string sent to Solr is initialized to the keyword's raw query
 * text; call escape() and/or setSubstringQuery() to transform it.
 *
 * @param keywordList list the keyword belongs to
 * @param keyword     keyword whose query text is used as the regex
 */
64 public TermComponentQuery(KeywordList keywordList, Keyword keyword) {
66 this.keyword = keyword;
67 this.keywordList = keywordList;
68 this.queryEscaped = keyword.getQuery();
/**
 * Registers a filter. Registered filters are added to each per-term
 * LuceneQuery created in performQuery().
 *
 * @param filter filter to apply to the sub-queries
 */
74 public void addFilter(KeywordQueryFilter filter) {
75 this.filters.add(filter);
/**
 * Sets the field to query.
 *
 * NOTE(review): createQuery() and executeQuery() always use
 * TERMS_SEARCH_FIELD, so this value does not appear to influence the terms
 * query - confirm against the method body.
 *
 * @param field name of the index field
 */
79 public void setField(String field) {
/**
 * Turns the current query into a substring match by wrapping it in ".*" on
 * both sides, so the regex may match anywhere inside a term.
 *
 * The wrap is applied to the current value of queryEscaped; a later call to
 * escape() rebuilds queryEscaped from the raw keyword and drops the wrap.
 */
84 public void setSubstringQuery() {
85 queryEscaped =
".*" + queryEscaped +
".*";
/**
 * Replaces the query string with a quoted (literal) form of the keyword's
 * raw query text, using Pattern.quote.
 *
 * The result is derived from keyword.getQuery(), not from the current
 * queryEscaped, so any earlier setSubstringQuery() wrapping is discarded.
 */
89 public void escape() {
90 queryEscaped = Pattern.quote(keyword.getQuery());
/**
 * Checks the current query string: it is tested for being empty and then
 * compiled with java.util.regex.Pattern to detect syntax errors.
 *
 * NOTE(review): presumably returns false for an empty query or when
 * compilation throws, and true otherwise - confirm against the return
 * statements.
 *
 * @return whether the query is usable as a regular expression
 */
95 public boolean validate() {
96 if (queryEscaped.equals(
"")) {
100 boolean valid =
true;
// Compilation is done only for its side effect of throwing on bad syntax.
102 Pattern.compile(queryEscaped);
103 }
catch (PatternSyntaxException ex1) {
105 }
// PatternSyntaxException is itself an IllegalArgumentException; this second
// handler covers any other IllegalArgumentException raised by compile().
catch (IllegalArgumentException ex2) {
/**
 * NOTE(review): presumably reports the isEscaped flag of this query - confirm
 * against the method body.
 *
 * @return whether the query is considered escaped
 */
112 public boolean isEscaped() {
/**
 * NOTE(review): presumably reports whether this query is a literal (non-regex)
 * search - confirm against the method body.
 *
 * @return whether the query is a literal query
 */
117 public boolean isLiteral() {
/**
 * Builds the Solr terms query for the current query string.
 *
 * The query targets the TERMS_HANDLER request handler, matches
 * queryEscaped as a case-insensitive regex against the terms of
 * TERMS_SEARCH_FIELD, requests TERMS_UNLIMITED terms and allows
 * TERMS_TIMEOUT for execution.
 *
 * @return the configured query (NOTE(review): presumably q - confirm)
 */
124 protected SolrQuery createQuery() {
125 final SolrQuery q =
new SolrQuery();
126 q.setRequestHandler(TERMS_HANDLER);
// Callers may override this limit; performQuery() sets MAX_TERMS_RESULTS.
128 q.setTermsLimit(TERMS_UNLIMITED);
129 q.setTermsRegexFlag(
"case_insensitive");
133 q.setTermsRegex(queryEscaped);
134 q.addTermsField(TERMS_SEARCH_FIELD);
135 q.setTimeAllowed(TERMS_TIMEOUT);
/**
 * Sends the terms query to the keyword search Solr server and extracts the
 * matching terms for TERMS_SEARCH_FIELD from the response.
 *
 * A KeywordSearchModuleException is logged as a warning rather than
 * propagated. NOTE(review): the value returned after that failure is
 * presumably null - confirm, since performQuery() iterates over the result.
 *
 * @param q terms query, normally produced by createQuery()
 * @return terms matching the regex
 * @throws NoOpenCoreException declared by this method; not caught here
 */
144 protected List<Term> executeQuery(SolrQuery q)
throws NoOpenCoreException {
146 Server solrServer = KeywordSearch.getServer();
147 TermsResponse tr = solrServer.queryTerms(q);
148 List<Term> termsCol = tr.getTerms(TERMS_SEARCH_FIELD);
150 }
catch (KeywordSearchModuleException ex) {
// The raw keyword query (not queryEscaped) is what gets logged.
151 logger.log(Level.WARNING,
"Error executing the regex terms query: " + keyword.getQuery(), ex);
/**
 * Returns the query string as it is sent to Solr, i.e. after any
 * transformation by escape() or setSubstringQuery().
 *
 * @return the current (possibly transformed) query string
 */
157 public String getEscapedQueryString() {
158 return this.queryEscaped;
/**
 * Returns the keyword's original, untransformed query text.
 *
 * @return the raw query string of the keyword
 */
162 public String getQueryString() {
163 return keyword.getQuery();
/**
 * Records one keyword hit on the blackboard: creates a TSK_KEYWORD_HIT
 * artifact on the hit's content and attaches the attributes describing it.
 *
 * Failures to create the artifact or to add its attributes are logged as
 * warnings. NOTE(review): the value returned after such a failure is
 * presumably null - confirm.
 *
 * @param termHit  the term that matched; stored as TSK_KEYWORD
 * @param hit      the hit whose content receives the artifact
 * @param snippet  preview text; stored as TSK_KEYWORD_PREVIEW when not null
 * @param listName keyword list name; stored as TSK_SET_NAME when not null
 *                 and not empty
 * @return the artifact together with the attributes written to it
 */
167 public KeywordCachedArtifact writeSingleFileHitsToBlackBoard(String termHit, KeywordHit hit, String snippet, String listName) {
168 final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName();
171 BlackboardArtifact bba;
172 KeywordCachedArtifact writeResult;
173 Collection<BlackboardAttribute> attributes =
new ArrayList<>();
175 bba = hit.getContent().newArtifact(ARTIFACT_TYPE.TSK_KEYWORD_HIT);
176 writeResult =
new KeywordCachedArtifact(bba);
177 }
catch (Exception e) {
178 logger.log(Level.WARNING,
"Error adding bb artifact for keyword hit", e);
// The matched term itself.
183 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, termHit));
// The set name is only recorded for a non-null, non-empty list name.
185 if ((listName != null) && (listName.equals(
"") ==
false)) {
186 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SET_NAME, MODULE_NAME, listName));
190 if (snippet != null) {
191 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW, MODULE_NAME, snippet));
// The regex attribute holds the raw keyword query, not queryEscaped.
194 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP, MODULE_NAME, keyword.getQuery()));
// For hits found in an artifact, link back to that artifact by its ID.
196 if (hit.isArtifactHit()) {
197 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, hit.getArtifact().getArtifactID()));
// The same attribute collection is written to the blackboard artifact and
// to the cached wrapper that is handed back to the caller.
201 bba.addAttributes(attributes);
202 writeResult.add(attributes);
204 }
catch (TskException e) {
205 logger.log(Level.WARNING,
"Error adding bb attributes for terms search artifact", e);
/**
 * Runs the regex search in two stages.
 *
 * First the terms query is executed to find the indexed terms matching the
 * regex (at most MAX_TERMS_RESULTS). Then, for each matching term, a
 * LuceneQuery with the registered filters is run, and the hits of all its
 * keywords are merged into a set and stored in the result under a Keyword
 * built from the unescaped term.
 *
 * NoOpenCoreException and RuntimeException raised inside the per-term
 * handling are logged as warnings rather than propagated.
 *
 * @return results keyed by matching term
 * @throws NoOpenCoreException declared by this method; raised by the initial
 *                             terms query
 */
212 public QueryResults performQuery() throws NoOpenCoreException {
214 final SolrQuery q = createQuery();
215 q.setShowDebugInfo(DEBUG);
// Replaces the TERMS_UNLIMITED limit that createQuery() configured.
216 q.setTermsLimit(MAX_TERMS_RESULTS);
217 logger.log(Level.INFO,
"Query: {0}", q.toString());
218 terms = executeQuery(q);
220 QueryResults results =
new QueryResults(
this, keywordList);
// NOTE(review): if executeQuery() can return null after a logged failure,
// this loop would throw a NullPointerException - confirm.
223 for (Term term : terms) {
// The term is escaped for Lucene syntax before being used as a sub-query.
224 final String termStr = KeywordSearchUtil.escapeLuceneQuery(term.getTerm());
226 LuceneQuery filesQuery =
new LuceneQuery(keywordList,
new Keyword(termStr,
true));
229 for (KeywordQueryFilter filter : filters) {
233 filesQuery.addFilter(filter);
236 QueryResults subResults = filesQuery.performQuery();
// A set is used so a hit reported under several keywords is kept once.
237 Set<KeywordHit> filesResults =
new HashSet<>();
238 for (Keyword key : subResults.getKeywords()) {
239 List<KeywordHit> keyRes = subResults.getResults(key);
// Counts hits before de-duplication, so the logged total may exceed the
// number of distinct hits stored in the results.
240 resultSize += keyRes.size();
241 filesResults.addAll(keyRes);
// Results are keyed by the original (unescaped) term text.
243 results.addResult(
new Keyword(term.getTerm(),
false),
new ArrayList<>(filesResults));
244 }
catch (NoOpenCoreException e) {
245 logger.log(Level.WARNING,
"Error executing Solr query,", e);
247 }
catch (RuntimeException e) {
248 logger.log(Level.WARNING,
"Error executing Solr query,", e);
254 logger.log(Level.INFO,
"Regex # results: {0}", resultSize);
260 public KeywordList getKeywordList() {