Autopsy  4.5.0
Graphical digital forensics platform for The Sleuth Kit and other tools.
TermsComponentQuery.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2017 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.keywordsearch;
20 
21 import com.google.common.base.CharMatcher;
22 import java.util.ArrayList;
23 import java.util.Collection;
24 import java.util.HashMap;
25 import java.util.HashSet;
26 import java.util.List;
27 import java.util.Map;
28 import java.util.Set;
29 import java.util.logging.Level;
30 import java.util.regex.Matcher;
31 import java.util.regex.Pattern;
32 import org.apache.commons.lang.StringUtils;
33 import org.apache.solr.client.solrj.SolrQuery;
34 import org.apache.solr.client.solrj.response.TermsResponse.Term;
35 import org.openide.util.Exceptions;
40 import org.sleuthkit.datamodel.AbstractFile;
41 import org.sleuthkit.datamodel.Account;
42 import org.sleuthkit.datamodel.AccountFileInstance;
43 import org.sleuthkit.datamodel.BlackboardArtifact;
44 import org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
45 import org.sleuthkit.datamodel.BlackboardAttribute;
46 import org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
47 import org.sleuthkit.datamodel.Content;
48 import org.sleuthkit.datamodel.TskCoreException;
49 import org.sleuthkit.datamodel.TskData;
50 
57 final class TermsComponentQuery implements KeywordSearchQuery {
58 
/* Logger for this class. */
59  private static final Logger LOGGER = Logger.getLogger(TermsComponentQuery.class.getName());
/* Module name recorded as the source of every blackboard attribute created by this class. */
60  private static final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName();
/* Request handler set on the Solr terms query in performQuery(). */
61  private static final String SEARCH_HANDLER = "/terms"; //NON-NLS
/* Index field whose terms are matched against the regex in performQuery(). */
62  private static final String SEARCH_FIELD = Server.Schema.TEXT.toString();
/* Passed to SolrQuery.setTimeAllowed() for the terms query. */
63  private static final int TERMS_SEARCH_TIMEOUT = 90 * 1000; // Milliseconds
/* Regex flag passed to SolrQuery.setTermsRegexFlag() for the terms query. */
64  private static final String CASE_INSENSITIVE = "case_insensitive"; //NON-NLS
/* True for development builds; passed to SolrQuery.setShowDebugInfo(). */
65  private static final boolean DEBUG_FLAG = Version.Type.DEVELOPMENT.equals(Version.getBuildType());
/* Passed to SolrQuery.setTermsLimit() to cap the number of terms returned by the terms query. */
66  private static final int MAX_TERMS_QUERY_RESULTS = 20000;
67 
/* Keyword list supplied at construction; returned by getKeywordList() and handed to each per-term LuceneQuery. */
68  private final KeywordList keywordList;
/* Keyword supplied at construction; its search term is never modified by this class. */
69  private final Keyword originalKeyword;
/* Filters collected by addFilter() and forwarded to each per-term LuceneQuery in performQuery(). */
70  private final List<KeywordQueryFilter> filters = new ArrayList<>(); // THIS APPEARS TO BE UNUSED
71 
/* Working regex: starts as the keyword's search term and is rewritten by setSubstringQuery() and escape(). */
72  private String searchTerm;
/* Set to true by escape(); reported by isEscaped(). */
73  private boolean searchTermIsEscaped;
74 
75  /*
76  * The following fields are part of the initial implementation of credit
77  * card account search and should be factored into another class when time
78  * permits.
79  */
/*
 * Matches a candidate credit card number: a first digit of 2-6 followed by
 * 11-18 further digits, each optionally preceded by a single space or dash
 * (12-19 digits in total). The whole number is captured in the named group
 * "ccn".
 */
85  static final Pattern CREDIT_CARD_NUM_PATTERN
86  = Pattern.compile("(?<ccn>[2-6]([ -]?[0-9]){11,18})");
/*
 * Matches track 1 formatted data. Named groups: accountNumber, name,
 * expiration, serviceCode, discretionary and LRC. Everything after the name
 * is nested in optional groups, so a truncated track still matches.
 */
87  static final Pattern CREDIT_CARD_TRACK1_PATTERN = Pattern.compile(
88  /*
89  * Track 1 is alphanumeric.
90  *
91  * This regex matches 12-19 digit ccns embedded in a track 1 formatted
92  * string. This regex matches (and extracts groups) even if the
93  * entire track is not present as long as the part that is conforms
94  * to the track format.
95  */
96  "(?:" //begin nested optional group //NON-NLS
97  + "%?" //optional start sentinel: % //NON-NLS
98  + "B)?" //format code //NON-NLS
99  + "(?<accountNumber>[2-6]([ -]?[0-9]){11,18})" //12-19 digits, with possible single spaces or dashes in between. first digit is 2,3,4,5, or 6 //NON-NLS
100  + "\\^" //separator //NON-NLS
101  + "(?<name>[^^]{2,26})" //2-26 character name, not containing ^ //NON-NLS
102  + "(?:\\^" //separator //NON-NLS
103  + "(?:(?:\\^|(?<expiration>\\d{4}))" //separator or 4 digit expiration YYMM //NON-NLS
104  + "(?:(?:\\^|(?<serviceCode>\\d{3}))"//separator or 3 digit service code //NON-NLS
105  + "(?:(?<discretionary>[^?]*)" // discretionary data not containing separator //NON-NLS
106  + "(?:\\?" // end sentinel: ? //NON-NLS
107  + "(?<LRC>.)" //longitudinal redundancy check //NON-NLS
108  + "?)?)?)?)?)?");//close nested optional groups //NON-NLS
/*
 * Matches track 2 formatted data. Named groups: accountNumber, expiration,
 * serviceCode, discretionary and LRC. Everything after the account number is
 * nested in optional groups, so a truncated track still matches.
 */
109  static final Pattern CREDIT_CARD_TRACK2_PATTERN = Pattern.compile(
110  /*
111  * Track 2 is numeric plus six punctuation symbols :;<=>?
112  *
113  * This regex matches 12-19 digit ccns embedded in a track 2 formatted
114  * string. This regex matches (and extracts groups) even if the
115  * entire track is not present as long as the part that is conforms
116  * to the track format.
117  *
118  */
119  "[:;<=>?]?" //(optional)start sentinel //NON-NLS
120  + "(?<accountNumber>[2-6]([ -]?[0-9]){11,18})" //12-19 digits, with possible single spaces or dashes in between. first digit is 2,3,4,5, or 6 //NON-NLS
121  + "(?:[:;<=>?]" //separator //NON-NLS
122  + "(?:(?<expiration>\\d{4})" //4 digit expiration date YYMM //NON-NLS
123  + "(?:(?<serviceCode>\\d{3})" //3 digit service code //NON-NLS
124  + "(?:(?<discretionary>[^:;<=>?]*)" //discretionary data, not containing punctuation marks //NON-NLS
125  + "(?:[:;<=>?]" //end sentinel //NON-NLS
126  + "(?<LRC>.)" //longitudinal redundancy check //NON-NLS
127  + "?)?)?)?)?)?"); //close nested optional groups //NON-NLS
/* Attribute type used to record the Solr document id of a hit (see createCCNAccount). */
128  static final BlackboardAttribute.Type KEYWORD_SEARCH_DOCUMENT_ID = new BlackboardAttribute.Type(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_DOCUMENT_ID);
129 
// TODO: Why are both the list and the keyword added to the state of this
// object?
// TODO: Why is the search term not escaped and given substring wildcards,
// if needed, here in the constructor?
/**
 * Constructs a query that treats the keyword's search term as a regular
 * expression to be matched against the terms in the index. The search term
 * is stored as-is; escaping and substring wildcards are applied later via
 * escape() and setSubstringQuery().
 *
 * @param keywordList The keyword list associated with the keyword.
 * @param keyword     The keyword whose search term is the regex.
 */
TermsComponentQuery(KeywordList keywordList, Keyword keyword) {
    this.originalKeyword = keyword;
    this.searchTerm = keyword.getSearchTerm();
    this.keywordList = keywordList;
}
151 
158  @Override
159  public KeywordList getKeywordList() {
160  return keywordList;
161  }
162 
169  @Override
170  public String getQueryString() {
171  return originalKeyword.getSearchTerm();
172  }
173 
181  @Override
182  public boolean isLiteral() {
183  return false;
184  }
185 
190  @Override
191  public void setSubstringQuery() {
192  searchTerm = ".*" + searchTerm + ".*";
193  }
194 
198  @Override
199  public void escape() {
200  searchTerm = Pattern.quote(originalKeyword.getSearchTerm());
201  searchTermIsEscaped = true;
202  }
203 
209  @Override
210  public boolean isEscaped() {
211  return searchTermIsEscaped;
212  }
213 
220  @Override
221  public String getEscapedQueryString() {
222  return this.searchTerm;
223  }
224 
230  @Override
231  public boolean validate() {
232  if (searchTerm.isEmpty()) {
233  return false;
234  }
235  try {
236  Pattern.compile(searchTerm);
237  return true;
238  } catch (IllegalArgumentException ex) {
239  return false;
240  }
241  }
242 
249  @Override
250  public void setField(String field) {
251  }
252 
258  // TODO: Document this better.
259  @Override
260  public void addFilter(KeywordQueryFilter filter) {
261  this.filters.add(filter);
262  }
263 
274  @Override
275  public QueryResults performQuery() throws KeywordSearchModuleException, NoOpenCoreException {
276  /*
277  * Do a query using the Solr terms component to find any terms in the
278  * index that match the regex.
279  */
280  final SolrQuery termsQuery = new SolrQuery();
281  termsQuery.setRequestHandler(SEARCH_HANDLER);
282  termsQuery.setTerms(true);
283  termsQuery.setTermsRegexFlag(CASE_INSENSITIVE);
284  termsQuery.setTermsRegex(searchTerm);
285  termsQuery.addTermsField(SEARCH_FIELD);
286  termsQuery.setTimeAllowed(TERMS_SEARCH_TIMEOUT);
287  termsQuery.setShowDebugInfo(DEBUG_FLAG);
288  termsQuery.setTermsLimit(MAX_TERMS_QUERY_RESULTS);
289  List<Term> terms = KeywordSearch.getServer().queryTerms(termsQuery).getTerms(SEARCH_FIELD);
290  /*
291  * Do a term query for each term that matched the regex.
292  */
293  QueryResults results = new QueryResults(this);
294  for (Term term : terms) {
295  /*
296  * If searching for credit card account numbers, do a Luhn check on
297  * the term and discard it if it does not pass.
298  */
299  if (originalKeyword.getArtifactAttributeType() == ATTRIBUTE_TYPE.TSK_CARD_NUMBER) {
300  Matcher matcher = CREDIT_CARD_NUM_PATTERN.matcher(term.getTerm());
301  if (false == matcher.find()
302  || false == CreditCardValidator.isValidCCN(matcher.group("ccn"))) {
303  continue;
304  }
305  }
306 
307  /*
308  * Do an ordinary query with the escaped term and convert the query
309  * results into a single list of keyword hits without duplicates.
310  *
311  * Note that the filters field appears to be unused. There is an old
312  * comment here, what does it mean? "Note: we can't set filter query
313  * on terms query but setting filter query on fileResults query will
314  * yield the same result." The filter is NOT being added to the term
315  * query.
316  */
317  String escapedTerm = KeywordSearchUtil.escapeLuceneQuery(term.getTerm());
318  LuceneQuery termQuery = new LuceneQuery(keywordList, new Keyword(escapedTerm, true, true));
319  filters.forEach(termQuery::addFilter); // This appears to be unused
320  QueryResults termQueryResult = termQuery.performQuery();
321  Set<KeywordHit> termHits = new HashSet<>();
322  for (Keyword word : termQueryResult.getKeywords()) {
323  termHits.addAll(termQueryResult.getResults(word));
324  }
325  results.addResult(new Keyword(term.getTerm(), false, true, originalKeyword.getListName(), originalKeyword.getOriginalTerm()), new ArrayList<>(termHits));
326  }
327  return results;
328  }
329 
346  @Override
347  public BlackboardArtifact postKeywordHitToBlackboard(Content content, Keyword foundKeyword, KeywordHit hit, String snippet, String listName) {
348 
349  /*
350  * CCN hits are handled specially
351  */
352  if (originalKeyword.getArtifactAttributeType() == ATTRIBUTE_TYPE.TSK_CARD_NUMBER) {
353  createCCNAccount(content, hit, snippet, listName);
354  return null;
355  }
356 
357  /*
358  * Create a "plain vanilla" keyword hit artifact with keyword and regex
359  * attributes,
360  */
361  BlackboardArtifact newArtifact;
362  Collection<BlackboardAttribute> attributes = new ArrayList<>();
363 
364  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, foundKeyword.getSearchTerm()));
365  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP, MODULE_NAME, originalKeyword.getSearchTerm()));
366 
367  try {
368  newArtifact = content.newArtifact(ARTIFACT_TYPE.TSK_KEYWORD_HIT);
369 
370  } catch (TskCoreException ex) {
371  LOGGER.log(Level.SEVERE, "Error adding artifact for keyword hit to blackboard", ex); //NON-NLS
372  return null;
373  }
374 
375  if (StringUtils.isNotBlank(listName)) {
376  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SET_NAME, MODULE_NAME, listName));
377  }
378  if (snippet != null) {
379  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW, MODULE_NAME, snippet));
380  }
381 
382  hit.getArtifactID().ifPresent(
383  artifactID -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, artifactID))
384  );
385 
386  // TermsComponentQuery is now being used exclusively for substring searches.
387  attributes.add(new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.SUBSTRING.ordinal()));
388 
389  try {
390  newArtifact.addAttributes(attributes);
391  return newArtifact;
392  } catch (TskCoreException e) {
393  LOGGER.log(Level.SEVERE, "Error adding bb attributes for terms search artifact", e); //NON-NLS
394  return null;
395  }
396  }
397 
398  private void createCCNAccount(Content content, KeywordHit hit, String snippet, String listName) {
399 
400  if (originalKeyword.getArtifactAttributeType() != ATTRIBUTE_TYPE.TSK_CARD_NUMBER) {
401  LOGGER.log(Level.SEVERE, "Keyword hit is not a credit card number"); //NON-NLS
402  return;
403  }
404 
405  /*
406  * Create a credit card account with attributes parsed from from the
407  * snippet for the hit and looked up based on the parsed bank
408  * identifcation number.
409  */
410  Collection<BlackboardAttribute> attributes = new ArrayList<>();
411 
412  Map<BlackboardAttribute.Type, BlackboardAttribute> parsedTrackAttributeMap = new HashMap<>();
413  Matcher matcher = CREDIT_CARD_TRACK1_PATTERN.matcher(hit.getSnippet());
414  if (matcher.find()) {
415  parseTrack1Data(parsedTrackAttributeMap, matcher);
416  }
417  matcher = CREDIT_CARD_TRACK2_PATTERN.matcher(hit.getSnippet());
418  if (matcher.find()) {
419  parseTrack2Data(parsedTrackAttributeMap, matcher);
420  }
421  final BlackboardAttribute ccnAttribute = parsedTrackAttributeMap.get(new BlackboardAttribute.Type(ATTRIBUTE_TYPE.TSK_CARD_NUMBER));
422  if (ccnAttribute == null || StringUtils.isBlank(ccnAttribute.getValueString())) {
423  if (hit.isArtifactHit()) {
424  LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for artifact keyword hit: term = %s, snippet = '%s', artifact id = %d", searchTerm, hit.getSnippet(), hit.getArtifactID().get())); //NON-NLS
425  } else {
426  long contentId = 0;
427  try {
428  contentId = hit.getContentID();
429  } catch (TskCoreException ex) {
430  LOGGER.log(Level.SEVERE, String.format("Failed to content id from keyword hit: term = %s, snippet = '%s'", searchTerm, hit.getSnippet()), ex); //NON-NLS
431  }
432  if (contentId > 0) {
433  LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for content keyword hit: term = %s, snippet = '%s', object id = %d", searchTerm, hit.getSnippet(), contentId)); //NON-NLS
434  } else {
435  LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for content keyword hit: term = %s, snippet = '%s'", searchTerm, hit.getSnippet())); //NON-NLS
436  }
437  }
438  return;
439  }
440  attributes.addAll(parsedTrackAttributeMap.values());
441 
442  /*
443  * Look up the bank name, scheme, etc. attributes for the bank
444  * indentification number (BIN).
445  */
446  final int bin = Integer.parseInt(ccnAttribute.getValueString().substring(0, 8));
447  CreditCards.BankIdentificationNumber binInfo = CreditCards.getBINInfo(bin);
448  if (binInfo != null) {
449  binInfo.getScheme().ifPresent(scheme
450  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_CARD_SCHEME, MODULE_NAME, scheme)));
451  binInfo.getCardType().ifPresent(cardType
452  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_CARD_TYPE, MODULE_NAME, cardType)));
453  binInfo.getBrand().ifPresent(brand
454  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_BRAND_NAME, MODULE_NAME, brand)));
455  binInfo.getBankName().ifPresent(bankName
456  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_BANK_NAME, MODULE_NAME, bankName)));
457  binInfo.getBankPhoneNumber().ifPresent(phoneNumber
458  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_PHONE_NUMBER, MODULE_NAME, phoneNumber)));
459  binInfo.getBankURL().ifPresent(url
460  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_URL, MODULE_NAME, url)));
461  binInfo.getCountry().ifPresent(country
462  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_COUNTRY, MODULE_NAME, country)));
463  binInfo.getBankCity().ifPresent(city
464  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_CITY, MODULE_NAME, city)));
465  }
466 
467  /*
468  * If the hit is from unused or unallocated space, record the Solr
469  * document id to support showing just the chunk that contained the hit.
470  */
471  if (content instanceof AbstractFile) {
472  AbstractFile file = (AbstractFile) content;
473  if (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS
474  || file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS) {
475  attributes.add(new BlackboardAttribute(KEYWORD_SEARCH_DOCUMENT_ID, MODULE_NAME, hit.getSolrDocumentId()));
476  }
477  }
478 
479  if (StringUtils.isNotBlank(listName)) {
480  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SET_NAME, MODULE_NAME, listName));
481  }
482  if (snippet != null) {
483  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW, MODULE_NAME, snippet));
484  }
485 
486  hit.getArtifactID().ifPresent(
487  artifactID -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, artifactID))
488  );
489 
490  // TermsComponentQuery is now being used exclusively for substring searches.
491  attributes.add(new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.SUBSTRING.ordinal()));
492 
493  /*
494  * Create an account.
495  */
496  try {
497  AccountFileInstance ccAccountInstance = Case.getCurrentCase().getSleuthkitCase().getCommunicationsManager().createAccountFileInstance(Account.Type.CREDIT_CARD, ccnAttribute.getValueString(), MODULE_NAME, content);
498  ccAccountInstance.addAttributes(attributes);
499  //newArtifact = Case.getCurrentCase().getSleuthkitCase().getBlackboardArtifact(ccAccountInstance.getArtifactId());
500  } catch (TskCoreException ex) {
501  LOGGER.log(Level.SEVERE, "Error creating CCN account instance", ex); //NON-NLS
502  }
503 
504  }
505 
514  static private void parseTrack2Data(Map<BlackboardAttribute.Type, BlackboardAttribute> attributesMap, Matcher matcher) {
515  addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_NUMBER, "accountNumber", matcher);
516  addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_EXPIRATION, "expiration", matcher);
517  addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_SERVICE_CODE, "serviceCode", matcher);
518  addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_DISCRETIONARY, "discretionary", matcher);
519  addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_LRC, "LRC", matcher);
520  }
521 
531  static private void parseTrack1Data(Map<BlackboardAttribute.Type, BlackboardAttribute> attributeMap, Matcher matcher) {
532  parseTrack2Data(attributeMap, matcher);
533  addAttributeIfNotAlreadyCaptured(attributeMap, ATTRIBUTE_TYPE.TSK_NAME_PERSON, "name", matcher);
534  }
535 
547  static private void addAttributeIfNotAlreadyCaptured(Map<BlackboardAttribute.Type, BlackboardAttribute> attributeMap, ATTRIBUTE_TYPE attrType, String groupName, Matcher matcher) {
548  BlackboardAttribute.Type type = new BlackboardAttribute.Type(attrType);
549  attributeMap.computeIfAbsent(type, (BlackboardAttribute.Type t) -> {
550  String value = matcher.group(groupName);
551  if (attrType.equals(ATTRIBUTE_TYPE.TSK_CARD_NUMBER)) {
552  value = CharMatcher.anyOf(" -").removeFrom(value);
553  }
554  if (StringUtils.isNotBlank(value)) {
555  return new BlackboardAttribute(attrType, MODULE_NAME, value);
556  }
557  return null;
558  });
559  }
560 
561 }

Copyright © 2012-2016 Basis Technology. Generated on: Tue Feb 20 2018
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.