Autopsy  4.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
TermsComponentQuery.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2016 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.keywordsearch;
20 
21 import com.google.common.base.CharMatcher;
22 import java.util.ArrayList;
23 import java.util.Collection;
24 import java.util.HashMap;
25 import java.util.HashSet;
26 import java.util.List;
27 import java.util.Map;
28 import java.util.Set;
29 import java.util.logging.Level;
30 import java.util.regex.Matcher;
31 import java.util.regex.Pattern;
32 import org.apache.commons.lang.StringUtils;
33 import org.apache.commons.validator.routines.checkdigit.LuhnCheckDigit;
34 import org.apache.solr.client.solrj.SolrQuery;
35 import org.apache.solr.client.solrj.response.TermsResponse.Term;
39 import org.sleuthkit.datamodel.AbstractFile;
40 import org.sleuthkit.datamodel.Account;
41 import org.sleuthkit.datamodel.BlackboardArtifact;
42 import org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
43 import org.sleuthkit.datamodel.BlackboardAttribute;
44 import org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
45 import org.sleuthkit.datamodel.TskCoreException;
46 import org.sleuthkit.datamodel.TskData;
47 
/**
 * A keyword search query that treats the keyword's search term as a regular
 * expression. It asks the Solr terms component for indexed terms matching
 * the regex and then runs an ordinary Lucene query for each matching term
 * (see performQuery). It also holds the patterns used to validate and parse
 * credit card account numbers and magnetic stripe track data.
 */
54 final class TermsComponentQuery implements KeywordSearchQuery {
55 
56  private static final Logger LOGGER = Logger.getLogger(TermsComponentQuery.class.getName());
57  private static final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName();
58  private static final String SEARCH_HANDLER = "/terms"; //NON-NLS
59  private static final String SEARCH_FIELD = Server.Schema.CONTENT_WS.toString();
60  private static final int TERMS_SEARCH_TIMEOUT = 90 * 1000; // Milliseconds
61  private static final String CASE_INSENSITIVE = "case_insensitive"; //NON-NLS
62  private static final boolean DEBUG_FLAG = Version.Type.DEVELOPMENT.equals(Version.getBuildType());
63  private static final int MAX_TERMS_QUERY_RESULTS = 20000;
64  private final KeywordList keywordList;
65  private final Keyword keyword;
66  private String searchTerm;
67  private boolean searchTermIsEscaped;
68  private final List<KeywordQueryFilter> filters = new ArrayList<>(); // Populated by addFilter() and applied to each per-term LuceneQuery in performQuery()
69 
70  /*
71  * The following fields are part of the initial implementation of credit
72  * card account search and should be factored into another class when time
73  * permits.
74  */
75  private static final Pattern CREDIT_CARD_NUM_PATTERN = Pattern.compile("(?<ccn>[3456]([ -]?\\d){11,18})"); //12-19 digits, with possible single spaces or dashes in between. First digit is 3,4,5, or 6 //NON-NLS
76  private static final LuhnCheckDigit CREDIT_CARD_NUM_LUHN_CHECK = new LuhnCheckDigit();
77  private static final Pattern CREDIT_CARD_TRACK1_PATTERN = Pattern.compile(
78  /*
79  * Track 1 is alphanumeric.
80  *
81  * This regex matches 12-19 digit ccns embedded in a track 1 formatted
82  * string. This regex matches (and extracts groups) even if the
83  * entire track is not present as long as the part that is conforms
84  * to the track format.
85  */
86  "(?:" //begin nested optional group //NON-NLS
87  + "%?" //optional start sentinel: % //NON-NLS
88  + "B)?" //format code //NON-NLS
89  + "(?<accountNumber>[3456]([ -]?\\d){11,18})" //12-19 digits, with possible single spaces or dashes in between. first digit is 3,4,5, or 6 //NON-NLS
90  + "\\^" //separator //NON-NLS
91  + "(?<name>[^^]{2,26})" //2-26 character name, not containing ^ //NON-NLS
92  + "(?:\\^" //separator //NON-NLS
93  + "(?:(?:\\^|(?<expiration>\\d{4}))" //separator or 4 digit expiration YYMM //NON-NLS
94  + "(?:(?:\\^|(?<serviceCode>\\d{3}))"//separator or 3 digit service code //NON-NLS
95  + "(?:(?<discretionary>[^?]*)" // discretionary data not containing the end sentinel ? //NON-NLS
96  + "(?:\\?" // end sentinel: ? //NON-NLS
97  + "(?<LRC>.)" //longitudinal redundancy check //NON-NLS
98  + "?)?)?)?)?)?");//close nested optional groups //NON-NLS
99  private static final Pattern CREDIT_CARD_TRACK2_PATTERN = Pattern.compile(
100  /*
101  * Track 2 is numeric plus six punctuation symbols :;<=>?
102  *
103  * This regex matches 12-19 digit ccns embedded in a track 2 formatted
104  * string. This regex matches (and extracts groups) even if the
105  * entire track is not present as long as the part that is conforms
106  * to the track format.
107  *
108  */
109  "[:;<=>?]?" //(optional)start sentinel //NON-NLS
110  + "(?<accountNumber>[3456]([ -]?\\d){11,18})" //12-19 digits, with possible single spaces or dashes in between. first digit is 3,4,5, or 6 //NON-NLS
111  + "(?:[:;<=>?]" //separator //NON-NLS
112  + "(?:(?<expiration>\\d{4})" //4 digit expiration date YYMM //NON-NLS
113  + "(?:(?<serviceCode>\\d{3})" //3 digit service code //NON-NLS
114  + "(?:(?<discretionary>[^:;<=>?]*)" //discretionary data, not containing punctuation marks //NON-NLS
115  + "(?:[:;<=>?]" //end sentinel //NON-NLS
116  + "(?<LRC>.)" //longitudinal redundancy check //NON-NLS
117  + "?)?)?)?)?)?"); //close nested optional groups //NON-NLS
 // Attribute type used to record the Solr document id of a hit (see writeSingleFileHitsToBlackBoard).
118  private static final BlackboardAttribute.Type KEYWORD_SEARCH_DOCUMENT_ID = new BlackboardAttribute.Type(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_DOCUMENT_ID);
119 
132  // TODO: Why are both the list and the keyword added to the state of this
133  // object?
134  // TODO: Why is the search term not escaped and given substring wildcards,
135  // if needed, here in the constructor?
/**
 * Constructs a terms component query for the given keyword.
 *
 * The search term starts out as the keyword's raw search term; it is only
 * escaped or wrapped in substring wildcards if escape() or
 * setSubstringQuery() is called later.
 *
 * @param keywordList The keyword list, stored and returned unchanged by
 *                    getKeywordList().
 * @param keyword     The keyword that supplies the search term.
 */
TermsComponentQuery(KeywordList keywordList, Keyword keyword) {
    this.keyword = keyword;
    this.keywordList = keywordList;
    this.searchTerm = this.keyword.getSearchTerm();
}
141 
148  @Override
149  public KeywordList getKeywordList() {
150  return keywordList;
151  }
152 
159  @Override
160  public String getQueryString() {
161  return keyword.getSearchTerm();
162  }
163 
171  @Override
172  public boolean isLiteral() {
173  return false;
174  }
175 
180  @Override
181  public void setSubstringQuery() {
182  searchTerm = ".*" + searchTerm + ".*";
183  }
184 
188  @Override
189  public void escape() {
190  searchTerm = Pattern.quote(keyword.getSearchTerm());
191  searchTermIsEscaped = true;
192  }
193 
199  @Override
200  public boolean isEscaped() {
201  return searchTermIsEscaped;
202  }
203 
210  @Override
211  public String getEscapedQueryString() {
212  return this.searchTerm;
213  }
214 
220  @Override
221  public boolean validate() {
222  if (searchTerm.isEmpty()) {
223  return false;
224  }
225  try {
226  Pattern.compile(searchTerm);
227  return true;
228  } catch (IllegalArgumentException ex) {
229  return false;
230  }
231  }
232 
239  @Override
240  public void setField(String field) {
241  }
242 
248  // TODO: Document this better.
249  @Override
250  public void addFilter(KeywordQueryFilter filter) {
251  this.filters.add(filter);
252  }
253 
264  @Override
265  public QueryResults performQuery() throws KeywordSearchModuleException, NoOpenCoreException {
266  /*
267  * Do a query using the Solr terms component to find any terms in the
268  * index that match the regex.
269  */
270  final SolrQuery termsQuery = new SolrQuery();
271  termsQuery.setRequestHandler(SEARCH_HANDLER);
272  termsQuery.setTerms(true);
273  termsQuery.setTermsRegexFlag(CASE_INSENSITIVE);
274  termsQuery.setTermsRegex(searchTerm);
275  termsQuery.addTermsField(SEARCH_FIELD);
276  termsQuery.setTimeAllowed(TERMS_SEARCH_TIMEOUT);
277  termsQuery.setShowDebugInfo(DEBUG_FLAG);
278  termsQuery.setTermsLimit(MAX_TERMS_QUERY_RESULTS);
279  List<Term> terms = KeywordSearch.getServer().queryTerms(termsQuery).getTerms(SEARCH_FIELD);
280  /*
281  * Do a term query for each term that matched the regex.
282  */
283  QueryResults results = new QueryResults(this, keywordList);
284  for (Term term : terms) {
285  /*
286  * If searching for credit card account numbers, do a Luhn check on
287  * the term and discard it if it does not pass.
288  */
289  if (keyword.getArtifactAttributeType() == ATTRIBUTE_TYPE.TSK_CARD_NUMBER) {
290  Matcher matcher = CREDIT_CARD_NUM_PATTERN.matcher(term.getTerm());
291  matcher.find();
292  final String ccn = CharMatcher.anyOf(" -").removeFrom(matcher.group("ccn"));
293  if (false == CREDIT_CARD_NUM_LUHN_CHECK.isValid(ccn)) {
294  continue;
295  }
296  }
297 
298  /*
299  * Do an ordinary query with the escaped term and convert the query
300  * results into a single list of keyword hits without duplicates.
301  *
302  * Note that the filters field appears to be unused. There is an old
303  * comment here, what does it mean? "Note: we can't set filter query
304  * on terms query but setting filter query on fileResults query will
305  * yield the same result." The filter is NOT being added to the term
306  * query.
307  */
308  String escapedTerm = KeywordSearchUtil.escapeLuceneQuery(term.getTerm());
309  LuceneQuery termQuery = new LuceneQuery(keywordList, new Keyword(escapedTerm, true));
310  filters.forEach(termQuery::addFilter); // This appears to be unused
311  QueryResults termQueryResult = termQuery.performQuery();
312  Set<KeywordHit> termHits = new HashSet<>();
313  for (Keyword word : termQueryResult.getKeywords()) {
314  termHits.addAll(termQueryResult.getResults(word));
315  }
316  results.addResult(new Keyword(term.getTerm(), false), new ArrayList<>(termHits));
317  }
318  return results;
319  }
320 
335  // TODO: Are we actually making meaningful use of the KeywordCachedArtifact
336  // class?
337  @Override
338  public KeywordCachedArtifact writeSingleFileHitsToBlackBoard(String searchTerm, KeywordHit hit, String snippet, String listName) {
339  /*
340  * Create either a "plain vanilla" keyword hit artifact with keyword and
341  * regex attributes, or a credit card account artifact with attributes
342  * parsed from from the snippet for the hit and looked up based on the
343  * parsed bank identifcation number.
344  */
345  BlackboardArtifact newArtifact;
346  Collection<BlackboardAttribute> attributes = new ArrayList<>();
347  if (keyword.getArtifactAttributeType() != ATTRIBUTE_TYPE.TSK_CARD_NUMBER) {
348  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, searchTerm));
349  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP, MODULE_NAME, keyword.getSearchTerm()));
350  try {
351  newArtifact = hit.getContent().newArtifact(ARTIFACT_TYPE.TSK_KEYWORD_HIT);
352 
353  } catch (TskCoreException ex) {
354  LOGGER.log(Level.SEVERE, "Error adding artifact for keyword hit to blackboard", ex); //NON-NLS
355  return null;
356  }
357  } else {
358  /*
359  * Parse the credit card account attributes from the snippet for the
360  * hit.
361  */
362  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_ACCOUNT_TYPE, MODULE_NAME, Account.Type.CREDIT_CARD.name()));
363  Map<BlackboardAttribute.Type, BlackboardAttribute> parsedTrackAttributeMap = new HashMap<>();
364  Matcher matcher = CREDIT_CARD_TRACK1_PATTERN.matcher(hit.getSnippet());
365  if (matcher.find()) {
366  parseTrack1Data(parsedTrackAttributeMap, matcher);
367  }
368  matcher = CREDIT_CARD_TRACK2_PATTERN.matcher(hit.getSnippet());
369  if (matcher.find()) {
370  parseTrack2Data(parsedTrackAttributeMap, matcher);
371  }
372  final BlackboardAttribute ccnAttribute = parsedTrackAttributeMap.get(new BlackboardAttribute.Type(ATTRIBUTE_TYPE.TSK_CARD_NUMBER));
373  if (ccnAttribute == null || StringUtils.isBlank(ccnAttribute.getValueString())) {
374  if (hit.isArtifactHit()) {
375  LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for artifact keyword hit: term = %s, snippet = '%s', artifact id = %d", searchTerm, hit.getSnippet(), hit.getArtifact().getArtifactID())); //NON-NLS
376  } else {
377  LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for content keyword hit: term = %s, snippet = '%s', object id = %d", searchTerm, hit.getSnippet(), hit.getContent().getId())); //NON-NLS
378  }
379  return null;
380  }
381  attributes.addAll(parsedTrackAttributeMap.values());
382 
383  /*
384  * Look up the bank name, scheme, etc. attributes for the bank
385  * indentification number (BIN).
386  */
387  final int bin = Integer.parseInt(ccnAttribute.getValueString().substring(0, 8));
388  CreditCards.BankIdentificationNumber binInfo = CreditCards.getBINInfo(bin);
389  if (binInfo != null) {
390  binInfo.getScheme().ifPresent(scheme
391  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_CARD_SCHEME, MODULE_NAME, scheme)));
392  binInfo.getCardType().ifPresent(cardType
393  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_CARD_TYPE, MODULE_NAME, cardType)));
394  binInfo.getBrand().ifPresent(brand
395  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_BRAND_NAME, MODULE_NAME, brand)));
396  binInfo.getBankName().ifPresent(bankName
397  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_BANK_NAME, MODULE_NAME, bankName)));
398  binInfo.getBankPhoneNumber().ifPresent(phoneNumber
399  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_PHONE_NUMBER, MODULE_NAME, phoneNumber)));
400  binInfo.getBankURL().ifPresent(url
401  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_URL, MODULE_NAME, url)));
402  binInfo.getCountry().ifPresent(country
403  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_COUNTRY, MODULE_NAME, country)));
404  binInfo.getBankCity().ifPresent(city
405  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_CITY, MODULE_NAME, city)));
406  }
407 
408  /*
409  * If the hit is from unused or unallocated space, record the Solr
410  * document id to support showing just the chunk that contained the
411  * hit.
412  */
413  if (hit.getContent() instanceof AbstractFile) {
414  AbstractFile file = (AbstractFile) hit.getContent();
415  if (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS
416  || file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS) {
417  attributes.add(new BlackboardAttribute(KEYWORD_SEARCH_DOCUMENT_ID, MODULE_NAME, hit.getSolrDocumentId()));
418  }
419  }
420 
421  /*
422  * Create an account artifact.
423  */
424  try {
425  newArtifact = hit.getContent().newArtifact(ARTIFACT_TYPE.TSK_ACCOUNT);
426  } catch (TskCoreException ex) {
427  LOGGER.log(Level.SEVERE, "Error adding artifact for account to blackboard", ex); //NON-NLS
428  return null;
429  }
430  }
431 
432  if (StringUtils.isNotBlank(listName)) {
433  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SET_NAME, MODULE_NAME, listName));
434  }
435  if (snippet != null) {
436  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW, MODULE_NAME, snippet));
437  }
438  if (hit.isArtifactHit()) {
439  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, hit.getArtifact().getArtifactID()));
440  }
441 
442  try {
443  newArtifact.addAttributes(attributes);
444  KeywordCachedArtifact writeResult = new KeywordCachedArtifact(newArtifact);
445  writeResult.add(attributes);
446  return writeResult;
447  } catch (TskCoreException e) {
448  LOGGER.log(Level.SEVERE, "Error adding bb attributes for terms search artifact", e); //NON-NLS
449  return null;
450  }
451  }
452 
461  static private void parseTrack2Data(Map<BlackboardAttribute.Type, BlackboardAttribute> attributesMap, Matcher matcher) {
462  addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_NUMBER, "accountNumber", matcher);
463  addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_EXPIRATION, "expiration", matcher);
464  addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_SERVICE_CODE, "serviceCode", matcher);
465  addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_DISCRETIONARY, "discretionary", matcher);
466  addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_LRC, "LRC", matcher);
467  }
468 
478  static private void parseTrack1Data(Map<BlackboardAttribute.Type, BlackboardAttribute> attributeMap, Matcher matcher) {
479  parseTrack2Data(attributeMap, matcher);
480  addAttributeIfNotAlreadyCaptured(attributeMap, ATTRIBUTE_TYPE.TSK_NAME_PERSON, "name", matcher);
481  }
482 
494  static private void addAttributeIfNotAlreadyCaptured(Map<BlackboardAttribute.Type, BlackboardAttribute> attributeMap, ATTRIBUTE_TYPE attrType, String groupName, Matcher matcher) {
495  BlackboardAttribute.Type type = new BlackboardAttribute.Type(attrType);
496  attributeMap.computeIfAbsent(type, (BlackboardAttribute.Type t) -> {
497  String value = matcher.group(groupName);
498  if (attrType.equals(ATTRIBUTE_TYPE.TSK_CARD_NUMBER)) {
499  value = CharMatcher.anyOf(" -").removeFrom(value);
500  }
501  if (StringUtils.isNotBlank(value)) {
502  return new BlackboardAttribute(attrType, MODULE_NAME, value);
503  }
504  return null;
505  });
506  }
507 
508 }

Copyright © 2012-2016 Basis Technology. Generated on: Mon Jan 2 2017
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.