19 package org.sleuthkit.autopsy.keywordsearch;
21 import com.google.common.base.CharMatcher;
22 import java.util.ArrayList;
23 import java.util.Collection;
24 import java.util.HashMap;
25 import java.util.List;
27 import java.util.logging.Level;
28 import java.util.regex.Matcher;
29 import java.util.regex.Pattern;
30 import org.apache.commons.lang3.StringUtils;
31 import org.apache.commons.validator.routines.DomainValidator;
32 import org.apache.solr.client.solrj.SolrQuery;
33 import org.apache.solr.client.solrj.SolrQuery.SortClause;
34 import org.apache.solr.client.solrj.SolrRequest;
35 import org.apache.solr.client.solrj.response.QueryResponse;
36 import org.apache.solr.common.SolrDocument;
37 import org.apache.solr.common.SolrDocumentList;
38 import org.apache.solr.common.params.CursorMarkParams;
39 import org.openide.util.NbBundle;
50 import org.
sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
52 import org.
sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
71 final class RegexQuery
implements KeywordSearchQuery {
// Class-wide logger, named after RegexQuery.
// NOTE(review): declared public; nothing in the visible code needs it outside this class - confirm before narrowing.
73 public static final Logger LOGGER = Logger.getLogger(RegexQuery.class.getName());
// Regex escape sequences that validate() searches for in the keyword string.
// NOTE(review): what validate() does when one is found is not visible here - presumably the keyword is rejected; confirm.
85 private static final CharSequence[] UNSUPPORTED_CHARS = {
"\\d",
"\\D",
"\\w",
"\\W",
"\\s",
"\\S",
"\\n",
86 "\\t",
"\\r",
"\\f",
"\\a",
"\\e",
"\\v",
"\\V",
"\\h",
"\\H",
"\\p"};
// Row limit passed to SolrQuery.setRows() for each cursor-mark page in performQuery().
88 private static final int MAX_RESULTS_PER_CURSOR_MARK = 512;
// Minimum hit length required by the TLD-validating branch of createKeywordHits().
89 private static final int MIN_EMAIL_ADDR_LENGTH = 8;
// Filters registered through addFilter().
91 private final List<KeywordQueryFilter> filters =
new ArrayList<>();
// Constructor arguments, kept as-is.
92 private final KeywordList keywordList;
93 private final Keyword originalKeyword;
// Cached originalKeyword.getSearchTerm(); this is the regex that gets compiled and matched.
94 private final String keywordString;
// True when keywordString already starts / ends with ".*"; performQuery() then skips adding its own wildcard.
95 private final boolean queryStringContainsWildcardPrefix;
96 private final boolean queryStringContainsWildcardSuffix;
// Escaping state; escapedQuery is assigned in escape().
98 private boolean escaped;
99 private String escapedQuery;
// Solr field to search; performQuery() falls back to CONTENT_STR when this is null.
100 private String field = Server.Schema.CONTENT_STR.toString();
/**
 * Builds a regex query for one keyword.
 *
 * Stores both arguments, caches the keyword's search term, and records
 * whether that term already begins or ends with a ".*" wildcard.
 *
 * @param keywordList the list passed by the caller; stored unchanged
 * @param keyword     the keyword whose search term is used as the regex;
 *                    dereferenced immediately, so it must not be null
 */
108 RegexQuery(KeywordList keywordList, Keyword keyword) {
109 this.keywordList = keywordList;
110 this.originalKeyword = keyword;
111 this.keywordString = keyword.getSearchTerm();
// Remember existing wildcards so performQuery() does not double them up.
113 this.queryStringContainsWildcardPrefix = this.keywordString.startsWith(
".*");
114 this.queryStringContainsWildcardSuffix = this.keywordString.endsWith(
".*");
// NOTE(review): body is not visible in this excerpt - presumably returns the keywordList field; confirm.
118 public KeywordList getKeywordList() {
/**
 * Checks whether the keyword string can be used as a regex query.
 *
 * The visible steps are: an empty-string check, a trial compile with
 * Pattern.UNICODE_CHARACTER_CLASS, and a scan for every entry in
 * UNSUPPORTED_CHARS.
 *
 * NOTE(review): the return statements are not visible in this excerpt -
 * presumably false for empty, uncompilable or unsupported input and true
 * otherwise; confirm.
 */
123 public boolean validate() {
124 if (keywordString.isEmpty()) {
// Compile only to check the syntax; the resulting Pattern is discarded.
129 Pattern.compile(keywordString, Pattern.UNICODE_CHARACTER_CLASS);
135 for (CharSequence c : UNSUPPORTED_CHARS) {
136 if (keywordString.contains(c)) {
141 }
// PatternSyntaxException is a subclass of IllegalArgumentException, so invalid regex syntax lands here.
catch (IllegalArgumentException ex) {
/**
 * Runs the regex against Solr and collects the keyword hits.
 *
 * Builds a Solr regex query on the configured field, pages through the
 * results with a cursor mark, turns each returned document into hits via
 * createKeywordHits(), and groups those hits in a QueryResults keyed by a
 * Keyword built from the matched text.
 *
 * NOTE(review): the return statement is not visible in this excerpt -
 * presumably returns the populated QueryResults; confirm.
 *
 * @throws NoOpenCoreException declared by this method; the throwing call is
 *                             not identifiable from the visible lines
 */
147 public QueryResults performQuery() throws NoOpenCoreException {
149 final Server solrServer = KeywordSearch.getServer();
150 SolrQuery solrQuery =
new SolrQuery();
// Query text: field name, a colon, then the regex between forward slashes.
// A leading or trailing ".*" is added only when the keyword does not already have one.
169 solrQuery.setQuery((field == null ? Server.Schema.CONTENT_STR.toString() : field) +
":/"
170 + (queryStringContainsWildcardPrefix ?
"" :
".*") + getQueryString()
171 + (queryStringContainsWildcardSuffix ?
"" :
".*") +
"/");
// Request only the fields createKeywordHits() reads.
174 solrQuery.setFields(Server.Schema.CONTENT_STR.toString(), Server.Schema.ID.toString(), Server.Schema.CHUNK_SIZE.toString());
// NOTE(review): the head of this call chain is not visible - presumably filters.stream(); confirm.
177 .map(KeywordQueryFilter::toString)
178 .forEach(solrQuery::addFilterQuery);
180 solrQuery.setRows(MAX_RESULTS_PER_CURSOR_MARK);
// Results are sorted ascending by ID before cursor paging starts.
182 solrQuery.setSort(SortClause.asc(Server.Schema.ID.toString()));
184 String cursorMark = CursorMarkParams.CURSOR_MARK_START;
185 SolrDocumentList resultList;
186 boolean allResultsProcessed =
false;
187 QueryResults results =
new QueryResults(
this);
// Fetch one page per iteration until Solr hands back the same cursor mark.
189 while (!allResultsProcessed) {
191 solrQuery.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark);
192 QueryResponse response = solrServer.query(solrQuery, SolrRequest.METHOD.POST);
193 resultList = response.getResults();
195 for (SolrDocument resultDoc : resultList) {
197 List<KeywordHit> keywordHits = createKeywordHits(resultDoc);
198 for (KeywordHit hit : keywordHits) {
// Key the results by the text that actually matched, keeping the original list name and term.
199 Keyword keywordInstance =
new Keyword(hit.getHit(),
true,
true, originalKeyword.getListName(), originalKeyword.getOriginalTerm());
200 List<KeywordHit> hitsForKeyword = results.getResults(keywordInstance);
201 if (hitsForKeyword == null) {
202 hitsForKeyword =
new ArrayList<>();
203 results.addResult(keywordInstance, hitsForKeyword);
205 hitsForKeyword.add(hit);
207 }
// A failure while building hits is logged rather than rethrown here.
catch (TskCoreException ex) {
208 LOGGER.log(Level.SEVERE,
"Error creating keyword hits", ex);
// An unchanged cursor mark ends the paging loop.
212 String nextCursorMark = response.getNextCursorMark();
213 if (cursorMark.equals(nextCursorMark)) {
214 allResultsProcessed =
true;
216 cursorMark = nextCursorMark;
217 }
catch (KeywordSearchModuleException ex) {
218 LOGGER.log(Level.SEVERE,
"Error executing Regex Solr Query: " + keywordString, ex);
// NOTE(review): ex.getCause() can be null for an exception created without a cause, which would raise a NullPointerException here - confirm.
219 MessageNotifyUtil.Notify.error(NbBundle.getMessage(Server.class,
"Server.query.exception.msg", keywordString), ex.getCause().getMessage());
/**
 * Re-runs the regex over the content of one Solr document and builds a
 * KeywordHit for each accepted match.
 *
 * Reads the ID, CHUNK_SIZE and CONTENT_STR fields from the document, then
 * scans each content value with java.util.regex. Matches are post-processed
 * according to the keyword's artifact attribute type before being passed to
 * addHit().
 *
 * @param solrDoc the Solr document to scan
 * @throws TskCoreException also used to wrap any other Throwable raised
 *                          while processing the document
 */
226 private List<KeywordHit> createKeywordHits(SolrDocument solrDoc)
throws TskCoreException {
228 List<KeywordHit> hits =
new ArrayList<>();
// NOTE(review): getFieldValue() is dereferenced without a null check; a document lacking the ID field would throw - confirm that ID is always present.
229 final String docId = solrDoc.getFieldValue(Server.Schema.ID.toString()).toString();
// chunkSize may be null; the check further down guards for that.
230 final Integer chunkSize = (Integer) solrDoc.getFieldValue(Server.Schema.CHUNK_SIZE.toString());
232 final Collection<Object> content_str = solrDoc.getFieldValues(Server.Schema.CONTENT_STR.toString());
// NOTE(review): compiled without Pattern.UNICODE_CHARACTER_CLASS, whereas validate() compiles with it - confirm the difference is intended.
234 final Pattern pattern = Pattern.compile(keywordString);
236 for (Object content_obj : content_str) {
237 String content = (String) content_obj;
238 Matcher hitMatcher = pattern.matcher(content);
// find(int) resets the matcher and searches from 'offset' (declared on a line not visible here).
// NOTE(review): a zero-length match leaves end() == start(), so 'offset' would not advance - confirm this is handled on lines not shown.
241 while (hitMatcher.find(offset)) {
242 StringBuilder snippet =
new StringBuilder();
// Match starts at or beyond the recorded chunk size.
// NOTE(review): the action taken is not visible - presumably stop scanning this content; confirm.
247 if (chunkSize != null && hitMatcher.start() >= chunkSize) {
251 String hit = hitMatcher.group();
// The next search resumes at the end of this match.
253 offset = hitMatcher.end();
254 final ATTRIBUTE_TYPE artifactAttributeType = originalKeyword.getArtifactAttributeType();
// For phone-number and IP-address keywords without a trailing wildcard, trim boundary characters off the hit.
262 if (!queryStringContainsWildcardSuffix
263 && (artifactAttributeType == ATTRIBUTE_TYPE.TSK_PHONE_NUMBER
264 || artifactAttributeType == ATTRIBUTE_TYPE.TSK_IP_ADDRESS)) {
265 if (artifactAttributeType == ATTRIBUTE_TYPE.TSK_PHONE_NUMBER) {
// Drop a single leading character that is neither a digit nor '('.
267 hit = hit.replaceAll(
"^[^0-9\\(]",
"");
// Drop a single leading non-digit.
// NOTE(review): presumably the else branch for IP addresses - the 'else' line is not visible; confirm.
270 hit = hit.replaceAll(
"^[^0-9]",
"");
// Drop a single trailing non-digit.
273 hit = hit.replaceAll(
"[^0-9]$",
"");
// Keywords with no artifact attribute type are recorded without further validation.
276 if (artifactAttributeType == null) {
277 addHit(content, snippet, hitMatcher, hit, hits, docId);
279 switch (artifactAttributeType) {
// Accept the hit only if it is long enough and the text from its last '.' onward is a valid TLD.
// NOTE(review): the enclosing case label is not visible - presumably the email case; confirm.
// NOTE(review): lastIndexOf('.') returns -1 when the hit has no '.', making substring(-1) throw; the catch (Throwable) below would then fail the whole document - confirm.
286 if (hit.length() >= MIN_EMAIL_ADDR_LENGTH
287 && DomainValidator.getInstance(
true).isValidTld(hit.substring(hit.lastIndexOf(
'.')))) {
288 addHit(content, snippet, hitMatcher, hit, hits, docId);
292 case TSK_CARD_NUMBER:
// Retry the card-number pattern on progressively shorter prefixes of the hit (full length down to 12 characters)
// and record the hit when the "ccn" group passes CreditCardValidator.
298 Matcher ccnMatcher = CREDIT_CARD_NUM_PATTERN.matcher(hit);
300 for (
int rLength = hit.length(); rLength >= 12; rLength--) {
301 ccnMatcher.region(0, rLength);
302 if (ccnMatcher.find()) {
303 final String group = ccnMatcher.group(
"ccn");
304 if (CreditCardValidator.isValidCCN(group)) {
305 addHit(content, snippet, hitMatcher, hit, hits, docId);
// NOTE(review): presumably the default case for all other attribute types - the label is not visible; confirm.
312 addHit(content, snippet, hitMatcher, hit, hits, docId);
319 }
// NOTE(review): handler body is not visible - presumably rethrows as-is; confirm.
catch (TskCoreException ex) {
321 }
// Anything else is converted to a TskCoreException.
// NOTE(review): only error.getMessage() is kept; the original Throwable is not passed as the cause, so its stack trace is lost.
catch (Throwable error) {
330 throw new TskCoreException(
"Failed to create keyword hits for Solr document id " + docId +
" due to " + error.getMessage());
/**
 * Builds a context snippet around the current match and appends a new
 * KeywordHit to {@code hits}.
 *
 * The snippet is made of up to 20 characters of content before the match,
 * delimiter code points, and up to 20 characters of content after it.
 *
 * @param content    the text the match was found in
 * @param snippet    builder that receives the snippet text
 * @param hitMatcher matcher positioned on the current match; its start() and
 *                   end() are used as offsets into {@code content}
 * @param hit        the (possibly trimmed) matched text stored on the KeywordHit
 * @param hits       list the new KeywordHit is added to
 * @param docId      Solr document id stored on the KeywordHit
 * @throws TskCoreException declared; the throwing call is not identifiable
 *                          from the visible lines
 */
335 private void addHit(String content, StringBuilder snippet, Matcher hitMatcher, String hit, List<KeywordHit> hits,
final String docId)
throws TskCoreException {
// NOTE(review): substring()'s end index is exclusive, so clamping to length() - 1 means the trailing
// context below can never include the last character of content - confirm whether length() was intended.
340 int maxIndex = content.length() - 1;
// Leading context: at most 20 characters before the match, clamped at the start of content.
341 snippet.append(content.substring(Integer.max(0, hitMatcher.start() - 20), Integer.max(0, hitMatcher.start())));
// Code point 171 is the left-pointing double angle quotation mark.
342 snippet.appendCodePoint(171);
// NOTE(review): the line between the two delimiters is not visible - presumably it appends the hit text; confirm.
// NOTE(review): the closing delimiter is also code point 171, not 187 (the right-pointing mark) - confirm that this is intended.
344 snippet.appendCodePoint(171);
// Trailing context: at most 20 characters after the match, clamped to maxIndex.
345 snippet.append(content.substring(Integer.min(maxIndex, hitMatcher.end()), Integer.min(maxIndex, hitMatcher.end() + 20)));
347 hits.add(
new KeywordHit(docId, snippet.toString(), hit));
/**
 * Registers an additional filter for this query by appending it to the
 * internal filter list.
 *
 * @param filter the filter to add
 */
351 public void addFilter(KeywordQueryFilter filter) {
352 this.filters.add(filter);
// NOTE(review): body is not visible in this excerpt - presumably assigns this.field, which performQuery() uses as the Solr field to search; confirm.
356 public void setField(String field) {
// NOTE(review): body is not visible in this excerpt, so its effect cannot be determined from here; confirm against the full file.
361 public void setSubstringQuery() {
/**
 * Computes the Lucene-escaped form of the keyword string, unless the query
 * is already marked as escaped. Synchronized on this instance.
 *
 * NOTE(review): the line that updates the 'escaped' flag is not visible in
 * this excerpt - presumably it is set to true afterwards; confirm.
 */
365 synchronized public void escape() {
366 if (isEscaped() ==
false) {
367 escapedQuery = KeywordSearchUtil.escapeLuceneQuery(keywordString);
// NOTE(review): body is not visible in this excerpt - presumably returns the 'escaped' field; confirm.
373 synchronized public boolean isEscaped() {
// NOTE(review): body is not visible in this excerpt, so the returned value cannot be determined from here; confirm against the full file.
378 public boolean isLiteral() {
/**
 * Returns the search term of the original keyword, i.e. the unescaped regex
 * text this query was created with.
 *
 * @return originalKeyword.getSearchTerm()
 */
383 public String getQueryString() {
384 return originalKeyword.getSearchTerm();
/**
 * Returns the escaped query string. Synchronized on this instance.
 *
 * NOTE(review): only the not-yet-escaped check is visible in this excerpt -
 * presumably it calls escape() when needed and returns escapedQuery; confirm.
 */
388 synchronized public String getEscapedQueryString() {
389 if (
false == isEscaped()) {
/**
 * Creates a blackboard artifact for one keyword hit on the given content.
 *
 * When the keyword's attribute type is not TSK_CARD_NUMBER, a
 * TSK_KEYWORD_HIT artifact is created carrying the keyword and regex
 * attributes. Otherwise a TSK_ACCOUNT artifact is created: track 1 / track 2
 * data is parsed out of the hit snippet, bank identification number details
 * are looked up, and the resulting attributes are attached. Set name,
 * preview, associated artifact and search type attributes are then added in
 * either case.
 *
 * NOTE(review): the return statements are not visible in this excerpt -
 * presumably returns the new artifact, or null on failure; confirm.
 *
 * @param content      the content the hit was found in; a null value is logged as a warning
 * @param foundKeyword the keyword instance that matched
 * @param hit          the hit providing snippet, Solr document id and artifact id
 * @param snippet      preview text; skipped when null
 * @param listName     keyword list name; skipped when blank
 */
396 public BlackboardArtifact writeSingleFileHitsToBlackBoard(Content content, Keyword foundKeyword, KeywordHit hit, String snippet, String listName) {
397 final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName();
399 if (content == null) {
400 LOGGER.log(Level.WARNING,
"Error adding artifact for keyword hit to blackboard");
410 BlackboardArtifact newArtifact;
411 Collection<BlackboardAttribute> attributes =
new ArrayList<>();
// Ordinary keyword hit: record the matched term and the regex that found it.
412 if (originalKeyword.getArtifactAttributeType() != ATTRIBUTE_TYPE.TSK_CARD_NUMBER) {
413 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, foundKeyword.getSearchTerm()));
414 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP, MODULE_NAME, getQueryString()));
416 newArtifact = content.newArtifact(ARTIFACT_TYPE.TSK_KEYWORD_HIT);
417 }
catch (TskCoreException ex) {
418 LOGGER.log(Level.SEVERE,
"Error adding artifact for keyword hit to blackboard", ex);
// Credit card hit: build an account artifact instead.
426 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_ACCOUNT_TYPE, MODULE_NAME, Account.Type.CREDIT_CARD.name()));
427 Map<BlackboardAttribute.Type, BlackboardAttribute> parsedTrackAttributeMap =
new HashMap<>();
// Try the track 1 pattern and then the track 2 pattern against the snippet.
// NOTE(review): track 1 is qualified with TermsComponentQuery while track 2 is unqualified - presumably a static import; confirm they resolve to the same owner.
428 Matcher matcher = TermsComponentQuery.CREDIT_CARD_TRACK1_PATTERN.matcher(hit.getSnippet());
429 if (matcher.find()) {
430 parseTrack1Data(parsedTrackAttributeMap, matcher);
432 matcher = CREDIT_CARD_TRACK2_PATTERN.matcher(hit.getSnippet());
433 if (matcher.find()) {
434 parseTrack2Data(parsedTrackAttributeMap, matcher);
// Without a parsed card number, log the failure; the message differs for artifact hits and content hits.
// NOTE(review): what follows the logging is not visible - presumably an early return; confirm, since ccnAttribute is dereferenced below.
436 final BlackboardAttribute ccnAttribute = parsedTrackAttributeMap.get(
new BlackboardAttribute.Type(ATTRIBUTE_TYPE.TSK_CARD_NUMBER));
437 if (ccnAttribute == null || StringUtils.isBlank(ccnAttribute.getValueString())) {
438 if (hit.isArtifactHit()) {
439 LOGGER.log(Level.SEVERE, String.format(
"Failed to parse credit card account number for artifact keyword hit: term = %s, snippet = '%s', artifact id = %d", foundKeyword.getSearchTerm(), hit.getSnippet(), hit.getArtifactID().get()));
441 LOGGER.log(Level.SEVERE, String.format(
"Failed to parse credit card account number for content keyword hit: term = %s, snippet = '%s', object id = %d", foundKeyword.getSearchTerm(), hit.getSnippet(), hit.getContentID()));
445 attributes.addAll(parsedTrackAttributeMap.values());
// The first 8 digits of the card number are used as the bank identification number (BIN) lookup key.
// NOTE(review): substring(0, 8) throws for values shorter than 8 characters and parseInt throws on non-digits - confirm upstream validation guarantees both.
451 final int bin = Integer.parseInt(ccnAttribute.getValueString().substring(0, 8));
452 CreditCards.BankIdentificationNumber binInfo = CreditCards.getBINInfo(bin);
// Copy whichever optional BIN details are present into attributes.
453 if (binInfo != null) {
454 binInfo.getScheme().ifPresent(scheme
455 -> attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_CARD_SCHEME, MODULE_NAME, scheme)));
456 binInfo.getCardType().ifPresent(cardType
457 -> attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_CARD_TYPE, MODULE_NAME, cardType)));
458 binInfo.getBrand().ifPresent(brand
459 -> attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_BRAND_NAME, MODULE_NAME, brand)));
460 binInfo.getBankName().ifPresent(bankName
461 -> attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_BANK_NAME, MODULE_NAME, bankName)));
462 binInfo.getBankPhoneNumber().ifPresent(phoneNumber
463 -> attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_PHONE_NUMBER, MODULE_NAME, phoneNumber)));
464 binInfo.getBankURL().ifPresent(url
465 -> attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_URL, MODULE_NAME, url)));
466 binInfo.getCountry().ifPresent(country
467 -> attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_COUNTRY, MODULE_NAME, country)));
468 binInfo.getBankCity().ifPresent(city
469 -> attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_CITY, MODULE_NAME, city)));
// For unused / unallocated block files, also record which Solr document the hit came from.
477 if (content instanceof AbstractFile) {
478 AbstractFile file = (AbstractFile) content;
479 if (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS
480 || file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS) {
481 attributes.add(
new BlackboardAttribute(KEYWORD_SEARCH_DOCUMENT_ID, MODULE_NAME, hit.getSolrDocumentId()));
489 newArtifact = content.newArtifact(ARTIFACT_TYPE.TSK_ACCOUNT);
490 }
catch (TskCoreException ex) {
491 LOGGER.log(Level.SEVERE,
"Error adding artifact for account to blackboard", ex);
// Attributes added for both artifact kinds.
496 if (StringUtils.isNotBlank(listName)) {
497 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SET_NAME, MODULE_NAME, listName));
499 if (snippet != null) {
500 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW, MODULE_NAME, snippet));
// Link back to the source artifact when the hit came from one.
503 hit.getArtifactID().ifPresent(artifactID
504 -> attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, artifactID))
// NOTE(review): the stored value is the enum ordinal, so it depends on the declaration order of KeywordSearch.QueryType.
507 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.REGEX.ordinal()));
510 newArtifact.addAttributes(attributes);
512 }
catch (TskCoreException e) {
513 LOGGER.log(Level.SEVERE,
"Error adding bb attributes for terms search artifact", e);
/**
 * Copies the track 2 fields from a successful match into the attribute map,
 * skipping any attribute type that is already present.
 *
 * The matcher's pattern must define the named groups "accountNumber",
 * "expiration", "serviceCode", "discretionary" and "LRC", because each is
 * read by name through addAttributeIfNotAlreadyCaptured().
 *
 * @param attributesMap map of attribute type to attribute, updated in place
 * @param matcher       matcher on which find() has already succeeded
 */
526 static private void parseTrack2Data(Map<BlackboardAttribute.Type, BlackboardAttribute> attributesMap, Matcher matcher) {
527 addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_NUMBER,
"accountNumber", matcher);
528 addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_EXPIRATION,
"expiration", matcher);
529 addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_SERVICE_CODE,
"serviceCode", matcher);
530 addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_DISCRETIONARY,
"discretionary", matcher);
531 addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_LRC,
"LRC", matcher);
/**
 * Copies the track 1 fields from a successful match into the attribute map.
 *
 * Delegates to parseTrack2Data() for the fields the two tracks share, then
 * adds the person name from the "name" group. The matcher's pattern must
 * therefore define every group parseTrack2Data() reads, plus "name".
 *
 * @param attributeMap map of attribute type to attribute, updated in place
 * @param matcher      matcher on which find() has already succeeded
 */
543 static private void parseTrack1Data(Map<BlackboardAttribute.Type, BlackboardAttribute> attributeMap, Matcher matcher) {
544 parseTrack2Data(attributeMap, matcher);
545 addAttributeIfNotAlreadyCaptured(attributeMap, ATTRIBUTE_TYPE.TSK_NAME_PERSON,
"name", matcher);
560 static private void addAttributeIfNotAlreadyCaptured(Map<BlackboardAttribute.Type, BlackboardAttribute> attributeMap, ATTRIBUTE_TYPE attrType, String groupName, Matcher matcher) {
561 BlackboardAttribute.Type type =
new BlackboardAttribute.Type(attrType);
562 attributeMap.computeIfAbsent(type, (BlackboardAttribute.Type t) -> {
563 String value = matcher.group(groupName);
564 if (attrType.equals(ATTRIBUTE_TYPE.TSK_CARD_NUMBER)) {
565 attributeMap.put(new BlackboardAttribute.Type(ATTRIBUTE_TYPE.TSK_KEYWORD),
566 new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, value));
567 value = CharMatcher.anyOf(
" -").removeFrom(value);
569 if (StringUtils.isNotBlank(value)) {
570 return new BlackboardAttribute(attrType, MODULE_NAME, value);