Autopsy  4.4
Graphical digital forensics platform for The Sleuth Kit and other tools.
AccountsText.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2017 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.keywordsearch;
20 
21 import com.google.common.collect.Iterators;
22 import java.util.Arrays;
23 import java.util.Collection;
24 import java.util.HashMap;
25 import java.util.HashSet;
26 import java.util.Optional;
27 import java.util.Set;
28 import java.util.TreeMap;
29 import java.util.logging.Level;
30 import java.util.regex.Matcher;
31 import java.util.regex.Pattern;
32 import javax.annotation.concurrent.GuardedBy;
33 import org.apache.commons.lang3.StringUtils;
34 import org.apache.solr.client.solrj.SolrQuery;
35 import org.apache.solr.client.solrj.SolrRequest.METHOD;
36 import org.apache.solr.client.solrj.response.QueryResponse;
37 import org.openide.util.NbBundle;
40 import org.sleuthkit.datamodel.BlackboardArtifact;
41 import org.sleuthkit.datamodel.BlackboardAttribute;
42 import org.sleuthkit.datamodel.TskCoreException;
43 
53 class AccountsText implements IndexedText {
54 
55  private static final Logger logger = Logger.getLogger(AccountsText.class.getName());
56  private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
57 
58  private static final String HIGHLIGHT_PRE = "<span style='background:yellow'>"; //NON-NLS
59  private static final String ANCHOR_NAME_PREFIX = AccountsText.class.getName() + "_";
60 
61  private static final String INSERT_PREFIX = "<a name='" + ANCHOR_NAME_PREFIX; //NON-NLS
62  private static final String INSERT_POSTFIX = "'></a>$0"; //$0 will insert current regex match //NON-NLS
63  private static final Pattern ANCHOR_DETECTION_PATTERN = Pattern.compile(HIGHLIGHT_PRE);
64 
65  private static final BlackboardAttribute.Type TSK_KEYWORD_SEARCH_DOCUMENT_ID = new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_DOCUMENT_ID);
66  private static final BlackboardAttribute.Type TSK_CARD_NUMBER = new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CARD_NUMBER);
67  private static final BlackboardAttribute.Type TSK_KEYWORD = new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD);
68 
69  private static final String FIELD = Server.Schema.CONTENT_STR.toString();
70 
71  private final Server solrServer = KeywordSearch.getServer();
72 
73  private final long solrObjectId;
74  private final Collection<? extends BlackboardArtifact> artifacts;
75  private final Set<String> accountNumbers = new HashSet<>();
76  private final String displayName;
77 
78  @GuardedBy("this")
79  private boolean isPageInfoLoaded = false;
80  private int numberPagesForFile = 0;
81  private Integer currentPage = 0;
82 
83  /*
84  * map from page/chunk to number of hits. value is 0 if not yet known.
85  */
86  private final TreeMap<Integer, Integer> numberOfHitsPerPage = new TreeMap<>();
87  /*
88  * set of pages, used for iterating back and forth. Only stores pages with
89  * hits
90  */
91  private final Set<Integer> pages = numberOfHitsPerPage.keySet();
92  /*
93  * map from page/chunk number to current hit on that page.
94  */
95  private final HashMap<Integer, Integer> currentHitPerPage = new HashMap<>();
96 
/**
 * Creates the indexed text for a single account artifact by delegating to
 * the collection-based constructor with a one-element list.
 *
 * @param objectID ID of the object the artifact belongs to.
 * @param artifact The account artifact whose number should be highlighted.
 */
AccountsText(long objectID, BlackboardArtifact artifact) {
    this(objectID, Arrays.asList(artifact));
}
101 
/**
 * Creates the indexed text for a collection of account artifacts. The
 * display name is the singular message when exactly one artifact is given
 * and the plural message otherwise.
 *
 * @param objectID  ID of the object the artifacts belong to.
 * @param artifacts The account artifacts whose numbers should be
 *                  highlighted.
 */
@NbBundle.Messages({
    "AccountsText.creditCardNumber=Credit Card Number",
    "AccountsText.creditCardNumbers=Credit Card Numbers"})
AccountsText(long objectID, Collection<? extends BlackboardArtifact> artifacts) {
    this.solrObjectId = objectID;
    this.artifacts = artifacts;
    if (artifacts.size() == 1) {
        this.displayName = Bundle.AccountsText_creditCardNumber();
    } else {
        this.displayName = Bundle.AccountsText_creditCardNumbers();
    }
}
112 
113  long getObjectId() {
114  return this.solrObjectId;
115  }
116 
117  @Override
118  public int getNumberPages() {
119  return this.numberPagesForFile;
120  }
121 
122  @Override
123  public int getCurrentPage() {
124  return this.currentPage;
125  }
126 
127  @Override
128  public boolean hasNextPage() {
129  return getIndexOfCurrentPage() < pages.size() - 1;
130 
131  }
132 
133  @Override
134  public boolean hasPreviousPage() {
135  return getIndexOfCurrentPage() > 0;
136  }
137 
138  @Override
139  @NbBundle.Messages("AccountsText.nextPage.exception.msg=No next page.")
140  public int nextPage() {
141  if (hasNextPage()) {
142  currentPage = Iterators.get(pages.iterator(), getIndexOfCurrentPage() + 1);
143  return currentPage;
144  } else {
145  throw new IllegalStateException(Bundle.AccountsText_nextPage_exception_msg());
146  }
147  }
148 
149  @Override
150  @NbBundle.Messages("AccountsText.previousPage.exception.msg=No previous page.")
151  public int previousPage() {
152  if (hasPreviousPage()) {
153  currentPage = Iterators.get(pages.iterator(), getIndexOfCurrentPage() - 1);
154  return currentPage;
155  } else {
156  throw new IllegalStateException(Bundle.AccountsText_previousPage_exception_msg());
157  }
158  }
159 
160  private int getIndexOfCurrentPage() {
161  return Iterators.indexOf(pages.iterator(), this.currentPage::equals);
162  }
163 
164  @Override
165  public boolean hasNextItem() {
166  if (this.currentHitPerPage.containsKey(currentPage)) {
167  return this.currentHitPerPage.get(currentPage) < this.numberOfHitsPerPage.get(currentPage);
168  } else {
169  return false;
170  }
171  }
172 
173  @Override
174  public boolean hasPreviousItem() {
175  if (this.currentHitPerPage.containsKey(currentPage)) {
176  return this.currentHitPerPage.get(currentPage) > 1;
177  } else {
178  return false;
179  }
180  }
181 
182  @Override
183  @NbBundle.Messages("AccountsText.nextItem.exception.msg=No next item.")
184  public int nextItem() {
185  if (hasNextItem()) {
186  return currentHitPerPage.merge(currentPage, 1, Integer::sum);
187  } else {
188  throw new IllegalStateException(Bundle.AccountsText_nextItem_exception_msg());
189  }
190  }
191 
192  @Override
193  @NbBundle.Messages("AccountsText.previousItem.exception.msg=No previous item.")
194  public int previousItem() {
195  if (hasPreviousItem()) {
196  return currentHitPerPage.merge(currentPage, -1, Integer::sum);
197  } else {
198  throw new IllegalStateException(Bundle.AccountsText_previousItem_exception_msg());
199  }
200  }
201 
202  @Override
203  public int currentItem() {
204  if (this.currentHitPerPage.containsKey(currentPage)) {
205  return currentHitPerPage.get(currentPage);
206  } else {
207  return 0;
208  }
209  }
210 
215  synchronized private void loadPageInfo() throws IllegalStateException, TskCoreException, KeywordSearchModuleException, NoOpenCoreException {
216  if (isPageInfoLoaded) {
217  return;
218  }
219 
220  this.numberPagesForFile = solrServer.queryNumFileChunks(this.solrObjectId);
221 
222  boolean needsQuery = false;
223 
224  for (BlackboardArtifact artifact : artifacts) {
225  if (solrObjectId != artifact.getObjectID()) {
226  throw new IllegalStateException("not all artifacts are from the same object!");
227  }
228 
229  //add both the canonical form and the form in the text as accountNumbers to highlight.
230  this.accountNumbers.add(artifact.getAttribute(TSK_KEYWORD).getValueString());
231  this.accountNumbers.add(artifact.getAttribute(TSK_CARD_NUMBER).getValueString());
232 
233  //if the chunk id is present just use that.
234  Optional<Integer> chunkID =
235  Optional.ofNullable(artifact.getAttribute(TSK_KEYWORD_SEARCH_DOCUMENT_ID))
236  .map(BlackboardAttribute::getValueString)
237  .map(String::trim)
238  .map(kwsdocID -> StringUtils.substringAfterLast(kwsdocID, Server.CHUNK_ID_SEPARATOR))
239  .map(Integer::valueOf);
240  if (chunkID.isPresent()) {
241  numberOfHitsPerPage.put(chunkID.get(), 0);
242  currentHitPerPage.put(chunkID.get(), 0);
243  } else {
244  //otherwise we need to do a query to figure out the paging.
245  needsQuery = true;
246  }
247  }
248 
249  if (needsQuery) {
250  // Run a query to figure out which chunks for the current object have hits.
251  Keyword queryKeyword = new Keyword(CCN_REGEX, false, false);
252  KeywordSearchQuery chunksQuery = KeywordSearchUtil.getQueryForKeyword(queryKeyword, new KeywordList(Arrays.asList(queryKeyword)));
253  chunksQuery.addFilter(new KeywordQueryFilter(KeywordQueryFilter.FilterType.CHUNK, this.solrObjectId));
254  //load the chunks/pages from the result of the query.
255  loadPageInfoFromHits(chunksQuery.performQuery());
256  }
257 
258  this.currentPage = pages.stream().findFirst().orElse(1);
259 
260  isPageInfoLoaded = true;
261  }
262  private static final String CCN_REGEX = "(%?)(B?)([0-9][ \\-]*?){12,19}(\\^?)";
263 
267  synchronized private void loadPageInfoFromHits(QueryResults hits) {
268  //organize the hits by page, filter as needed
269  for (Keyword k : hits.getKeywords()) {
270  for (KeywordHit hit : hits.getResults(k)) {
271  int chunkID = hit.getChunkId();
272  if (chunkID != 0 && this.solrObjectId == hit.getSolrObjectId()) {
273  String hitString = hit.getHit();
274  if (accountNumbers.stream().anyMatch(hitString::contains)) {
275  numberOfHitsPerPage.put(chunkID, 0); //unknown number of matches in the page
276  currentHitPerPage.put(chunkID, 0); //set current hit to 0th
277  }
278  }
279  }
280  }
281  }
282 
283  @Override
284  @NbBundle.Messages({"AccountsText.getMarkup.noMatchMsg="
285  + "<html><pre><span style\\\\='background\\\\:yellow'>There were no keyword hits on this page. <br />"
286  + "The keyword could have been in the file name."
287  + " <br />Advance to another page if present, or to view the original text, choose File Text"
288  + " <br />in the drop down menu to the right...</span></pre></html>",
289  "AccountsText.getMarkup.queryFailedMsg="
290  + "<html><pre><span style\\\\='background\\\\:yellow'>Failed to retrieve keyword hit results."
291  + " <br />Confirm that Autopsy can connect to the Solr server. "
292  + "<br /></span></pre></html>"})
293  public String getText() {
294  try {
295  loadPageInfo(); //inits once
296 
297  SolrQuery q = new SolrQuery();
298  q.setShowDebugInfo(DEBUG); //debug
299 
300  String contentIdStr = this.solrObjectId + Server.CHUNK_ID_SEPARATOR + this.currentPage;
301  final String filterQuery = Server.Schema.ID.toString() + ":" + contentIdStr;
302  //set the documentID filter
303  q.setQuery(filterQuery);
304  q.setFields(FIELD);
305 
306  QueryResponse queryResponse = solrServer.query(q, METHOD.POST);
307 
308  String highlightedText =
309  HighlightedText.attemptManualHighlighting(
310  queryResponse.getResults(),
311  Server.Schema.CONTENT_STR.toString(),
312  accountNumbers
313  ).trim();
314 
315  highlightedText = insertAnchors(highlightedText);
316 
317  // extracted content (minus highlight tags) is HTML-escaped
318  return "<html><pre>" + highlightedText + "</pre></html>"; //NON-NLS
319  } catch (Exception ex) {
320  logger.log(Level.WARNING, "Error getting highlighted text for " + solrObjectId, ex); //NON-NLS
321  return Bundle.AccountsText_getMarkup_queryFailedMsg();
322  }
323  }
324 
333  private String insertAnchors(String searchableContent) {
334  /*
335  * use regex matcher to iterate over occurences of HIGHLIGHT_PRE, and
336  * prepend them with an anchor tag.
337  */
338  Matcher m = ANCHOR_DETECTION_PATTERN.matcher(searchableContent);
339  StringBuffer sb = new StringBuffer(searchableContent.length());
340  int count = 0;
341  while (m.find()) {
342  count++;
343  m.appendReplacement(sb, INSERT_PREFIX + count + INSERT_POSTFIX);
344  }
345  m.appendTail(sb);
346  //store total hits for this page, now that we know it
347  this.numberOfHitsPerPage.put(this.currentPage, count);
348  if (this.currentItem() == 0 && this.hasNextItem()) {
349  this.nextItem();
350  }
351  return sb.toString();
352  }
353 
354  @Override
355  public String toString() {
356  return displayName;
357  }
358 
359  @Override
360  public boolean isSearchable() {
361  return true;
362  }
363 
364  @Override
365  public String getAnchorPrefix() {
366  return ANCHOR_NAME_PREFIX;
367  }
368 
369  @Override
370  public int getNumberHits() {
371  if (!this.numberOfHitsPerPage.containsKey(this.currentPage)) {
372  return 0;
373  }
374  return this.numberOfHitsPerPage.get(this.currentPage);
375  }
376 }
static Version.Type getBuildType()
Definition: Version.java:87
QueryResponse query(SolrQuery sq)
Definition: Server.java:1001
synchronized static Logger getLogger(String name)
Definition: Logger.java:161

Copyright © 2012-2016 Basis Technology. Generated on: Tue Jun 13 2017
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.