Autopsy  4.4.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
AccountsText.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2017 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.keywordsearch;
20 
21 import com.google.common.collect.Iterators;
22 import java.util.Arrays;
23 import java.util.Collection;
24 import java.util.HashMap;
25 import java.util.HashSet;
26 import java.util.Optional;
27 import java.util.Set;
28 import java.util.TreeMap;
29 import java.util.logging.Level;
30 import java.util.regex.Matcher;
31 import java.util.regex.Pattern;
32 import javax.annotation.concurrent.GuardedBy;
33 import org.apache.commons.lang3.StringUtils;
34 import org.apache.solr.client.solrj.SolrQuery;
35 import org.apache.solr.client.solrj.SolrRequest.METHOD;
36 import org.apache.solr.client.solrj.response.QueryResponse;
37 import org.openide.util.NbBundle;
40 import org.sleuthkit.datamodel.BlackboardArtifact;
41 import org.sleuthkit.datamodel.BlackboardAttribute;
42 import org.sleuthkit.datamodel.TskCoreException;
43 
53 class AccountsText implements IndexedText {
54 
55  private static final Logger logger = Logger.getLogger(AccountsText.class.getName());
56  private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
57 
58  private static final String HIGHLIGHT_PRE = "<span style='background:yellow'>"; //NON-NLS
59  private static final String ANCHOR_NAME_PREFIX = AccountsText.class.getName() + "_";
60 
61  private static final String INSERT_PREFIX = "<a name='" + ANCHOR_NAME_PREFIX; //NON-NLS
62  private static final String INSERT_POSTFIX = "'></a>$0"; //$0 will insert current regex match //NON-NLS
63  private static final Pattern ANCHOR_DETECTION_PATTERN = Pattern.compile(HIGHLIGHT_PRE);
64 
65  private static final BlackboardAttribute.Type TSK_KEYWORD_SEARCH_DOCUMENT_ID = new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_DOCUMENT_ID);
66  private static final BlackboardAttribute.Type TSK_CARD_NUMBER = new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CARD_NUMBER);
67  private static final BlackboardAttribute.Type TSK_KEYWORD = new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD);
68 
69  private static final String FIELD = Server.Schema.CONTENT_STR.toString();
70 
71  private final Server solrServer = KeywordSearch.getServer();
72 
73  private final long solrObjectId;
74  private final Collection<? extends BlackboardArtifact> artifacts;
75  private final Set<String> accountNumbers = new HashSet<>();
76  private final String title;
77 
78  @GuardedBy("this")
79  private boolean isPageInfoLoaded = false;
80  private int numberPagesForFile = 0;
81  private Integer currentPage = 0;
82 
83  /*
84  * map from page/chunk to number of hits. value is 0 if not yet known.
85  */
86  private final TreeMap<Integer, Integer> numberOfHitsPerPage = new TreeMap<>();
87  /*
88  * set of pages, used for iterating back and forth. Only stores pages with
89  * hits
90  */
91  private final Set<Integer> pages = numberOfHitsPerPage.keySet();
92  /*
93  * map from page/chunk number to current hit on that page.
94  */
95  private final HashMap<Integer, Integer> currentHitPerPage = new HashMap<>();
96 
/**
 * Creates an AccountsText for a single artifact by delegating to the
 * collection-based constructor.
 *
 * @param objectID the object ID the artifact is expected to belong to
 * @param artifact the single artifact to highlight
 */
AccountsText(long objectID, BlackboardArtifact artifact) {
    this(objectID, Arrays.asList(artifact));
}
101 
/**
 * Creates an AccountsText for a collection of artifacts. The title uses
 * the singular message when exactly one artifact is given and the plural
 * message otherwise.
 *
 * @param objectID  the object ID the artifacts are expected to belong to
 * @param artifacts the artifacts to highlight
 */
@NbBundle.Messages({
    "AccountsText.creditCardNumber=Credit Card Number",
    "AccountsText.creditCardNumbers=Credit Card Numbers"})
AccountsText(long objectID, Collection<? extends BlackboardArtifact> artifacts) {
    this.solrObjectId = objectID;
    this.artifacts = artifacts;
    if (artifacts.size() == 1) {
        this.title = Bundle.AccountsText_creditCardNumber();
    } else {
        this.title = Bundle.AccountsText_creditCardNumbers();
    }
}
112 
113  long getObjectId() {
114  return this.solrObjectId;
115  }
116 
117  @Override
118  public int getNumberPages() {
119  return this.numberPagesForFile;
120  }
121 
122  @Override
123  public int getCurrentPage() {
124  return this.currentPage;
125  }
126 
127  @Override
128  public boolean hasNextPage() {
129  return getIndexOfCurrentPage() < pages.size() - 1;
130 
131  }
132 
133  @Override
134  public boolean hasPreviousPage() {
135  return getIndexOfCurrentPage() > 0;
136  }
137 
138  @Override
139  @NbBundle.Messages("AccountsText.nextPage.exception.msg=No next page.")
140  public int nextPage() {
141  if (hasNextPage()) {
142  currentPage = Iterators.get(pages.iterator(), getIndexOfCurrentPage() + 1);
143  return currentPage;
144  } else {
145  throw new IllegalStateException(Bundle.AccountsText_nextPage_exception_msg());
146  }
147  }
148 
149  @Override
150  @NbBundle.Messages("AccountsText.previousPage.exception.msg=No previous page.")
151  public int previousPage() {
152  if (hasPreviousPage()) {
153  currentPage = Iterators.get(pages.iterator(), getIndexOfCurrentPage() - 1);
154  return currentPage;
155  } else {
156  throw new IllegalStateException(Bundle.AccountsText_previousPage_exception_msg());
157  }
158  }
159 
160  private int getIndexOfCurrentPage() {
161  return Iterators.indexOf(pages.iterator(), this.currentPage::equals);
162  }
163 
164  @Override
165  public boolean hasNextItem() {
166  if (this.currentHitPerPage.containsKey(currentPage)) {
167  return this.currentHitPerPage.get(currentPage) < this.numberOfHitsPerPage.get(currentPage);
168  } else {
169  return false;
170  }
171  }
172 
173  @Override
174  public boolean hasPreviousItem() {
175  if (this.currentHitPerPage.containsKey(currentPage)) {
176  return this.currentHitPerPage.get(currentPage) > 1;
177  } else {
178  return false;
179  }
180  }
181 
182  @Override
183  @NbBundle.Messages("AccountsText.nextItem.exception.msg=No next item.")
184  public int nextItem() {
185  if (hasNextItem()) {
186  return currentHitPerPage.merge(currentPage, 1, Integer::sum);
187  } else {
188  throw new IllegalStateException(Bundle.AccountsText_nextItem_exception_msg());
189  }
190  }
191 
192  @Override
193  @NbBundle.Messages("AccountsText.previousItem.exception.msg=No previous item.")
194  public int previousItem() {
195  if (hasPreviousItem()) {
196  return currentHitPerPage.merge(currentPage, -1, Integer::sum);
197  } else {
198  throw new IllegalStateException(Bundle.AccountsText_previousItem_exception_msg());
199  }
200  }
201 
202  @Override
203  public int currentItem() {
204  if (this.currentHitPerPage.containsKey(currentPage)) {
205  return currentHitPerPage.get(currentPage);
206  } else {
207  return 0;
208  }
209  }
210 
215  synchronized private void loadPageInfo() throws IllegalStateException, TskCoreException, KeywordSearchModuleException, NoOpenCoreException {
216  if (isPageInfoLoaded) {
217  return;
218  }
219 
220  this.numberPagesForFile = solrServer.queryNumFileChunks(this.solrObjectId);
221 
222  boolean needsQuery = false;
223 
224  for (BlackboardArtifact artifact : artifacts) {
225  if (solrObjectId != artifact.getObjectID()) {
226  throw new IllegalStateException("not all artifacts are from the same object!");
227  }
228 
229  //add both the canonical form and the form in the text as accountNumbers to highlight.
230  BlackboardAttribute attribute = artifact.getAttribute(TSK_KEYWORD);
231  if (attribute != null) {
232  this.accountNumbers.add(attribute.getValueString());
233  }
234  attribute = artifact.getAttribute(TSK_CARD_NUMBER);
235  if (attribute != null) {
236  this.accountNumbers.add(attribute.getValueString());
237  }
238 
239  //if the chunk id is present just use that.
240  Optional<Integer> chunkID =
241  Optional.ofNullable(artifact.getAttribute(TSK_KEYWORD_SEARCH_DOCUMENT_ID))
242  .map(BlackboardAttribute::getValueString)
243  .map(String::trim)
244  .map(kwsdocID -> StringUtils.substringAfterLast(kwsdocID, Server.CHUNK_ID_SEPARATOR))
245  .map(Integer::valueOf);
246  if (chunkID.isPresent()) {
247  numberOfHitsPerPage.put(chunkID.get(), 0);
248  currentHitPerPage.put(chunkID.get(), 0);
249  } else {
250  //otherwise we need to do a query to figure out the paging.
251  needsQuery = true;
252  }
253  }
254 
255  if (needsQuery) {
256  // Run a query to figure out which chunks for the current object have hits.
257  Keyword queryKeyword = new Keyword(CCN_REGEX, false, false);
258  KeywordSearchQuery chunksQuery = KeywordSearchUtil.getQueryForKeyword(queryKeyword, new KeywordList(Arrays.asList(queryKeyword)));
259  chunksQuery.addFilter(new KeywordQueryFilter(KeywordQueryFilter.FilterType.CHUNK, this.solrObjectId));
260  //load the chunks/pages from the result of the query.
261  loadPageInfoFromHits(chunksQuery.performQuery());
262  }
263 
264  this.currentPage = pages.stream().findFirst().orElse(1);
265 
266  isPageInfoLoaded = true;
267  }
268  private static final String CCN_REGEX = "(%?)(B?)([0-9][ \\-]*?){12,19}(\\^?)";
269 
273  synchronized private void loadPageInfoFromHits(QueryResults hits) {
274  //organize the hits by page, filter as needed
275  for (Keyword k : hits.getKeywords()) {
276  for (KeywordHit hit : hits.getResults(k)) {
277  int chunkID = hit.getChunkId();
278  if (chunkID != 0 && this.solrObjectId == hit.getSolrObjectId()) {
279  String hitString = hit.getHit();
280  if (accountNumbers.stream().anyMatch(hitString::contains)) {
281  numberOfHitsPerPage.put(chunkID, 0); //unknown number of matches in the page
282  currentHitPerPage.put(chunkID, 0); //set current hit to 0th
283  }
284  }
285  }
286  }
287  }
288 
289  @Override
290  @NbBundle.Messages({"AccountsText.getMarkup.noMatchMsg="
291  + "<html><pre><span style\\\\='background\\\\:yellow'>There were no keyword hits on this page. <br />"
292  + "The keyword could have been in the file name."
293  + " <br />Advance to another page if present, or to view the original text, choose File Text"
294  + " <br />in the drop down menu to the right...</span></pre></html>",
295  "AccountsText.getMarkup.queryFailedMsg="
296  + "<html><pre><span style\\\\='background\\\\:yellow'>Failed to retrieve keyword hit results."
297  + " <br />Confirm that Autopsy can connect to the Solr server. "
298  + "<br /></span></pre></html>"})
299  public String getText() {
300  try {
301  loadPageInfo(); //inits once
302 
303  SolrQuery q = new SolrQuery();
304  q.setShowDebugInfo(DEBUG); //debug
305 
306  String contentIdStr = this.solrObjectId + Server.CHUNK_ID_SEPARATOR + this.currentPage;
307  final String filterQuery = Server.Schema.ID.toString() + ":" + contentIdStr;
308  //set the documentID filter
309  q.setQuery(filterQuery);
310  q.setFields(FIELD);
311 
312  QueryResponse queryResponse = solrServer.query(q, METHOD.POST);
313 
314  String highlightedText =
315  HighlightedText.attemptManualHighlighting(
316  queryResponse.getResults(),
317  Server.Schema.CONTENT_STR.toString(),
318  accountNumbers
319  ).trim();
320 
321  highlightedText = insertAnchors(highlightedText);
322 
323  // extracted content (minus highlight tags) is HTML-escaped
324  return "<html><pre>" + highlightedText + "</pre></html>"; //NON-NLS
325  } catch (Exception ex) {
326  logger.log(Level.SEVERE, "Error getting highlighted text for Solr doc id " + this.solrObjectId + ", chunkID " + this.currentPage , ex); //NON-NLS
327  return Bundle.AccountsText_getMarkup_queryFailedMsg();
328  }
329  }
330 
339  private String insertAnchors(String searchableContent) {
340  /*
341  * use regex matcher to iterate over occurences of HIGHLIGHT_PRE, and
342  * prepend them with an anchor tag.
343  */
344  Matcher m = ANCHOR_DETECTION_PATTERN.matcher(searchableContent);
345  StringBuffer sb = new StringBuffer(searchableContent.length());
346  int count = 0;
347  while (m.find()) {
348  count++;
349  m.appendReplacement(sb, INSERT_PREFIX + count + INSERT_POSTFIX);
350  }
351  m.appendTail(sb);
352  //store total hits for this page, now that we know it
353  this.numberOfHitsPerPage.put(this.currentPage, count);
354  if (this.currentItem() == 0 && this.hasNextItem()) {
355  this.nextItem();
356  }
357  return sb.toString();
358  }
359 
360  @Override
361  public String toString() {
362  return title;
363  }
364 
365  @Override
366  public boolean isSearchable() {
367  return true;
368  }
369 
370  @Override
371  public String getAnchorPrefix() {
372  return ANCHOR_NAME_PREFIX;
373  }
374 
375  @Override
376  public int getNumberHits() {
377  if (!this.numberOfHitsPerPage.containsKey(this.currentPage)) {
378  return 0;
379  }
380  return this.numberOfHitsPerPage.get(this.currentPage);
381  }
382 }
static Version.Type getBuildType()
Definition: Version.java:87
QueryResponse query(SolrQuery sq)
Definition: Server.java:1001
synchronized static Logger getLogger(String name)
Definition: Logger.java:161

Copyright © 2012-2016 Basis Technology. Generated on: Fri Sep 29 2017
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.