Autopsy  4.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
AccountsText.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2016 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.keywordsearch;
20 
21 import java.util.ArrayList;
22 import java.util.HashMap;
23 import java.util.HashSet;
24 import java.util.LinkedHashMap;
25 import java.util.List;
26 import java.util.Map;
27 import java.util.Set;
28 import java.util.TreeSet;
29 import java.util.logging.Level;
30 import java.util.regex.Matcher;
31 import java.util.regex.Pattern;
32 import java.util.stream.Collectors;
33 import org.apache.commons.lang.StringUtils;
34 import org.apache.solr.client.solrj.SolrQuery;
35 import org.apache.solr.client.solrj.SolrRequest.METHOD;
36 import org.apache.solr.client.solrj.response.QueryResponse;
37 import org.apache.solr.common.SolrDocument;
38 import org.openide.util.NbBundle;
41 
51 class AccountsText implements IndexedText {
52 
53  private static final Logger LOGGER = Logger.getLogger(AccountsText.class.getName());
54  private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
55 
56  private static final String HIGHLIGHT_PRE = "<span style='background:yellow'>"; //NON-NLS
57  private static final String HIGHLIGHT_POST = "</span>"; //NON-NLS
58  private static final String ANCHOR_NAME_PREFIX = AccountsText.class.getName() + "_";
59 
60  private static final String INSERT_PREFIX = "<a name='" + ANCHOR_NAME_PREFIX; //NON-NLS
61  private static final String INSERT_POSTFIX = "'></a>$0"; //$0 will insert current regex match //NON-NLS
62  private static final Pattern ANCHOR_DETECTION_PATTERN = Pattern.compile(HIGHLIGHT_PRE);
63 
64  private static final String HIGHLIGHT_FIELD = LuceneQuery.HIGHLIGHT_FIELD_REGEX;
65 
66  private final Server solrServer;
67  private final String solrDocumentId;
68  private final long solrObjectId;
69  private final Integer chunkId;
70  private final Set<String> keywords = new HashSet<>();
71  private final String displayName;
72  private final String queryString;
73 
74  private boolean isPageInfoLoaded = false;
75  private int numberPagesForFile = 0;
76  private int currentPage = 0;
77  //list of pages, used for iterating back and forth. Only stores pages with hits
78  private final List<Integer> pages = new ArrayList<>();
79  //map from page/chunk to number of hits. value is 0 if not yet known.
80  private final LinkedHashMap<Integer, Integer> numberOfHitsPerPage = new LinkedHashMap<>();
81  //map from page/chunk number to current hit on that page.
82  private final HashMap<Integer, Integer> currentHitPerPage = new HashMap<>();
83 
84  @NbBundle.Messages({
85  "AccountsText.creditCardNumber=Credit Card Number",
86  "AccountsText.creditCardNumbers=Credit Card Numbers"})
87  AccountsText(String objectId, Set<String> keywords) {
88  this.solrDocumentId = objectId;
89  this.keywords.addAll(keywords);
90 
91  //build the query string
92  this.queryString = HIGHLIGHT_FIELD + ":"
93  + keywords.stream()
94  .map(keyword -> "/.*?" + KeywordSearchUtil.escapeLuceneQuery(keyword) + ".*?/")//surround each "keyword" with match anything regex.
95  .collect(Collectors.joining(" ")); //collect as space separated string
96 
97  this.solrServer = KeywordSearch.getServer();
98 
99  final int separatorIndex = solrDocumentId.indexOf(Server.CHUNK_ID_SEPARATOR);
100  if (-1 == separatorIndex) {
101  //no chunk id in solrDocumentId
102  this.solrObjectId = Long.parseLong(solrDocumentId);
103  this.chunkId = null;
104  } else {
105  //solrDocumentId includes chunk id
106  this.solrObjectId = Long.parseLong(solrDocumentId.substring(0, separatorIndex));
107  this.chunkId = Integer.parseInt(solrDocumentId.substring(separatorIndex + 1));
108  }
109 
110  displayName = keywords.size() == 1
111  ? Bundle.AccountsText_creditCardNumber()
112  : Bundle.AccountsText_creditCardNumbers();
113  }
114 
115  long getObjectId() {
116  return this.solrObjectId;
117  }
118 
119  @Override
120  public int getNumberPages() {
121  return this.numberPagesForFile;
122  }
123 
124  @Override
125  public int getCurrentPage() {
126  return this.currentPage;
127  }
128 
129  @Override
130  public boolean hasNextPage() {
131  return pages.indexOf(this.currentPage) < pages.size() - 1;
132 
133  }
134 
135  @Override
136  public boolean hasPreviousPage() {
137  return pages.indexOf(this.currentPage) > 0;
138 
139  }
140 
141  @Override
142  @NbBundle.Messages("AccountsText.nextPage.exception.msg=No next page.")
143  public int nextPage() {
144  if (hasNextPage()) {
145  currentPage = pages.get(pages.indexOf(this.currentPage) + 1);
146  return currentPage;
147  } else {
148  throw new IllegalStateException(Bundle.AccountsText_nextPage_exception_msg());
149  }
150  }
151 
152  @Override
153  @NbBundle.Messages("AccountsText.previousPage.exception.msg=No previous page.")
154  public int previousPage() {
155  if (hasPreviousPage()) {
156  currentPage = pages.get(pages.indexOf(this.currentPage) - 1);
157  return currentPage;
158  } else {
159  throw new IllegalStateException(Bundle.AccountsText_previousPage_exception_msg());
160  }
161  }
162 
163  @Override
164  public boolean hasNextItem() {
165  if (this.currentHitPerPage.containsKey(currentPage)) {
166  return this.currentHitPerPage.get(currentPage) < this.numberOfHitsPerPage.get(currentPage);
167  } else {
168  return false;
169  }
170  }
171 
172  @Override
173  public boolean hasPreviousItem() {
174  if (this.currentHitPerPage.containsKey(currentPage)) {
175  return this.currentHitPerPage.get(currentPage) > 1;
176  } else {
177  return false;
178  }
179  }
180 
181  @Override
182  @NbBundle.Messages("AccountsText.nextItem.exception.msg=No next item.")
183  public int nextItem() {
184  if (hasNextItem()) {
185  return currentHitPerPage.merge(currentPage, 1, Integer::sum);
186  } else {
187  throw new IllegalStateException(Bundle.AccountsText_nextItem_exception_msg());
188  }
189  }
190 
191  @Override
192  @NbBundle.Messages("AccountsText.previousItem.exception.msg=No previous item.")
193  public int previousItem() {
194  if (hasPreviousItem()) {
195  return currentHitPerPage.merge(currentPage, -1, Integer::sum);
196  } else {
197  throw new IllegalStateException(Bundle.AccountsText_previousItem_exception_msg());
198  }
199  }
200 
201  @Override
202  public int currentItem() {
203  if (this.currentHitPerPage.containsKey(currentPage)) {
204  return currentHitPerPage.get(currentPage);
205  } else {
206  return 0;
207  }
208  }
209 
210  @Override
211  public LinkedHashMap<Integer, Integer> getHitsPages() {
212  return this.numberOfHitsPerPage;
213  }
214 
219  synchronized private void loadPageInfo() {
220  if (isPageInfoLoaded) {
221  return;
222  }
223  if (chunkId != null) {//if a chunk is specified, only show that chunk/page
224  this.numberPagesForFile = 1;
225  this.currentPage = chunkId;
226  this.numberOfHitsPerPage.put(chunkId, 0);
227  this.pages.add(chunkId);
228  this.currentHitPerPage.put(chunkId, 0);
229  } else {
230  try {
231  this.numberPagesForFile = solrServer.queryNumFileChunks(this.solrObjectId);
232  } catch (KeywordSearchModuleException | NoOpenCoreException ex) {
233  LOGGER.log(Level.WARNING, "Could not get number pages for content " + this.solrDocumentId, ex); //NON-NLS
234  return;
235  }
236 
237  //if has chunks, get pages with hits
238  TreeSet<Integer> sortedPagesWithHits = new TreeSet<>();
239  SolrQuery q = new SolrQuery();
240  q.setShowDebugInfo(DEBUG); //debug
241  q.setQuery(queryString);
242  q.setFields(Server.Schema.ID.toString()); //for this case we only need the document ids
243  q.addFilterQuery(Server.Schema.ID.toString() + ":" + this.solrObjectId + Server.CHUNK_ID_SEPARATOR + "*");
244 
245  try {
246  QueryResponse response = solrServer.query(q, METHOD.POST);
247  for (SolrDocument resultDoc : response.getResults()) {
248  final String resultDocumentId = resultDoc.getFieldValue(Server.Schema.ID.toString()).toString();
249  // Put the solr chunk id in the map
250  String resultChunkID = StringUtils.substringAfter(resultDocumentId, Server.CHUNK_ID_SEPARATOR);
251  if (StringUtils.isNotBlank(resultChunkID)) {
252  sortedPagesWithHits.add(Integer.parseInt(resultChunkID));
253  } else {
254  sortedPagesWithHits.add(0);
255  }
256  }
257 
258  } catch (KeywordSearchModuleException | NoOpenCoreException | NumberFormatException ex) {
259  LOGGER.log(Level.WARNING, "Error executing Solr highlighting query: " + keywords, ex); //NON-NLS
260  }
261 
262  //set page to first page having highlights
263  if (sortedPagesWithHits.isEmpty()) {
264  this.currentPage = 0;
265  } else {
266  this.currentPage = sortedPagesWithHits.first();
267  }
268 
269  for (Integer page : sortedPagesWithHits) {
270  numberOfHitsPerPage.put(page, 0); //unknown number of matches in the page
271  pages.add(page);
272  currentHitPerPage.put(page, 0); //set current hit to 0th
273  }
274  }
275 
276  isPageInfoLoaded = true;
277  }
278 
279  @Override
280  @NbBundle.Messages({"AccountsText.getMarkup.noMatchMsg="
281  + "<html><pre><span style\\\\='background\\\\:yellow'>There were no keyword hits on this page. <br />"
282  + "The keyword could have been in the file name."
283  + " <br />Advance to another page if present, or to view the original text, choose File Text"
284  + " <br />in the drop down menu to the right...</span></pre></html>",
285  "AccountsText.getMarkup.queryFailedMsg="
286  + "<html><pre><span style\\\\='background\\\\:yellow'>Failed to retrieve keyword hit results."
287  + " <br />Confirm that Autopsy can connect to the Solr server. "
288  + "<br /></span></pre></html>"})
289  public String getText() {
290  loadPageInfo(); //inits once
291 
292  SolrQuery q = new SolrQuery();
293  q.setShowDebugInfo(DEBUG); //debug
294  q.addHighlightField(HIGHLIGHT_FIELD);
295  q.setQuery(queryString);
296 
297  //set the documentID filter
298  String queryDocumentID = this.solrObjectId + Server.CHUNK_ID_SEPARATOR + this.currentPage;
299  q.addFilterQuery(Server.Schema.ID.toString() + ":" + queryDocumentID);
300 
301  //configure the highlighter
302  q.setParam("hl.useFastVectorHighlighter", "true"); //fast highlighter scales better than standard one NON-NLS
303  q.setParam("hl.tag.pre", HIGHLIGHT_PRE); //makes sense for FastVectorHighlighter only NON-NLS
304  q.setParam("hl.tag.post", HIGHLIGHT_POST); //makes sense for FastVectorHighlighter only NON-NLS
305  q.setParam("hl.fragListBuilder", "single"); //makes sense for FastVectorHighlighter only NON-NLS
306  q.setParam("hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED); //docs says makes sense for the original Highlighter only, but not really //NON-NLS
307 
308  try {
309  //extract highlighting and bail early on null responses
310  Map<String, Map<String, List<String>>> highlightingPerDocument = solrServer.query(q, METHOD.POST).getHighlighting();
311  Map<String, List<String>> highlightingPerField = highlightingPerDocument.get(queryDocumentID);
312  if (highlightingPerField == null) {
313  return Bundle.AccountsText_getMarkup_noMatchMsg();
314  }
315  List<String> highlights = highlightingPerField.get(HIGHLIGHT_FIELD);
316  if (highlights == null) {
317  return Bundle.AccountsText_getMarkup_noMatchMsg();
318  }
319 
320  //There should only be one item
321  String highlighting = highlights.get(0).trim();
322 
323  /*
324  * use regex matcher to iterate over occurences of HIGHLIGHT_PRE,
325  * and prepend them with an anchor tag.
326  */
327  Matcher m = ANCHOR_DETECTION_PATTERN.matcher(highlighting);
328  StringBuffer sb = new StringBuffer(highlighting.length());
329  int count = 0;
330  while (m.find()) {
331  count++;
332  m.appendReplacement(sb, INSERT_PREFIX + count + INSERT_POSTFIX);
333  }
334  m.appendTail(sb);
335 
336  //store total hits for this page, now that we know it
337  this.numberOfHitsPerPage.put(this.currentPage, count);
338  if (this.currentItem() == 0 && this.hasNextItem()) {
339  this.nextItem();
340  }
341 
342  // extracted content (minus highlight tags) is HTML-escaped
343  return "<html><pre>" + sb.toString() + "</pre></html>"; //NON-NLS
344  } catch (Exception ex) {
345  LOGGER.log(Level.WARNING, "Error executing Solr highlighting query: " + keywords, ex); //NON-NLS
346  return Bundle.AccountsText_getMarkup_queryFailedMsg();
347  }
348  }
349 
350  @Override
351  public String toString() {
352  return displayName;
353  }
354 
355  @Override
356  public boolean isSearchable() {
357  return true;
358  }
359 
360  @Override
361  public String getAnchorPrefix() {
362  return ANCHOR_NAME_PREFIX;
363  }
364 
365  @Override
366  public int getNumberHits() {
367  if (!this.numberOfHitsPerPage.containsKey(this.currentPage)) {
368  return 0;
369  }
370  return this.numberOfHitsPerPage.get(this.currentPage);
371  }
372 }

Copyright © 2012-2016 Basis Technology. Generated on: Mon Jan 2 2017
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.