Autopsy  4.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
HighlightedText.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2015 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.keywordsearch;
20 
21 import java.util.ArrayList;
22 import java.util.HashMap;
23 import java.util.LinkedHashMap;
24 import java.util.List;
25 import java.util.Map;
26 import java.util.TreeSet;
27 import java.util.logging.Level;
28 
29 import org.openide.util.NbBundle;
31 import org.apache.solr.client.solrj.SolrQuery;
32 import org.apache.solr.client.solrj.SolrRequest.METHOD;
33 import org.apache.solr.client.solrj.response.QueryResponse;
34 import org.openide.util.NbBundle.Messages;
39 
44 class HighlightedText implements IndexedText, TextMarkupLookup {
45 
46  private static final Logger logger = Logger.getLogger(HighlightedText.class.getName());
47  private static final String HIGHLIGHT_PRE = "<span style='background:yellow'>"; //NON-NLS
48  private static final String HIGHLIGHT_POST = "</span>"; //NON-NLS
49  private static final String ANCHOR_PREFIX = HighlightedText.class.getName() + "_";
50 
51  private long objectId;
52  private String keywordHitQuery;
53  private Server solrServer;
54  private int numberPages;
55  private int currentPage;
56  private boolean isRegex = false;
57  private boolean group = true;
58  private boolean hasChunks = false;
59  //stores all pages/chunks that have hits as key, and number of hits as a value, or 0 if yet unknown
60  private LinkedHashMap<Integer, Integer> hitsPages;
61  //stored page num -> current hit number mapping
62  private HashMap<Integer, Integer> pagesToHits;
63  private List<Integer> pages;
64  private QueryResults hits = null; //original hits that may get passed in
65  private String originalQuery = null; //or original query if hits are not available
66  private boolean isPageInfoLoaded = false;
67  private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
68 
69  HighlightedText(long objectId, String keywordHitQuery, boolean isRegex) {
70  this.objectId = objectId;
71  this.keywordHitQuery = keywordHitQuery;
72  this.isRegex = isRegex;
73  this.group = true;
74  this.hitsPages = new LinkedHashMap<>();
75  this.pages = new ArrayList<>();
76  this.pagesToHits = new HashMap<>();
77 
78  this.solrServer = KeywordSearch.getServer();
79  this.numberPages = 0;
80  this.currentPage = 0;
81  //hits are unknown
82 
83  }
84 
85  //when the results are not known and need to requery to get hits
86  HighlightedText(long objectId, String solrQuery, boolean isRegex, String originalQuery) {
87  this(objectId, KeywordSearchUtil.quoteQuery(solrQuery), isRegex);
88  this.originalQuery = originalQuery;
89  }
90 
91  HighlightedText(long objectId, String solrQuery, boolean isRegex, QueryResults hits) {
92  this(objectId, solrQuery, isRegex);
93  this.hits = hits;
94  }
95 
96  HighlightedText(long objectId, String solrQuery, boolean isRegex, boolean group, QueryResults hits) {
97  this(objectId, solrQuery, isRegex, hits);
98  this.group = group;
99  }
100 
105  @Messages({"HighlightedText.query.exception.msg=Could not perform the query to get chunk info and get highlights:"})
106  private void loadPageInfo() {
107  if (isPageInfoLoaded) {
108  return;
109  }
110  try {
111  this.numberPages = solrServer.queryNumFileChunks(this.objectId);
112  } catch (KeywordSearchModuleException ex) {
113  logger.log(Level.WARNING, "Could not get number pages for content: " + this.objectId); //NON-NLS
114  return;
115  } catch (NoOpenCoreException ex) {
116  logger.log(Level.WARNING, "Could not get number pages for content: " + this.objectId); //NON-NLS
117  return;
118  }
119 
120  if (this.numberPages == 0) {
121  hasChunks = false;
122  } else {
123  hasChunks = true;
124  }
125 
126  //if has chunks, get pages with hits
127  if (hasChunks) {
128  //extract pages of interest, sorted
129 
130  /*
131  * If this is being called from the artifacts / dir tree, then we
132  * need to perform the search to get the highlights.
133  */
134  if (hits == null) {
135  String queryStr = KeywordSearchUtil.escapeLuceneQuery(this.keywordHitQuery);
136  if (isRegex) {
137  //use white-space sep. field to get exact matches only of regex query result
138  queryStr = Server.Schema.CONTENT_WS + ":" + "\"" + queryStr + "\"";
139  }
140 
141  Keyword keywordQuery = new Keyword(queryStr, !isRegex);
142  List<Keyword> keywords = new ArrayList<>();
143  keywords.add(keywordQuery);
144  KeywordSearchQuery chunksQuery = new LuceneQuery(new KeywordList(keywords), keywordQuery);
145 
146  chunksQuery.addFilter(new KeywordQueryFilter(FilterType.CHUNK, this.objectId));
147  try {
148  hits = chunksQuery.performQuery();
149  } catch (KeywordSearchModuleException | NoOpenCoreException ex) {
150  logger.log(Level.SEVERE, "Could not perform the query to get chunk info and get highlights:" + keywordQuery.getSearchTerm(), ex); //NON-NLS
151  MessageNotifyUtil.Notify.error(Bundle.HighlightedText_query_exception_msg() + keywordQuery.getSearchTerm(), ex.getCause().getMessage());
152  return;
153  }
154  }
155 
156  //organize the hits by page, filter as needed
157  TreeSet<Integer> pagesSorted = new TreeSet<>();
158  for (Keyword k : hits.getKeywords()) {
159  for (KeywordHit hit : hits.getResults(k)) {
160  int chunkID = hit.getChunkId();
161  if (chunkID != 0 && this.objectId == hit.getSolrObjectId()) {
162  pagesSorted.add(chunkID);
163  }
164  }
165  }
166 
167  //set page to first page having highlights
168  if (pagesSorted.isEmpty()) {
169  this.currentPage = 0;
170  } else {
171  this.currentPage = pagesSorted.first();
172  }
173 
174  for (Integer page : pagesSorted) {
175  hitsPages.put(page, 0); //unknown number of matches in the page
176  pages.add(page);
177  pagesToHits.put(page, 0); //set current hit to 0th
178  }
179 
180  } else {
181  //no chunks
182  this.numberPages = 1;
183  this.currentPage = 1;
184  hitsPages.put(1, 0);
185  pages.add(1);
186  pagesToHits.put(1, 0);
187  }
188  isPageInfoLoaded = true;
189  }
190 
191  //constructor for dummy singleton factory instance for Lookup
192  private HighlightedText() {
193  }
194 
195  long getObjectId() {
196  return this.objectId;
197  }
198 
199  @Override
200  public int getNumberPages() {
201  return this.numberPages;
202  //return number of pages that have hits
203  //return this.hitsPages.keySet().size();
204  }
205 
206  @Override
207  public int getCurrentPage() {
208  return this.currentPage;
209  }
210 
211  @Override
212  public boolean hasNextPage() {
213  final int numPages = pages.size();
214  int idx = pages.indexOf(this.currentPage);
215  return idx < numPages - 1;
216 
217  }
218 
219  @Override
220  public boolean hasPreviousPage() {
221  int idx = pages.indexOf(this.currentPage);
222  return idx > 0;
223 
224  }
225 
226  @Override
227  public int nextPage() {
228  if (!hasNextPage()) {
229  throw new IllegalStateException(
230  NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.nextPage.exception.msg"));
231  }
232  int idx = pages.indexOf(this.currentPage);
233  currentPage = pages.get(idx + 1);
234  return currentPage;
235  }
236 
237  @Override
238  public int previousPage() {
239  if (!hasPreviousPage()) {
240  throw new IllegalStateException(
241  NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.previousPage.exception.msg"));
242  }
243  int idx = pages.indexOf(this.currentPage);
244  currentPage = pages.get(idx - 1);
245  return currentPage;
246  }
247 
248  @Override
249  public boolean hasNextItem() {
250  if (!this.pagesToHits.containsKey(currentPage)) {
251  return false;
252  }
253  return this.pagesToHits.get(currentPage) < this.hitsPages.get(currentPage);
254  }
255 
256  @Override
257  public boolean hasPreviousItem() {
258  if (!this.pagesToHits.containsKey(currentPage)) {
259  return false;
260  }
261  return this.pagesToHits.get(currentPage) > 1;
262  }
263 
264  @Override
265  public int nextItem() {
266  if (!hasNextItem()) {
267  throw new IllegalStateException(
268  NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.nextItem.exception.msg"));
269  }
270  int cur = pagesToHits.get(currentPage) + 1;
271  pagesToHits.put(currentPage, cur);
272  return cur;
273  }
274 
275  @Override
276  public int previousItem() {
277  if (!hasPreviousItem()) {
278  throw new IllegalStateException(
279  NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.previousItem.exception.msg"));
280  }
281  int cur = pagesToHits.get(currentPage) - 1;
282  pagesToHits.put(currentPage, cur);
283  return cur;
284  }
285 
286  @Override
287  public int currentItem() {
288  if (!this.pagesToHits.containsKey(currentPage)) {
289  return 0;
290  }
291  return pagesToHits.get(currentPage);
292  }
293 
294  @Override
295  public LinkedHashMap<Integer, Integer> getHitsPages() {
296  return this.hitsPages;
297  }
298 
299  @Override
300  public String getText() {
301  loadPageInfo(); //inits once
302 
303  String highLightField = null;
304 
305  if (isRegex) {
306  highLightField = LuceneQuery.HIGHLIGHT_FIELD_REGEX;
307  } else {
308  highLightField = LuceneQuery.HIGHLIGHT_FIELD_LITERAL;
309  }
310 
311  SolrQuery q = new SolrQuery();
312  q.setShowDebugInfo(DEBUG); //debug
313 
314  // input query has already been properly constructed and escaped
315  q.setQuery(keywordHitQuery);
316 
317  String contentIdStr = Long.toString(this.objectId);
318  if (hasChunks) {
319  contentIdStr += "_" + Integer.toString(this.currentPage);
320  }
321 
322  final String filterQuery = Server.Schema.ID.toString() + ":" + KeywordSearchUtil.escapeLuceneQuery(contentIdStr);
323  q.addFilterQuery(filterQuery);
324  q.addHighlightField(highLightField); //for exact highlighting, try content_ws field (with stored="true" in Solr schema)
325 
326  //q.setHighlightSimplePre(HIGHLIGHT_PRE); //original highlighter only
327  //q.setHighlightSimplePost(HIGHLIGHT_POST); //original highlighter only
328  q.setHighlightFragsize(0); // don't fragment the highlight, works with original highlighter, or needs "single" list builder with FVH
329 
330  //tune the highlighter
331  q.setParam("hl.useFastVectorHighlighter", "on"); //fast highlighter scales better than standard one NON-NLS
332  q.setParam("hl.tag.pre", HIGHLIGHT_PRE); //makes sense for FastVectorHighlighter only NON-NLS
333  q.setParam("hl.tag.post", HIGHLIGHT_POST); //makes sense for FastVectorHighlighter only NON-NLS
334  q.setParam("hl.fragListBuilder", "single"); //makes sense for FastVectorHighlighter only NON-NLS
335 
336  //docs says makes sense for the original Highlighter only, but not really
337  q.setParam("hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED); //NON-NLS
338 
339  try {
340  QueryResponse response = solrServer.query(q, METHOD.POST);
341  Map<String, Map<String, List<String>>> responseHighlight = response.getHighlighting();
342 
343  Map<String, List<String>> responseHighlightID = responseHighlight.get(contentIdStr);
344  if (responseHighlightID == null) {
345  return NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.getMarkup.noMatchMsg");
346  }
347  List<String> contentHighlights = responseHighlightID.get(highLightField);
348  if (contentHighlights == null) {
349  return NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.getMarkup.noMatchMsg");
350  } else {
351  // extracted content (minus highlight tags) is HTML-escaped
352  String highlightedContent = contentHighlights.get(0).trim();
353  highlightedContent = insertAnchors(highlightedContent);
354 
355  return "<html><pre>" + highlightedContent + "</pre></html>"; //NON-NLS
356  }
357  } catch (Exception ex) {
358  logger.log(Level.WARNING, "Error executing Solr highlighting query: " + keywordHitQuery, ex); //NON-NLS
359  return NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.getMarkup.queryFailedMsg");
360  }
361  }
362 
363  @Override
364  public String toString() {
365  return NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.toString");
366  }
367 
368  @Override
369  public boolean isSearchable() {
370  return true;
371  }
372 
373  @Override
374  public String getAnchorPrefix() {
375  return ANCHOR_PREFIX;
376  }
377 
378  @Override
379  public int getNumberHits() {
380  if (!this.hitsPages.containsKey(this.currentPage)) {
381  return 0;
382  }
383  return this.hitsPages.get(this.currentPage);
384  }
385 
386  private String insertAnchors(String searchableContent) {
387  int searchOffset = 0;
388  int index = -1;
389 
390  StringBuilder buf = new StringBuilder(searchableContent);
391 
392  final String searchToken = HIGHLIGHT_PRE;
393  final int indexSearchTokLen = searchToken.length();
394  final String insertPre = "<a name='" + ANCHOR_PREFIX; //NON-NLS
395  final String insertPost = "'></a>"; //NON-NLS
396  int count = 0;
397  while ((index = buf.indexOf(searchToken, searchOffset)) >= 0) {
398  String insertString = insertPre + Integer.toString(count + 1) + insertPost;
399  int insertStringLen = insertString.length();
400  buf.insert(index, insertString);
401  searchOffset = index + indexSearchTokLen + insertStringLen; //next offset past this anchor
402  ++count;
403  }
404 
405  //store total hits for this page, now that we know it
406  this.hitsPages.put(this.currentPage, count);
407  if (this.currentItem() == 0 && this.hasNextItem()) {
408  this.nextItem();
409  }
410 
411  return buf.toString();
412  }
413  //dummy instance for Lookup only
414  private static TextMarkupLookup instance = null;
415 
416  //getter of the singleton dummy instance solely for Lookup purpose
417  //this instance does not actually work with Solr
418  public static synchronized TextMarkupLookup getDefault() {
419  if (instance == null) {
420  instance = new HighlightedText();
421  }
422  return instance;
423  }
424 
425  @Override
426  // factory method to create an instance of this object
427  public TextMarkupLookup createInstance(long objectId, String keywordHitQuery, boolean isRegex, String originalQuery) {
428  return new HighlightedText(objectId, keywordHitQuery, isRegex, originalQuery);
429  }
430 }

Copyright © 2012-2016 Basis Technology. Generated on: Mon Jan 2 2017
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.