Autopsy  3.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
HighlightedTextMarkup.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2015 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.keywordsearch;
20 
21 import java.util.ArrayList;
22 import java.util.HashMap;
23 import java.util.LinkedHashMap;
24 import java.util.List;
25 import java.util.Map;
26 import java.util.TreeSet;
27 import java.util.logging.Level;
28 
29 import org.openide.util.NbBundle;
31 import org.apache.solr.client.solrj.SolrQuery;
32 import org.apache.solr.client.solrj.SolrRequest.METHOD;
33 import org.apache.solr.client.solrj.response.QueryResponse;
38 
42 class HighlightedTextMarkup implements TextMarkup, TextMarkupLookup {
43 
44  private static final Logger logger = Logger.getLogger(HighlightedTextMarkup.class.getName());
45  private static final String HIGHLIGHT_PRE = "<span style='background:yellow'>"; //NON-NLS
46  private static final String HIGHLIGHT_POST = "</span>"; //NON-NLS
47  private static final String ANCHOR_PREFIX = HighlightedTextMarkup.class.getName() + "_";
48 
49  private long objectId;
50  private String keywordHitQuery;
51  private Server solrServer;
52  private int numberPages;
53  private int currentPage;
54  private boolean isRegex = false;
55  private boolean group = true;
56  private boolean hasChunks = false;
57  //stores all pages/chunks that have hits as key, and number of hits as a value, or 0 if yet unknown
58  private LinkedHashMap<Integer, Integer> hitsPages;
59  //stored page num -> current hit number mapping
60  private HashMap<Integer, Integer> pagesToHits;
61  private List<Integer> pages;
62  private QueryResults hits = null; //original hits that may get passed in
63  private String originalQuery = null; //or original query if hits are not available
64  private boolean isPageInfoLoaded = false;
65  private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
66 
67  HighlightedTextMarkup(long objectId, String keywordHitQuery, boolean isRegex) {
68  this.objectId = objectId;
69  this.keywordHitQuery = keywordHitQuery;
70  this.isRegex = isRegex;
71  this.group = true;
72  this.hitsPages = new LinkedHashMap<>();
73  this.pages = new ArrayList<>();
74  this.pagesToHits = new HashMap<>();
75 
76  this.solrServer = KeywordSearch.getServer();
77  this.numberPages = 0;
78  this.currentPage = 0;
79  //hits are unknown
80 
81  }
82 
83  //when the results are not known and need to requery to get hits
84  HighlightedTextMarkup(long objectId, String solrQuery, boolean isRegex, String originalQuery) {
85  this(objectId, solrQuery, isRegex);
86  this.originalQuery = originalQuery;
87  }
88 
89  HighlightedTextMarkup(long objectId, String solrQuery, boolean isRegex, QueryResults hits) {
90  this(objectId, solrQuery, isRegex);
91  this.hits = hits;
92  }
93 
94  HighlightedTextMarkup(long objectId, String solrQuery, boolean isRegex, boolean group, QueryResults hits) {
95  this(objectId, solrQuery, isRegex, hits);
96  this.group = group;
97  }
98 
102  private void loadPageInfo() {
103  if (isPageInfoLoaded) {
104  return;
105  }
106  try {
107  this.numberPages = solrServer.queryNumFileChunks(this.objectId);
108  } catch (KeywordSearchModuleException ex) {
109  logger.log(Level.WARNING, "Could not get number pages for content: " + this.objectId); //NON-NLS
110  return;
111  } catch (NoOpenCoreException ex) {
112  logger.log(Level.WARNING, "Could not get number pages for content: " + this.objectId); //NON-NLS
113  return;
114  }
115 
116  if (this.numberPages == 0) {
117  hasChunks = false;
118  } else {
119  hasChunks = true;
120  }
121 
122  //if has chunks, get pages with hits
123  if (hasChunks) {
124  //extract pages of interest, sorted
125 
126  /* If this is being called from the artifacts / dir tree, then we
127  * need to perform the search to get the highlights.
128  */
129  if (hits == null) {
130  String queryStr = KeywordSearchUtil.escapeLuceneQuery(this.keywordHitQuery);
131  if (isRegex) {
132  //use white-space sep. field to get exact matches only of regex query result
133  queryStr = Server.Schema.CONTENT_WS + ":" + "\"" + queryStr + "\"";
134  }
135 
136  Keyword keywordQuery = new Keyword(queryStr, !isRegex);
137  List<Keyword> keywords = new ArrayList<>();
138  keywords.add(keywordQuery);
139  KeywordSearchQuery chunksQuery = new LuceneQuery(new KeywordList(keywords), keywordQuery);
140 
141  chunksQuery.addFilter(new KeywordQueryFilter(FilterType.CHUNK, this.objectId));
142  try {
143  hits = chunksQuery.performQuery();
144  } catch (NoOpenCoreException ex) {
145  logger.log(Level.INFO, "Could not get chunk info and get highlights", ex); //NON-NLS
146  return;
147  }
148  }
149 
150  //organize the hits by page, filter as needed
151  TreeSet<Integer> pagesSorted = new TreeSet<>();
152  for (Keyword k : hits.getKeywords()) {
153  for (KeywordHit hit : hits.getResults(k)) {
154  int chunkID = hit.getChunkId();
155  if (chunkID != 0 && this.objectId == hit.getSolrObjectId()) {
156  pagesSorted.add(chunkID);
157  }
158  }
159  }
160 
161  //set page to first page having highlights
162  if (pagesSorted.isEmpty()) {
163  this.currentPage = 0;
164  } else {
165  this.currentPage = pagesSorted.first();
166  }
167 
168  for (Integer page : pagesSorted) {
169  hitsPages.put(page, 0); //unknown number of matches in the page
170  pages.add(page);
171  pagesToHits.put(page, 0); //set current hit to 0th
172  }
173 
174  } else {
175  //no chunks
176  this.numberPages = 1;
177  this.currentPage = 1;
178  hitsPages.put(1, 0);
179  pages.add(1);
180  pagesToHits.put(1, 0);
181  }
182  isPageInfoLoaded = true;
183  }
184 
185  //constructor for dummy singleton factory instance for Lookup
186  private HighlightedTextMarkup() {
187  }
188 
189  long getObjectId() {
190  return this.objectId;
191  }
192 
193  @Override
194  public int getNumberPages() {
195  return this.numberPages;
196  //return number of pages that have hits
197  //return this.hitsPages.keySet().size();
198  }
199 
200  @Override
201  public int getCurrentPage() {
202  return this.currentPage;
203  }
204 
205  @Override
206  public boolean hasNextPage() {
207  final int numPages = pages.size();
208  int idx = pages.indexOf(this.currentPage);
209  return idx < numPages - 1;
210 
211  }
212 
213  @Override
214  public boolean hasPreviousPage() {
215  int idx = pages.indexOf(this.currentPage);
216  return idx > 0;
217 
218  }
219 
220  @Override
221  public int nextPage() {
222  if (!hasNextPage()) {
223  throw new IllegalStateException(
224  NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.nextPage.exception.msg"));
225  }
226  int idx = pages.indexOf(this.currentPage);
227  currentPage = pages.get(idx + 1);
228  return currentPage;
229  }
230 
231  @Override
232  public int previousPage() {
233  if (!hasPreviousPage()) {
234  throw new IllegalStateException(
235  NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.previousPage.exception.msg"));
236  }
237  int idx = pages.indexOf(this.currentPage);
238  currentPage = pages.get(idx - 1);
239  return currentPage;
240  }
241 
242  @Override
243  public boolean hasNextItem() {
244  if (!this.pagesToHits.containsKey(currentPage)) {
245  return false;
246  }
247  return this.pagesToHits.get(currentPage) < this.hitsPages.get(currentPage);
248  }
249 
250  @Override
251  public boolean hasPreviousItem() {
252  if (!this.pagesToHits.containsKey(currentPage)) {
253  return false;
254  }
255  return this.pagesToHits.get(currentPage) > 1;
256  }
257 
258  @Override
259  public int nextItem() {
260  if (!hasNextItem()) {
261  throw new IllegalStateException(
262  NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.nextItem.exception.msg"));
263  }
264  int cur = pagesToHits.get(currentPage) + 1;
265  pagesToHits.put(currentPage, cur);
266  return cur;
267  }
268 
269  @Override
270  public int previousItem() {
271  if (!hasPreviousItem()) {
272  throw new IllegalStateException(
273  NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.previousItem.exception.msg"));
274  }
275  int cur = pagesToHits.get(currentPage) - 1;
276  pagesToHits.put(currentPage, cur);
277  return cur;
278  }
279 
280  @Override
281  public int currentItem() {
282  if (!this.pagesToHits.containsKey(currentPage)) {
283  return 0;
284  }
285  return pagesToHits.get(currentPage);
286  }
287 
288  @Override
289  public LinkedHashMap<Integer, Integer> getHitsPages() {
290  return this.hitsPages;
291  }
292 
293  @Override
294  public String getMarkup() {
295  loadPageInfo(); //inits once
296 
297  String highLightField = null;
298 
299  String highlightQuery = keywordHitQuery;
300 
301  if (isRegex) {
302  highLightField = LuceneQuery.HIGHLIGHT_FIELD_REGEX;
303  //escape special lucene chars if not already escaped (if not a compound query)
304  //TODO a better way to mark it a compound highlight query
305  final String findSubstr = LuceneQuery.HIGHLIGHT_FIELD_REGEX + ":";
306  if (!highlightQuery.contains(findSubstr)) {
307  highlightQuery = KeywordSearchUtil.escapeLuceneQuery(highlightQuery);
308  }
309  } else {
310  highLightField = LuceneQuery.HIGHLIGHT_FIELD_LITERAL;
311  //escape special lucene chars always for literal queries query
312  highlightQuery = KeywordSearchUtil.escapeLuceneQuery(highlightQuery);
313  }
314 
315  SolrQuery q = new SolrQuery();
316  q.setShowDebugInfo(DEBUG); //debug
317 
318  String queryStr = null;
319 
320  if (isRegex) {
321  StringBuilder sb = new StringBuilder();
322  sb.append(highLightField).append(":");
323  if (group) {
324  sb.append("\"");
325  }
326  sb.append(highlightQuery);
327  if (group) {
328  sb.append("\"");
329  }
330  queryStr = sb.toString();
331  } else {
332  //use default field, simplifies query
333  //always force grouping/quotes
334  queryStr = KeywordSearchUtil.quoteQuery(highlightQuery);
335  }
336 
337  q.setQuery(queryStr);
338 
339  String contentIdStr = Long.toString(this.objectId);
340  if (hasChunks) {
341  contentIdStr += "_" + Integer.toString(this.currentPage);
342  }
343 
344 
345  final String filterQuery = Server.Schema.ID.toString() + ":" + KeywordSearchUtil.escapeLuceneQuery(contentIdStr);
346  q.addFilterQuery(filterQuery);
347  q.addHighlightField(highLightField); //for exact highlighting, try content_ws field (with stored="true" in Solr schema)
348 
349  //q.setHighlightSimplePre(HIGHLIGHT_PRE); //original highlighter only
350  //q.setHighlightSimplePost(HIGHLIGHT_POST); //original highlighter only
351  q.setHighlightFragsize(0); // don't fragment the highlight, works with original highlighter, or needs "single" list builder with FVH
352 
353  //tune the highlighter
354  q.setParam("hl.useFastVectorHighlighter", "on"); //fast highlighter scales better than standard one NON-NLS
355  q.setParam("hl.tag.pre", HIGHLIGHT_PRE); //makes sense for FastVectorHighlighter only NON-NLS
356  q.setParam("hl.tag.post", HIGHLIGHT_POST); //makes sense for FastVectorHighlighter only NON-NLS
357  q.setParam("hl.fragListBuilder", "single"); //makes sense for FastVectorHighlighter only NON-NLS
358 
359  //docs says makes sense for the original Highlighter only, but not really
360  q.setParam("hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED); //NON-NLS
361 
362  try {
363  QueryResponse response = solrServer.query(q, METHOD.POST);
364  Map<String, Map<String, List<String>>> responseHighlight = response.getHighlighting();
365 
366  Map<String, List<String>> responseHighlightID = responseHighlight.get(contentIdStr);
367  if (responseHighlightID == null) {
368  return NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.getMarkup.noMatchMsg");
369 
370  }
371  List<String> contentHighlights = responseHighlightID.get(highLightField);
372  if (contentHighlights == null) {
373  return NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.getMarkup.noMatchMsg");
374  } else {
375  // extracted content (minus highlight tags) is HTML-escaped
376  String highlightedContent = contentHighlights.get(0).trim();
377  highlightedContent = insertAnchors(highlightedContent);
378 
379 
380  return "<html><pre>" + highlightedContent + "</pre></html>"; //NON-NLS
381  }
382  } catch (NoOpenCoreException ex) {
383  logger.log(Level.WARNING, "Couldn't query markup for page: " + currentPage, ex); //NON-NLS
384  return "";
385  } catch (KeywordSearchModuleException ex) {
386  logger.log(Level.WARNING, "Could not query markup for page: " + currentPage, ex); //NON-NLS
387  return "";
388  }
389  }
390 
391  @Override
392  public String toString() {
393  return NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.toString");
394  }
395 
396  @Override
397  public boolean isSearchable() {
398  return true;
399  }
400 
401  @Override
402  public String getAnchorPrefix() {
403  return ANCHOR_PREFIX;
404  }
405 
406  @Override
407  public int getNumberHits() {
408  if (!this.hitsPages.containsKey(this.currentPage)) {
409  return 0;
410  }
411  return this.hitsPages.get(this.currentPage);
412  }
413 
414  private String insertAnchors(String searchableContent) {
415  int searchOffset = 0;
416  int index = -1;
417 
418  StringBuilder buf = new StringBuilder(searchableContent);
419 
420  final String searchToken = HIGHLIGHT_PRE;
421  final int indexSearchTokLen = searchToken.length();
422  final String insertPre = "<a name='" + ANCHOR_PREFIX; //NON-NLS
423  final String insertPost = "'></a>"; //NON-NLS
424  int count = 0;
425  while ((index = buf.indexOf(searchToken, searchOffset)) >= 0) {
426  String insertString = insertPre + Integer.toString(count + 1) + insertPost;
427  int insertStringLen = insertString.length();
428  buf.insert(index, insertString);
429  searchOffset = index + indexSearchTokLen + insertStringLen; //next offset past this anchor
430  ++count;
431  }
432 
433  //store total hits for this page, now that we know it
434  this.hitsPages.put(this.currentPage, count);
435  if (this.currentItem() == 0 && this.hasNextItem()) {
436  this.nextItem();
437  }
438 
439  return buf.toString();
440  }
441  //dummy instance for Lookup only
442  private static TextMarkupLookup instance = null;
443 
444  //getter of the singleton dummy instance solely for Lookup purpose
445  //this instance does not actually work with Solr
446  public static synchronized TextMarkupLookup getDefault() {
447  if (instance == null) {
448  instance = new HighlightedTextMarkup();
449  }
450  return instance;
451  }
452 
453  @Override
454  // factory method to create an instance of this object
455  public TextMarkupLookup createInstance(long objectId, String keywordHitQuery, boolean isRegex, String originalQuery) {
456  return new HighlightedTextMarkup(objectId, keywordHitQuery, isRegex, originalQuery);
457  }
458 }

Copyright © 2012-2015 Basis Technology. Generated on: Mon Oct 19 2015
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.