19 package org.sleuthkit.autopsy.keywordsearch;
21 import java.util.ArrayList;
22 import java.util.HashMap;
23 import java.util.HashSet;
24 import java.util.LinkedHashMap;
25 import java.util.List;
28 import java.util.TreeSet;
29 import java.util.logging.Level;
30 import java.util.regex.Matcher;
31 import java.util.regex.Pattern;
32 import java.util.stream.Collectors;
33 import org.apache.commons.lang.StringUtils;
34 import org.apache.solr.client.solrj.SolrQuery;
35 import org.apache.solr.client.solrj.SolrRequest.METHOD;
36 import org.apache.solr.client.solrj.response.QueryResponse;
37 import org.apache.solr.common.SolrDocument;
38 import org.openide.util.NbBundle;
51 class AccountsText
implements IndexedText {
// Logger named after this class.
53 private static final Logger LOGGER = Logger.getLogger(AccountsText.class.getName());
// True only when the build type is DEVELOPMENT; the Solr queries in this class pass it to setShowDebugInfo().
54 private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
// Markup placed around each highlighted hit; sent to Solr as the hl.tag.pre / hl.tag.post parameters.
56 private static final String HIGHLIGHT_PRE =
"<span style='background:yellow'>";
57 private static final String HIGHLIGHT_POST =
"</span>";
// Common prefix of the HTML anchor names inserted in front of hits: this class's name followed by "_".
58 private static final String ANCHOR_NAME_PREFIX = AccountsText.class.
getName() +
"_";
// Regex replacement pieces: INSERT_PREFIX + <hit number> + INSERT_POSTFIX yields an empty named anchor
// followed by $0, so the matched text (the opening highlight tag) is re-emitted right after the anchor.
60 private static final String INSERT_PREFIX =
"<a name='" + ANCHOR_NAME_PREFIX;
61 private static final String INSERT_POSTFIX =
"'></a>$0";
// Compiled from HIGHLIGHT_PRE, so it finds each opening highlight tag in the text returned by Solr.
62 private static final Pattern ANCHOR_DETECTION_PATTERN = Pattern.compile(HIGHLIGHT_PRE);
// Solr field that is both queried (see queryString) and highlighted (see getText()).
64 private static final String HIGHLIGHT_FIELD = LuceneQuery.HIGHLIGHT_FIELD_REGEX;
// Server handle obtained from KeywordSearch.getServer() in the constructor; used for all Solr queries.
66 private final Server solrServer;
// The document id exactly as passed to the constructor.
67 private final String solrDocumentId;
// Numeric object id parsed from solrDocumentId (the part before the chunk separator, or the whole id).
68 private final long solrObjectId;
// Chunk number parsed from the part of solrDocumentId after the chunk separator.
// loadPageInfo() tests this for null.
// NOTE(review): the assignment made when the id has no separator is not visible in this excerpt - presumably null; confirm.
69 private final Integer chunkId;
// Private copy of the keywords passed to the constructor.
70 private final Set<String> keywords =
new HashSet<>();
// Singular or plural bundle string, chosen in the constructor from the number of keywords.
71 private final String displayName;
// Solr query text built once in the constructor and reused by loadPageInfo() and getText().
72 private final String queryString;
// Set to true at the end of loadPageInfo().
74 private boolean isPageInfoLoaded =
false;
// Page count for the file: 1 for a single-chunk document, otherwise the value from queryNumFileChunks().
75 private int numberPagesForFile = 0;
// Page (chunk) number currently selected for display.
76 private int currentPage = 0;
// Page numbers available for navigation; next/previous page move by position within this list.
78 private final List<Integer> pages =
new ArrayList<>();
// Page number -> number of hits on that page; initialised to 0 and updated by getText() for the page it renders.
80 private final LinkedHashMap<Integer, Integer> numberOfHitsPerPage =
new LinkedHashMap<>();
// Page number -> position of the current hit on that page; starts at 0 and is moved by nextItem()/previousItem().
82 private final HashMap<Integer, Integer> currentHitPerPage =
new HashMap<>();
// Bundle messages: singular and plural display names, selected at the end of the constructor.
85 "AccountsText.creditCardNumber=Credit Card Number",
86 "AccountsText.creditCardNumbers=Credit Card Numbers"})
// Constructor.
//   objectId - Solr document id: either a bare numeric object id, or a numeric object id followed by
//              Server.CHUNK_ID_SEPARATOR and a numeric chunk id. Non-numeric parts make
//              Long.parseLong / Integer.parseInt throw NumberFormatException.
//   keywords - keywords to search for and highlight; copied into this instance.
87 AccountsText(String objectId, Set<String> keywords) {
88 this.solrDocumentId = objectId;
89 this.keywords.addAll(keywords);
// Build the query: HIGHLIGHT_FIELD, a colon, then one regex term per keyword. Each keyword is
// Lucene-escaped, wrapped in a lazy "match anything" on both sides, and the terms are joined with spaces.
// NOTE(review): the expression that starts the stream is not visible in this excerpt.
92 this.queryString = HIGHLIGHT_FIELD +
":"
94 .map(keyword ->
"/.*?" + KeywordSearchUtil.escapeLuceneQuery(keyword) +
".*?/")
95 .collect(Collectors.joining(
" "));
97 this.solrServer = KeywordSearch.getServer();
// Split the document id into object id and chunk id at the first chunk separator, if there is one.
99 final int separatorIndex = solrDocumentId.indexOf(Server.CHUNK_ID_SEPARATOR);
100 if (-1 == separatorIndex) {
// No separator: the whole id is the object id.
102 this.solrObjectId = Long.parseLong(solrDocumentId);
// Separator present: the text before it is the object id, the text after it is the chunk id.
// NOTE(review): "separatorIndex + 1" assumes a one-character separator - confirm against Server.CHUNK_ID_SEPARATOR.
106 this.solrObjectId = Long.parseLong(solrDocumentId.substring(0, separatorIndex));
107 this.chunkId = Integer.parseInt(solrDocumentId.substring(separatorIndex + 1));
// Exactly one keyword selects the singular display name; any other count selects the plural.
110 displayName = keywords.size() == 1
111 ? Bundle.AccountsText_creditCardNumber()
112 : Bundle.AccountsText_creditCardNumbers();
116 return this.solrObjectId;
/**
 * Returns the page count recorded for this file.
 *
 * @return the current value of numberPagesForFile (0 until loadPageInfo() assigns it)
 */
120 public int getNumberPages() {
121 return this.numberPagesForFile;
/**
 * Returns the page (chunk) number that is currently selected.
 *
 * @return the current value of currentPage
 */
125 public int getCurrentPage() {
126 return this.currentPage;
/**
 * Tells whether a page follows the current one in the pages list.
 *
 * NOTE(review): if currentPage is not in the list, indexOf returns -1 and this
 * returns true whenever the list is non-empty - confirm that is intended.
 *
 * @return true when the position of currentPage in pages is before the last position
 */
130 public boolean hasNextPage() {
131 return pages.indexOf(this.currentPage) < pages.size() - 1;
/**
 * Tells whether a page precedes the current one in the pages list.
 *
 * @return true when currentPage is found in pages at a position after the first;
 *         false when it is first or not in the list (indexOf returns -1)
 */
136 public boolean hasPreviousPage() {
137 return pages.indexOf(this.currentPage) > 0;
/**
 * Moves currentPage to the entry that follows it in the pages list.
 *
 * NOTE(review): the guard condition and the return statement are not visible in
 * this excerpt; presumably guarded by hasNextPage() in the same way previousPage()
 * is guarded by hasPreviousPage() - confirm.
 *
 * @throws IllegalStateException with the bundled "No next page." message
 */
142 @NbBundle.Messages(
"AccountsText.nextPage.exception.msg=No next page.")
143 public int nextPage() {
// Look up the current page's position and step to the following list entry.
145 currentPage = pages.get(pages.indexOf(
this.currentPage) + 1);
148 throw new IllegalStateException(Bundle.AccountsText_nextPage_exception_msg());
/**
 * Moves currentPage to the entry that precedes it in the pages list, when
 * hasPreviousPage() reports that one exists.
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 *
 * @throws IllegalStateException with the bundled "No previous page." message
 */
153 @NbBundle.Messages(
"AccountsText.previousPage.exception.msg=No previous page.")
154 public int previousPage() {
155 if (hasPreviousPage()) {
// Look up the current page's position and step to the preceding list entry.
156 currentPage = pages.get(pages.indexOf(
this.currentPage) - 1);
159 throw new IllegalStateException(Bundle.AccountsText_previousPage_exception_msg());
/**
 * Tells whether another hit follows the current one on the current page.
 *
 * When the current page has an entry in currentHitPerPage, the result is true if
 * that hit position is lower than the page's entry in numberOfHitsPerPage.
 *
 * NOTE(review): the branch taken when the page has no entry is not visible in this excerpt.
 */
164 public boolean hasNextItem() {
165 if (this.currentHitPerPage.containsKey(currentPage)) {
166 return this.currentHitPerPage.get(currentPage) < this.numberOfHitsPerPage.get(currentPage);
/**
 * Tells whether a hit precedes the current one on the current page.
 *
 * When the current page has an entry in currentHitPerPage, the result is true if
 * that hit position is greater than 1.
 *
 * NOTE(review): the branch taken when the page has no entry is not visible in this excerpt.
 */
173 public boolean hasPreviousItem() {
174 if (this.currentHitPerPage.containsKey(currentPage)) {
175 return this.currentHitPerPage.get(currentPage) > 1;
/**
 * Advances the current-hit position of the current page by one.
 *
 * NOTE(review): the guard condition is not visible in this excerpt; presumably
 * hasNextItem(), mirroring previousItem() - confirm.
 *
 * @return the new hit position for the current page
 * @throws IllegalStateException with the bundled "No next item." message
 */
182 @NbBundle.Messages(
"AccountsText.nextItem.exception.msg=No next item.")
183 public int nextItem() {
// merge() adds 1 to the stored position (or stores 1 if the page has no entry) and returns the result.
185 return currentHitPerPage.merge(currentPage, 1, Integer::sum);
187 throw new IllegalStateException(Bundle.AccountsText_nextItem_exception_msg());
/**
 * Moves the current-hit position of the current page back by one, when
 * hasPreviousItem() reports that an earlier hit exists.
 *
 * @return the new hit position for the current page
 * @throws IllegalStateException with the bundled "No previous item." message
 */
192 @NbBundle.Messages(
"AccountsText.previousItem.exception.msg=No previous item.")
193 public int previousItem() {
194 if (hasPreviousItem()) {
// merge() adds -1 to the stored position and returns the result.
195 return currentHitPerPage.merge(currentPage, -1, Integer::sum);
197 throw new IllegalStateException(Bundle.AccountsText_previousItem_exception_msg());
/**
 * Returns the current-hit position stored for the current page, when the page
 * has an entry in currentHitPerPage.
 *
 * NOTE(review): the value returned when the page has no entry is not visible in this excerpt.
 */
202 public int currentItem() {
203 if (this.currentHitPerPage.containsKey(currentPage)) {
204 return currentHitPerPage.get(currentPage);
/**
 * Returns the page-number to hit-count map.
 *
 * The internal map itself is returned, not a copy, so changes made by the
 * caller are visible to this object and vice versa.
 *
 * @return the numberOfHitsPerPage map
 */
211 public LinkedHashMap<Integer, Integer> getHitsPages() {
212 return this.numberOfHitsPerPage;
/**
 * Fills in the paging state for this document: page count, current page, and the
 * per-page hit counters. Declared synchronized, so it holds this object's monitor
 * while it runs.
 *
 * Failures from the Solr calls visible here are logged at WARNING level rather
 * than propagated.
 *
 * NOTE(review): several structural lines (the body of the first if, else
 * branches, try headers, closing braces) are not visible in this excerpt; the
 * comments below describe only the visible statements.
 */
219 synchronized private void loadPageInfo() {
// NOTE(review): body not visible - presumably an early return when the info is already loaded; confirm.
220 if (isPageInfoLoaded) {
// Document id named a specific chunk: that chunk is the only page, with zeroed hit counters.
223 if (chunkId != null) {
224 this.numberPagesForFile = 1;
225 this.currentPage = chunkId;
226 this.numberOfHitsPerPage.put(chunkId, 0);
227 this.pages.add(chunkId);
228 this.currentHitPerPage.put(chunkId, 0);
// Ask Solr how many chunks the file has.
231 this.numberPagesForFile = solrServer.queryNumFileChunks(this.solrObjectId);
232 }
catch (KeywordSearchModuleException | NoOpenCoreException ex) {
233 LOGGER.log(Level.WARNING,
"Could not get number pages for content " +
this.solrDocumentId, ex);
// Collect the chunk numbers that contain hits; the TreeSet keeps them sorted and unique.
238 TreeSet<Integer> sortedPagesWithHits =
new TreeSet<>();
239 SolrQuery q =
new SolrQuery();
240 q.setShowDebugInfo(DEBUG);
241 q.setQuery(queryString);
// Only the document id field is requested.
242 q.setFields(Server.Schema.ID.toString());
// Restrict results to ids of the form <object id><separator><anything>, i.e. this file's chunks.
243 q.addFilterQuery(Server.Schema.ID.toString() +
":" + this.solrObjectId + Server.CHUNK_ID_SEPARATOR +
"*");
246 QueryResponse response = solrServer.query(q, METHOD.POST);
247 for (SolrDocument resultDoc : response.getResults()) {
248 final String resultDocumentId = resultDoc.getFieldValue(Server.Schema.ID.toString()).toString();
// The chunk number is whatever follows the first separator in the returned id.
250 String resultChunkID = StringUtils.substringAfter(resultDocumentId, Server.CHUNK_ID_SEPARATOR);
251 if (StringUtils.isNotBlank(resultChunkID)) {
252 sortedPagesWithHits.add(Integer.parseInt(resultChunkID));
// NOTE(review): presumably the else branch (blank chunk id recorded as page 0); the else line is not visible - confirm.
254 sortedPagesWithHits.add(0);
258 }
catch (KeywordSearchModuleException | NoOpenCoreException | NumberFormatException ex) {
259 LOGGER.log(Level.WARNING,
"Error executing Solr highlighting query: " + keywords, ex);
// Start on page 0 when nothing matched, otherwise on the lowest-numbered page with hits.
263 if (sortedPagesWithHits.isEmpty()) {
264 this.currentPage = 0;
266 this.currentPage = sortedPagesWithHits.first();
// Zero the hit count and the current-hit position for every page with hits.
// NOTE(review): the embedded numbering skips a line between the two puts; that statement is not visible here.
269 for (Integer page : sortedPagesWithHits) {
270 numberOfHitsPerPage.put(page, 0);
272 currentHitPerPage.put(page, 0);
276 isPageInfoLoaded =
true;
/**
 * Queries Solr for the highlighted text of the current page and returns it as HTML.
 *
 * The visible code returns the bundled "no match" message when Solr reports no
 * highlighting for the document or for HIGHLIGHT_FIELD, and the bundled "query
 * failed" message (after logging at WARNING level) when any exception is caught.
 * On success the highlighted text is wrapped in html/pre tags, and the page's
 * entry in numberOfHitsPerPage is updated.
 *
 * NOTE(review): the try header, the declaration and loop that maintain "count",
 * and the body of the last if are not visible in this excerpt.
 *
 * @return HTML markup for display
 */
280 @NbBundle.Messages({
"AccountsText.getMarkup.noMatchMsg="
281 +
"<html><pre><span style\\\\='background\\\\:yellow'>There were no keyword hits on this page. <br />"
282 +
"The keyword could have been in the file name."
283 +
" <br />Advance to another page if present, or to view the original text, choose File Text"
284 +
" <br />in the drop down menu to the right...</span></pre></html>",
285 "AccountsText.getMarkup.queryFailedMsg="
286 +
"<html><pre><span style\\\\='background\\\\:yellow'>Failed to retrieve keyword hit results."
287 +
" <br />Confirm that Autopsy can connect to the Solr server. "
288 +
"<br /></span></pre></html>"})
289 public String getText() {
292 SolrQuery q =
new SolrQuery();
293 q.setShowDebugInfo(DEBUG);
294 q.addHighlightField(HIGHLIGHT_FIELD);
295 q.setQuery(queryString);
// Restrict the query to the single document for the current page: <object id><separator><page>.
298 String queryDocumentID = this.solrObjectId + Server.CHUNK_ID_SEPARATOR + this.currentPage;
299 q.addFilterQuery(Server.Schema.ID.toString() +
":" + queryDocumentID);
// Highlighter settings: fast vector highlighter, this class's hit markup, the "single"
// fragment list builder, and the server's "unlimited" constant for analysed characters.
302 q.setParam(
"hl.useFastVectorHighlighter",
"true");
303 q.setParam(
"hl.tag.pre", HIGHLIGHT_PRE);
304 q.setParam(
"hl.tag.post", HIGHLIGHT_POST);
305 q.setParam(
"hl.fragListBuilder",
"single");
306 q.setParam(
"hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED);
// The response maps document id -> field name -> highlighted snippets.
310 Map<String, Map<String, List<String>>> highlightingPerDocument = solrServer.query(q, METHOD.POST).getHighlighting();
311 Map<String, List<String>> highlightingPerField = highlightingPerDocument.get(queryDocumentID);
312 if (highlightingPerField == null) {
313 return Bundle.AccountsText_getMarkup_noMatchMsg();
315 List<String> highlights = highlightingPerField.get(HIGHLIGHT_FIELD);
316 if (highlights == null) {
317 return Bundle.AccountsText_getMarkup_noMatchMsg();
// Only the first snippet is used, with surrounding whitespace trimmed.
321 String highlighting = highlights.get(0).trim();
// Rewrite the text so that each opening highlight tag is preceded by a numbered named anchor.
327 Matcher m = ANCHOR_DETECTION_PATTERN.matcher(highlighting);
328 StringBuffer sb =
new StringBuffer(highlighting.length());
// NOTE(review): presumably executed once per match inside a find() loop that increments count - confirm.
332 m.appendReplacement(sb, INSERT_PREFIX + count + INSERT_POSTFIX);
// Record the hit count for this page.
337 this.numberOfHitsPerPage.put(this.currentPage, count);
// No hit selected yet on this page and at least one is available.
// NOTE(review): the body is not visible - presumably advances to the first hit; confirm.
338 if (this.currentItem() == 0 && this.hasNextItem()) {
343 return "<html><pre>" + sb.toString() +
"</pre></html>";
344 }
catch (Exception ex) {
345 LOGGER.log(Level.WARNING,
"Error executing Solr highlighting query: " + keywords, ex);
346 return Bundle.AccountsText_getMarkup_queryFailedMsg();
/**
 * Returns the string form of this text source.
 *
 * NOTE(review): the method body is not visible in this excerpt; presumably it
 * returns displayName, which is assigned in the constructor - confirm.
 */
351 public String toString() {
/**
 * Reports whether this text source is searchable.
 *
 * NOTE(review): the method body is not visible in this excerpt, so the returned
 * value cannot be stated here.
 */
356 public boolean isSearchable() {
/**
 * Returns the prefix shared by the names of the HTML anchors that getText()
 * inserts in front of hits; each anchor name is this prefix followed by a number.
 *
 * @return ANCHOR_NAME_PREFIX
 */
361 public String getAnchorPrefix() {
362 return ANCHOR_NAME_PREFIX;
/**
 * Returns the hit count stored for the current page in numberOfHitsPerPage.
 *
 * NOTE(review): the statement executed when the current page has no entry is
 * not visible in this excerpt.
 */
366 public int getNumberHits() {
367 if (!this.numberOfHitsPerPage.containsKey(
this.currentPage)) {
370 return this.numberOfHitsPerPage.get(this.currentPage);