19 package org.sleuthkit.autopsy.keywordsearch;
 
   21 import java.util.ArrayList;
 
   22 import java.util.HashMap;
 
   23 import java.util.HashSet;
 
   24 import java.util.LinkedHashMap;
 
   25 import java.util.List;
 
   28 import java.util.TreeSet;
 
   29 import java.util.logging.Level;
 
   30 import java.util.regex.Matcher;
 
   31 import java.util.regex.Pattern;
 
   32 import java.util.stream.Collectors;
 
   33 import org.apache.commons.lang.StringUtils;
 
   34 import org.apache.solr.client.solrj.SolrQuery;
 
   35 import org.apache.solr.client.solrj.SolrRequest.METHOD;
 
   36 import org.apache.solr.client.solrj.response.QueryResponse;
 
   37 import org.apache.solr.common.SolrDocument;
 
   38 import org.openide.util.NbBundle;
 
   51 class AccountsText 
implements IndexedText {
 
   53     private static final Logger LOGGER = Logger.getLogger(AccountsText.class.getName());
 
   54     private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
 
   56     private static final String HIGHLIGHT_PRE = 
"<span style='background:yellow'>"; 
 
   57     private static final String HIGHLIGHT_POST = 
"</span>"; 
 
   58     private static final String ANCHOR_NAME_PREFIX = AccountsText.class.
getName() + 
"_";
 
   60     private static final String INSERT_PREFIX = 
"<a name='" + ANCHOR_NAME_PREFIX; 
 
   61     private static final String INSERT_POSTFIX = 
"'></a>$0"; 
 
   62     private static final Pattern ANCHOR_DETECTION_PATTERN = Pattern.compile(HIGHLIGHT_PRE);
 
   64     private static final String HIGHLIGHT_FIELD = LuceneQuery.HIGHLIGHT_FIELD_REGEX;
 
   66     private final Server solrServer;
 
   67     private final String solrDocumentId;
 
   68     private final long solrObjectId;
 
   69     private final Integer chunkId;
 
   70     private final Set<String> keywords = 
new HashSet<>();
 
   71     private final String displayName;
 
   72     private final String queryString;
 
   74     private boolean isPageInfoLoaded = 
false;
 
   75     private int numberPagesForFile = 0;
 
   76     private int currentPage = 0;
 
   78     private final List<Integer> pages = 
new ArrayList<>();
 
   80     private final LinkedHashMap<Integer, Integer> numberOfHitsPerPage = 
new LinkedHashMap<>();
 
   82     private final HashMap<Integer, Integer> currentHitPerPage = 
new HashMap<>();
 
   85         "AccountsText.creditCardNumber=Credit Card Number",
 
   86         "AccountsText.creditCardNumbers=Credit Card Numbers"})
 
   87     AccountsText(String objectId, Set<String> keywords) {
 
   88         this.solrDocumentId = objectId;
 
   89         this.keywords.addAll(keywords);
 
   92         this.queryString = HIGHLIGHT_FIELD + 
":" 
   94                 .map(keyword -> 
"/.*?" + KeywordSearchUtil.escapeLuceneQuery(keyword) + 
".*?/")
 
   95                 .collect(Collectors.joining(
" ")); 
 
   97         this.solrServer = KeywordSearch.getServer();
 
   99         final int separatorIndex = solrDocumentId.indexOf(Server.CHUNK_ID_SEPARATOR);
 
  100         if (-1 == separatorIndex) {
 
  102             this.solrObjectId = Long.parseLong(solrDocumentId);
 
  106             this.solrObjectId = Long.parseLong(solrDocumentId.substring(0, separatorIndex));
 
  107             this.chunkId = Integer.parseInt(solrDocumentId.substring(separatorIndex + 1));
 
  110         displayName = keywords.size() == 1
 
  111                 ? Bundle.AccountsText_creditCardNumber()
 
  112                 : Bundle.AccountsText_creditCardNumbers();
 
  116         return this.solrObjectId;
 
  120     public int getNumberPages() {
 
  121         return this.numberPagesForFile;
 
  125     public int getCurrentPage() {
 
  126         return this.currentPage;
 
  130     public boolean hasNextPage() {
 
  131         return pages.indexOf(this.currentPage) < pages.size() - 1;
 
  136     public boolean hasPreviousPage() {
 
  137         return pages.indexOf(this.currentPage) > 0;
 
  142     @NbBundle.Messages(
"AccountsText.nextPage.exception.msg=No next page.")
 
  143     public int nextPage() {
 
  145             currentPage = pages.get(pages.indexOf(
this.currentPage) + 1);
 
  148             throw new IllegalStateException(Bundle.AccountsText_nextPage_exception_msg());
 
  153     @NbBundle.Messages(
"AccountsText.previousPage.exception.msg=No previous page.")
 
  154     public int previousPage() {
 
  155         if (hasPreviousPage()) {
 
  156             currentPage = pages.get(pages.indexOf(
this.currentPage) - 1);
 
  159             throw new IllegalStateException(Bundle.AccountsText_previousPage_exception_msg());
 
  164     public boolean hasNextItem() {
 
  165         if (this.currentHitPerPage.containsKey(currentPage)) {
 
  166             return this.currentHitPerPage.get(currentPage) < this.numberOfHitsPerPage.get(currentPage);
 
  173     public boolean hasPreviousItem() {
 
  174         if (this.currentHitPerPage.containsKey(currentPage)) {
 
  175             return this.currentHitPerPage.get(currentPage) > 1;
 
  182     @NbBundle.Messages(
"AccountsText.nextItem.exception.msg=No next item.")
 
  183     public int nextItem() {
 
  185             return currentHitPerPage.merge(currentPage, 1, Integer::sum);
 
  187             throw new IllegalStateException(Bundle.AccountsText_nextItem_exception_msg());
 
  192     @NbBundle.Messages(
"AccountsText.previousItem.exception.msg=No previous item.")
 
  193     public int previousItem() {
 
  194         if (hasPreviousItem()) {
 
  195             return currentHitPerPage.merge(currentPage, -1, Integer::sum);
 
  197             throw new IllegalStateException(Bundle.AccountsText_previousItem_exception_msg());
 
  202     public int currentItem() {
 
  203         if (this.currentHitPerPage.containsKey(currentPage)) {
 
  204             return currentHitPerPage.get(currentPage);
 
  211     public LinkedHashMap<Integer, Integer> getHitsPages() {
 
  212         return this.numberOfHitsPerPage;
 
  219     synchronized private void loadPageInfo() {
 
  220         if (isPageInfoLoaded) {
 
  223         if (chunkId != null) {
 
  224             this.numberPagesForFile = 1;
 
  225             this.currentPage = chunkId;
 
  226             this.numberOfHitsPerPage.put(chunkId, 0);
 
  227             this.pages.add(chunkId);
 
  228             this.currentHitPerPage.put(chunkId, 0);
 
  231                 this.numberPagesForFile = solrServer.queryNumFileChunks(this.solrObjectId);
 
  232             } 
catch (KeywordSearchModuleException | NoOpenCoreException ex) {
 
  233                 LOGGER.log(Level.WARNING, 
"Could not get number pages for content " + 
this.solrDocumentId, ex); 
 
  238             TreeSet<Integer> sortedPagesWithHits = 
new TreeSet<>();
 
  239             SolrQuery q = 
new SolrQuery();
 
  240             q.setShowDebugInfo(DEBUG); 
 
  241             q.setQuery(queryString);
 
  242             q.setFields(Server.Schema.ID.toString());  
 
  243             q.addFilterQuery(Server.Schema.ID.toString() + 
":" + this.solrObjectId + Server.CHUNK_ID_SEPARATOR + 
"*");
 
  246                 QueryResponse response = solrServer.query(q, METHOD.POST);
 
  247                 for (SolrDocument resultDoc : response.getResults()) {
 
  248                     final String resultDocumentId = resultDoc.getFieldValue(Server.Schema.ID.toString()).toString();
 
  250                     String resultChunkID = StringUtils.substringAfter(resultDocumentId, Server.CHUNK_ID_SEPARATOR);
 
  251                     if (StringUtils.isNotBlank(resultChunkID)) {
 
  252                         sortedPagesWithHits.add(Integer.parseInt(resultChunkID));
 
  254                         sortedPagesWithHits.add(0);
 
  258             } 
catch (KeywordSearchModuleException | NoOpenCoreException | NumberFormatException ex) {
 
  259                 LOGGER.log(Level.WARNING, 
"Error executing Solr highlighting query: " + keywords, ex); 
 
  263             if (sortedPagesWithHits.isEmpty()) {
 
  264                 this.currentPage = 0;
 
  266                 this.currentPage = sortedPagesWithHits.first();
 
  269             for (Integer page : sortedPagesWithHits) {
 
  270                 numberOfHitsPerPage.put(page, 0); 
 
  272                 currentHitPerPage.put(page, 0); 
 
  276         isPageInfoLoaded = 
true;
 
  280     @NbBundle.Messages({
"AccountsText.getMarkup.noMatchMsg=" 
  281         + 
"<html><pre><span style\\\\='background\\\\:yellow'>There were no keyword hits on this page. <br />" 
  282         + 
"The keyword could have been in the file name." 
  283         + 
" <br />Advance to another page if present, or to view the original text, choose File Text" 
  284         + 
" <br />in the drop down menu to the right...</span></pre></html>",
 
  285         "AccountsText.getMarkup.queryFailedMsg=" 
  286         + 
"<html><pre><span style\\\\='background\\\\:yellow'>Failed to retrieve keyword hit results." 
  287         + 
" <br />Confirm that Autopsy can connect to the Solr server. " 
  288         + 
"<br /></span></pre></html>"})
 
  289     public String getText() {
 
  292         SolrQuery q = 
new SolrQuery();
 
  293         q.setShowDebugInfo(DEBUG); 
 
  294         q.addHighlightField(HIGHLIGHT_FIELD);
 
  295         q.setQuery(queryString);
 
  298         String queryDocumentID = this.solrObjectId + Server.CHUNK_ID_SEPARATOR + this.currentPage;
 
  299         q.addFilterQuery(Server.Schema.ID.toString() + 
":" + queryDocumentID);
 
  302         q.setParam(
"hl.useFastVectorHighlighter", 
"true"); 
 
  303         q.setParam(
"hl.tag.pre", HIGHLIGHT_PRE); 
 
  304         q.setParam(
"hl.tag.post", HIGHLIGHT_POST); 
 
  305         q.setParam(
"hl.fragListBuilder", 
"single"); 
 
  306         q.setParam(
"hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED); 
 
  310             Map<String, Map<String, List<String>>> highlightingPerDocument = solrServer.query(q, METHOD.POST).getHighlighting();
 
  311             Map<String, List<String>> highlightingPerField = highlightingPerDocument.get(queryDocumentID);
 
  312             if (highlightingPerField == null) {
 
  313                 return Bundle.AccountsText_getMarkup_noMatchMsg();
 
  315             List<String> highlights = highlightingPerField.get(HIGHLIGHT_FIELD);
 
  316             if (highlights == null) {
 
  317                 return Bundle.AccountsText_getMarkup_noMatchMsg();
 
  321             String highlighting = highlights.get(0).trim();
 
  327             Matcher m = ANCHOR_DETECTION_PATTERN.matcher(highlighting);
 
  328             StringBuffer sb = 
new StringBuffer(highlighting.length());
 
  332                 m.appendReplacement(sb, INSERT_PREFIX + count + INSERT_POSTFIX);
 
  337             this.numberOfHitsPerPage.put(this.currentPage, count);
 
  338             if (this.currentItem() == 0 && this.hasNextItem()) {
 
  343             return "<html><pre>" + sb.toString() + 
"</pre></html>"; 
 
  344         } 
catch (Exception ex) {
 
  345             LOGGER.log(Level.WARNING, 
"Error executing Solr highlighting query: " + keywords, ex); 
 
  346             return Bundle.AccountsText_getMarkup_queryFailedMsg();
 
  351     public String toString() {
 
  356     public boolean isSearchable() {
 
  361     public String getAnchorPrefix() {
 
  362         return ANCHOR_NAME_PREFIX;
 
  366     public int getNumberHits() {
 
  367         if (!this.numberOfHitsPerPage.containsKey(
this.currentPage)) {
 
  370         return this.numberOfHitsPerPage.get(this.currentPage);