19 package org.sleuthkit.autopsy.keywordsearch;
 
   21 import com.google.common.collect.Iterators;
 
   22 import com.google.common.collect.Range;
 
   23 import com.google.common.collect.TreeRangeSet;
 
   24 import java.util.Arrays;
 
   25 import java.util.Collection;
 
   26 import java.util.HashMap;
 
   27 import java.util.HashSet;
 
   28 import java.util.List;
 
   31 import java.util.TreeMap;
 
   32 import java.util.logging.Level;
 
   33 import java.util.stream.Collectors;
 
   34 import javax.annotation.concurrent.GuardedBy;
 
   35 import org.apache.commons.lang.StringEscapeUtils;
 
   36 import org.apache.commons.lang.StringUtils;
 
   37 import org.apache.commons.lang3.math.NumberUtils;
 
   38 import org.apache.solr.client.solrj.SolrQuery;
 
   39 import org.apache.solr.client.solrj.SolrRequest.METHOD;
 
   40 import org.apache.solr.client.solrj.response.QueryResponse;
 
   41 import org.apache.solr.common.SolrDocumentList;
 
   42 import org.openide.util.NbBundle;
 
   54 class HighlightedText 
implements IndexedText {
 
   56     private static final Logger logger = Logger.getLogger(HighlightedText.class.getName());
 
   58     private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
 
   60     private static final BlackboardAttribute.Type TSK_KEYWORD_SEARCH_TYPE = 
new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE);
 
   61     private static final BlackboardAttribute.Type TSK_KEYWORD = 
new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD);
 
   62     static private final BlackboardAttribute.Type TSK_ASSOCIATED_ARTIFACT = 
new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT);
 
   63     static private final BlackboardAttribute.Type TSK_KEYWORD_REGEXP = 
new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP);
 
   65     private static final String HIGHLIGHT_PRE = 
"<span style='background:yellow'>"; 
 
   66     private static final String HIGHLIGHT_POST = 
"</span>"; 
 
   67     private static final String ANCHOR_PREFIX = HighlightedText.class.getName() + 
"_"; 
 
   69     final private Server solrServer = KeywordSearch.getServer();
 
   71     private final long solrObjectId;
 
   75     private final Set<String> keywords = 
new HashSet<>();
 
   77     private int numberPages;
 
   78     private Integer currentPage = 0;
 
   81     private 
boolean isPageInfoLoaded = false;
 
   86     private final TreeMap<Integer, Integer> numberOfHitsPerPage = new TreeMap<>();
 
   91     private final Set<Integer> pages = numberOfHitsPerPage.keySet();
 
   95     private final HashMap<Integer, Integer> currentHitPerPage = new HashMap<>();
 
   97     private QueryResults hits = null; 
 
   98     private BlackboardArtifact artifact;
 
   99     private KeywordSearch.QueryType qt;
 
  100     private 
boolean isLiteral;
 
  113     HighlightedText(
long solrObjectId, QueryResults hits) {
          // Creates an instance bound to the given Solr object id.
          // NOTE(review): only the solrObjectId assignment is visible in this listing; the hits
          // argument is presumably stored in the hits field on a line that is not shown - confirm.
 
  114         this.solrObjectId = solrObjectId;
 
  126     HighlightedText(BlackboardArtifact artifact) 
throws TskCoreException {
 
  127         this.artifact = artifact;
 
  128         BlackboardAttribute attribute = artifact.getAttribute(TSK_ASSOCIATED_ARTIFACT);
 
  129         if (attribute != null) {
 
  130             this.solrObjectId = attribute.getValueLong();
 
  132             this.solrObjectId = artifact.getObjectID();
 
  141     synchronized private void loadPageInfo() throws TskCoreException, KeywordSearchModuleException, NoOpenCoreException {
          // Populates the paging state: numberPages, currentPage and the two per-page maps.
          // The chunk count for this Solr object is queried from the server, then the work is
          // delegated depending on whether an artifact is set and whether there are any chunks.
 
          // NOTE(review): the body of this guard is not visible in this listing; presumably it
          // returns early so that the page info is only loaded once - confirm.
  142         if (isPageInfoLoaded) {
 
  146         this.numberPages = solrServer.queryNumFileChunks(this.solrObjectId);
 
  148         if (artifact != null) {
 
  149             loadPageInfoFromArtifact();
 
  150         } 
else if (numberPages != 0) {
 
  152             loadPageInfoFromHits();
 
          // NOTE(review): presumably the final else branch (no artifact and zero chunks); the
          // line opening it is not visible - confirm. The content is treated as a single
          // page, page 1, with zero hits and the current hit index set to 0.
  155             this.numberPages = 1;
 
  156             this.currentPage = 1;
 
  157             numberOfHitsPerPage.put(1, 0);
 
  158             currentHitPerPage.put(1, 0);
 
  159             isPageInfoLoaded = 
true;
 
  169     synchronized private void loadPageInfoFromArtifact() throws TskCoreException, KeywordSearchModuleException, NoOpenCoreException {
          // Rebuilds the keyword query described by the artifact's attributes, runs it
          // restricted to this Solr object, stores the result in hits and then hands over to
          // loadPageInfoFromHits().
 
          // The artifact's TSK_KEYWORD value is one of the keywords to highlight.
  170         final String keyword = artifact.getAttribute(TSK_KEYWORD).getValueString();
 
  171         this.keywords.add(keyword);
 
          // The stored int is used as an index into QueryType.values(); qt stays null when
          // the artifact has no TSK_KEYWORD_SEARCH_TYPE attribute.
  174         final BlackboardAttribute queryTypeAttribute = artifact.getAttribute(TSK_KEYWORD_SEARCH_TYPE);
 
  175         qt = (queryTypeAttribute != null)
 
  176                 ? KeywordSearch.QueryType.values()[queryTypeAttribute.getValueInt()] : null;
 
  178         Keyword keywordQuery = null;
 
          // NOTE(review): the condition choosing between the two Keyword constructions below
          // is not visible in this listing; presumably it branches on qt - confirm.
          // First form: built from the keyword itself with both flags true.
  182                 keywordQuery = 
new Keyword(keyword, 
true, 
true);
 
          // Second form: built from the artifact's TSK_KEYWORD_REGEXP value with both flags false.
  185                 String regexp = artifact.getAttribute(TSK_KEYWORD_REGEXP).getValueString();
 
  186                 keywordQuery = 
new Keyword(regexp, 
false, 
false);
 
  189         KeywordSearchQuery chunksQuery = KeywordSearchUtil.getQueryForKeyword(keywordQuery, 
new KeywordList(Arrays.asList(keywordQuery)));
 
          // Add a CHUNK-type filter built from this Solr object id.
  193         chunksQuery.addFilter(
new KeywordQueryFilter(FilterType.CHUNK, 
this.solrObjectId));
 
  195         hits = chunksQuery.performQuery();
 
  196         loadPageInfoFromHits();
 
  202     synchronized private void loadPageInfoFromHits() {
          // Walks every hit of every keyword in hits and registers the pages (chunk ids) that
          // belong to this Solr object, then selects the lowest registered page (or 1 when
          // none was registered) and marks the page info as loaded.
 
  203         isLiteral = hits.getQuery().isLiteral();
 
  211         for (Keyword k : hits.getKeywords()) {
 
  212             for (KeywordHit hit : hits.getResults(k)) {
 
  213                 int chunkID = hit.getChunkId();
 
  214                 if (artifact != null) {
 
                      // Hits with chunk id 0 or for another Solr object are ignored. With an
                      // artifact, a page is registered only if the hit text contains one of
                      // the known keywords.
  215                     if (chunkID != 0 && this.solrObjectId == hit.getSolrObjectId()) {
 
  216                         String hit1 = hit.getHit();
 
  217                         if (keywords.stream().anyMatch(hit1::contains)) {
 
                              // Hit count is initialised to 0, as is the current hit index.
  218                             numberOfHitsPerPage.put(chunkID, 0); 
 
  219                             currentHitPerPage.put(chunkID, 0); 
 
                      // NOTE(review): presumably the else branch of the artifact check above;
                      // the line opening it is not visible in this listing - confirm. Here the
                      // page is registered unconditionally and any non-blank hit text is added
                      // to the keywords to highlight.
  224                     if (chunkID != 0 && this.solrObjectId == hit.getSolrObjectId()) {
 
  226                         numberOfHitsPerPage.put(chunkID, 0); 
 
  227                         currentHitPerPage.put(chunkID, 0); 
 
  229                         if (StringUtils.isNotBlank(hit.getHit())) {
 
  230                             this.keywords.add(hit.getHit());
 
          // pages is the sorted key set of numberOfHitsPerPage, so findFirst() yields the
          // lowest page number that was registered.
  238         this.currentPage = pages.stream().findFirst().orElse(1);
 
  240         isPageInfoLoaded = 
true;
 
  251     static private String constructEscapedSolrQuery(String query) {
 
  252         return LuceneQuery.HIGHLIGHT_FIELD + 
":" + 
"\"" + KeywordSearchUtil.escapeLuceneQuery(query) + 
"\"";
 
  255     private int getIndexOfCurrentPage() {
 
  256         return Iterators.indexOf(pages.iterator(), this.currentPage::equals);
 
  260     public int getNumberPages() {
 
  262         return this.numberPages;
 
  266     public int getCurrentPage() {
 
  267         return this.currentPage;
 
  271     public boolean hasNextPage() {
 
  272         return getIndexOfCurrentPage() < pages.size() - 1;
 
  276     public boolean hasPreviousPage() {
 
  277         return getIndexOfCurrentPage() > 0;
 
  281     public int nextPage() {
          // Advances currentPage to the entry that follows it in pages; the other visible path
          // throws IllegalStateException.
          // NOTE(review): the condition selecting between the two paths and the return
          // statement are not visible in this listing; presumably guarded by hasNextPage()
          // and returning the new current page - confirm.
 
  283             currentPage = Iterators.get(pages.iterator(), getIndexOfCurrentPage() + 1);
 
  286             throw new IllegalStateException(
"No next page.");
 
  291     public int previousPage() {
          // Moves currentPage back to the entry that precedes it in pages when
          // hasPreviousPage() holds; the other visible path throws IllegalStateException.
          // NOTE(review): the else line and the return statement are not visible in this
          // listing; presumably the new current page is returned - confirm.
 
  292         if (hasPreviousPage()) {
 
  293             currentPage = Iterators.get(pages.iterator(), getIndexOfCurrentPage() - 1);
 
  296             throw new IllegalStateException(
"No previous page.");
 
  301     public boolean hasNextItem() {
          // Reports whether the current hit index on the current page is below that page's
          // hit count.
 
          // NOTE(review): the body of this guard (current page has no hit index recorded) is
          // not visible in this listing; presumably it returns false - confirm.
  302         if (!this.currentHitPerPage.containsKey(currentPage)) {
 
  305         return this.currentHitPerPage.get(currentPage) < this.numberOfHitsPerPage.get(currentPage);
 
  309     public boolean hasPreviousItem() {
          // Reports whether the current hit index on the current page is greater than 1.
 
          // NOTE(review): the body of this guard (current page has no hit index recorded) is
          // not visible in this listing; presumably it returns false - confirm.
  310         if (!this.currentHitPerPage.containsKey(currentPage)) {
 
  313         return this.currentHitPerPage.get(currentPage) > 1;
 
  317     public int nextItem() {
          // Increments the current hit index of the current page by one and stores it.
          // Throws IllegalStateException when hasNextItem() is false.
          // NOTE(review): the return statement is not visible in this listing; presumably the
          // new index (cur) is returned - confirm.
 
  318         if (!hasNextItem()) {
 
  319             throw new IllegalStateException(
"No next item.");
 
  321         int cur = currentHitPerPage.get(currentPage) + 1;
 
  322         currentHitPerPage.put(currentPage, cur);
 
  327     public int previousItem() {
          // Decrements the current hit index of the current page by one and stores it.
          // Throws IllegalStateException when hasPreviousItem() is false.
          // NOTE(review): the return statement is not visible in this listing; presumably the
          // new index (cur) is returned - confirm.
 
  328         if (!hasPreviousItem()) {
 
  329             throw new IllegalStateException(
"No previous item.");
 
  331         int cur = currentHitPerPage.get(currentPage) - 1;
 
  332         currentHitPerPage.put(currentPage, cur);
 
  337     public int currentItem() {
          // Returns the current hit index recorded for the current page.
 
          // NOTE(review): the body of this guard (current page has no hit index recorded) is
          // not visible in this listing; presumably it returns 0 - confirm.
  338         if (!this.currentHitPerPage.containsKey(currentPage)) {
 
  341         return currentHitPerPage.get(currentPage);
 
  345     public String getText() {
          // Builds the HTML shown for the current page: queries Solr for the document of the
          // current page, takes the highlighted text from the response (or falls back to
          // attemptManualHighlighting()), inserts anchors and wraps the result in <html><pre>.
          // On a caught exception the error is logged and the bundle's error message returned.
          // NOTE(review): the declaration of chunkID, the opening try and any call made before
          // the query is built are not visible in this listing - confirm against the full file.
 
  347         String highlightField = 
"";
 
  350             SolrQuery q = 
new SolrQuery();
 
  351             q.setShowDebugInfo(DEBUG); 
 
              // The document id is the object id alone, or "<object id>_<page>" when there
              // are chunks and the current page is not "0".
  353             String contentIdStr = Long.toString(this.solrObjectId);
 
  354             if (numberPages != 0) {
 
  355                 chunkID = Integer.toString(this.currentPage);
 
  356                 contentIdStr += 
"0".equals(chunkID) ? 
"" : 
"_" + chunkID;
 
  358             final String filterQuery = Server.Schema.ID.toString() + 
":" + KeywordSearchUtil.escapeLuceneQuery(contentIdStr);
 
  360             highlightField = LuceneQuery.HIGHLIGHT_FIELD;
 
              // NOTE(review): the condition choosing between the highlighting query set up
              // here and the plain id query further down is not visible in this listing;
              // presumably it tests isLiteral - confirm.
              // Highlighting query: one quoted query per keyword, joined with spaces, limited
              // to the current document by the filter query, with HIGHLIGHT_PRE/HIGHLIGHT_POST
              // as the highlight tags.
  363                 final String highlightQuery = keywords.stream()
 
  364                         .map(HighlightedText::constructEscapedSolrQuery)
 
  365                         .collect(Collectors.joining(
" "));
 
  367                 q.setQuery(highlightQuery);
 
  368                 q.addField(highlightField);
 
  369                 q.addFilterQuery(filterQuery);
 
  370                 q.addHighlightField(highlightField);
 
  371                 q.setHighlightFragsize(0); 
 
  374                 q.setParam(
"hl.useFastVectorHighlighter", 
"on"); 
 
  375                 q.setParam(
"hl.tag.pre", HIGHLIGHT_PRE); 
 
  376                 q.setParam(
"hl.tag.post", HIGHLIGHT_POST); 
 
  377                 q.setParam(
"hl.fragListBuilder", 
"single"); 
 
  380                 q.setParam(
"hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED); 
 
              // Plain query: fetch the highlight field of the document by id only.
  386                 q.setQuery(filterQuery);
 
  387                 q.addField(highlightField);
 
  390             QueryResponse response = solrServer.query(q, METHOD.POST);
 
              // More than one result is only logged as a warning; processing continues.
  395             if (response.getResults().size() > 1) {
 
  396                 logger.log(Level.WARNING, 
"Unexpected number of results for Solr highlighting query: {0}", q); 
 
  398             String highlightedContent;
 
  399             Map<String, Map<String, List<String>>> responseHighlight = response.getHighlighting();
 
              // Manual highlighting is used whenever the response carries no highlighting at
              // all, none for this document id, or none for the highlight field.
  401             if (responseHighlight == null) {
 
  402                 highlightedContent = attemptManualHighlighting(response.getResults(), highlightField, keywords);
 
  404                 Map<String, List<String>> responseHighlightID = responseHighlight.get(contentIdStr);
 
  406                 if (responseHighlightID == null) {
 
  407                     highlightedContent = attemptManualHighlighting(response.getResults(), highlightField, keywords);
 
  409                     List<String> contentHighlights = responseHighlightID.get(LuceneQuery.HIGHLIGHT_FIELD);
 
  410                     if (contentHighlights == null) {
 
  411                         highlightedContent = attemptManualHighlighting(response.getResults(), highlightField, keywords);
 
                          // Otherwise the first highlight fragment, trimmed, is used.
  414                         highlightedContent = contentHighlights.get(0).trim();
 
  418             highlightedContent = insertAnchors(highlightedContent);
 
  420             return "<html><pre>" + highlightedContent + 
"</pre></html>"; 
 
  421         } 
catch (TskCoreException | KeywordSearchModuleException | NoOpenCoreException ex) {
 
  422             logger.log(Level.SEVERE, 
"Error getting highlighted text for Solr doc id " + solrObjectId + 
", chunkID " + chunkID + 
", highlight query: " + highlightField, ex); 
 
  423             return Bundle.IndexedText_errorMessage_errorGettingText();
 
  428     public String toString() {
 
  429         return NbBundle.getMessage(this.getClass(), 
"HighlightedMatchesSource.toString");
 
  433     public boolean isSearchable() {
          // NOTE(review): the body of this method is not visible in this listing, so the value
          // it returns cannot be documented from here - confirm against the full file.
 
  438     public String getAnchorPrefix() {
 
  439         return ANCHOR_PREFIX;
 
  443     public int getNumberHits() {
          // Returns the hit count recorded for the current page.
 
          // NOTE(review): the body of this guard (current page has no hit count recorded) is
          // not visible in this listing; presumably it returns 0 - confirm.
  444         if (!this.numberOfHitsPerPage.containsKey(
this.currentPage)) {
 
  447         return this.numberOfHitsPerPage.get(this.currentPage);
 
  465     static String attemptManualHighlighting(SolrDocumentList solrDocumentList, String highlightField, Collection<String> keywords) {
 
  466         if (solrDocumentList.isEmpty()) {
 
  467             return Bundle.IndexedText_errorMessage_errorGettingText();
 
  473         String text = solrDocumentList.get(0).getOrDefault(highlightField, 
"").toString();
 
  480         text = StringEscapeUtils.escapeHtml(text);
 
  482         TreeRangeSet<Integer> highlights = TreeRangeSet.create();
 
  485         for (String keyword : keywords) {
 
  487             final String escapedKeyword = StringEscapeUtils.escapeHtml(keyword);
 
  488             int searchOffset = 0;
 
  489             int hitOffset = StringUtils.indexOfIgnoreCase(text, escapedKeyword, searchOffset);
 
  490             while (hitOffset != -1) {
 
  492                 searchOffset = hitOffset + escapedKeyword.length();
 
  495                 highlights.add(Range.closedOpen(hitOffset, searchOffset));
 
  498                 hitOffset = StringUtils.indexOfIgnoreCase(text, escapedKeyword, searchOffset);
 
  502         StringBuilder highlightedText = 
new StringBuilder(text);
 
  503         int totalHighLightLengthInserted = 0;
 
  505         for (Range<Integer> highlightRange : highlights.asRanges()) {
 
  506             int hStart = highlightRange.lowerEndpoint();
 
  507             int hEnd = highlightRange.upperEndpoint();
 
  510             highlightedText.insert(hStart + totalHighLightLengthInserted, HIGHLIGHT_PRE);
 
  511             totalHighLightLengthInserted += HIGHLIGHT_PRE.length();
 
  512             highlightedText.insert(hEnd + totalHighLightLengthInserted, HIGHLIGHT_POST);
 
  513             totalHighLightLengthInserted += HIGHLIGHT_POST.length();
 
  516         return highlightedText.toString();
 
  527     private String insertAnchors(String searchableContent) {
          // Inserts an anchor of the form <a name='ANCHOR_PREFIX + n'></a> in front of
          // occurrences of HIGHLIGHT_PRE in the given content, stores count as the hit count of
          // the current page and returns the modified content.
          // NOTE(review): the declaration of count and the loop header around the insertion
          // statements are not visible in this listing; presumably count starts at 0 and the
          // loop runs while index is non-negative - confirm against the full file.
 
  528         StringBuilder buf = 
new StringBuilder(searchableContent);
 
  529         final String searchToken = HIGHLIGHT_PRE;
 
  530         final int indexSearchTokLen = searchToken.length();
 
  531         final String insertPre = 
"<a name='" + ANCHOR_PREFIX; 
 
  532         final String insertPost = 
"'></a>"; 
 
  534         int searchOffset = 0;
 
  535         int index = buf.indexOf(searchToken, searchOffset);
 
              // The anchor is numbered count + 1 and inserted at the position of the token.
  537             String insertString = insertPre + Integer.toString(count + 1) + insertPost;
 
  538             int insertStringLen = insertString.length();
 
  539             buf.insert(index, insertString);
 
              // Resume the search after both the inserted anchor and the token it precedes.
  540             searchOffset = index + indexSearchTokLen + insertStringLen; 
 
  542             index = buf.indexOf(searchToken, searchOffset);
 
  546         this.numberOfHitsPerPage.put(this.currentPage, count);
 
          // NOTE(review): the body of this condition is not visible in this listing;
          // presumably it advances to the first hit - confirm.
  547         if (this.currentItem() == 0 && this.hasNextItem()) {
 
  551         return buf.toString();