19 package org.sleuthkit.autopsy.keywordsearch;
 
   21 import com.google.common.collect.Iterators;
 
   22 import com.google.common.collect.Range;
 
   23 import com.google.common.collect.RangeSet;
 
   24 import com.google.common.collect.TreeRangeSet;
 
   25 import java.util.Arrays;
 
   26 import java.util.Collection;
 
   27 import java.util.HashMap;
 
   28 import java.util.HashSet;
 
   29 import java.util.List;
 
   32 import java.util.TreeMap;
 
   33 import java.util.logging.Level;
 
   34 import java.util.stream.Collectors;
 
   35 import javax.annotation.concurrent.GuardedBy;
 
   36 import org.apache.commons.lang.StringEscapeUtils;
 
   37 import org.apache.commons.lang.StringUtils;
 
   38 import org.apache.commons.lang3.math.NumberUtils;
 
   39 import org.apache.solr.client.solrj.SolrQuery;
 
   40 import org.apache.solr.client.solrj.SolrRequest.METHOD;
 
   41 import org.apache.solr.client.solrj.response.QueryResponse;
 
   42 import org.apache.solr.common.SolrDocumentList;
 
   43 import org.openide.util.NbBundle;
 
   44 import org.openide.util.NbBundle.Messages;
 
   56 class HighlightedText 
implements IndexedText {
 
   58     private static final Logger logger = Logger.getLogger(HighlightedText.class.getName());
 
   60     private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
 
   62     private static final BlackboardAttribute.Type TSK_KEYWORD_SEARCH_TYPE = 
new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE);
 
   63     private static final BlackboardAttribute.Type TSK_KEYWORD = 
new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD);
 
   64     static private final BlackboardAttribute.Type TSK_ASSOCIATED_ARTIFACT = 
new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT);
 
   65     static private final BlackboardAttribute.Type TSK_KEYWORD_REGEXP = 
new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP);
 
   67     private static final String HIGHLIGHT_PRE = 
"<span style='background:yellow'>"; 
 
   68     private static final String HIGHLIGHT_POST = 
"</span>"; 
 
   69     private static final String ANCHOR_PREFIX = HighlightedText.class.getName() + 
"_"; 
 
   71     final private Server solrServer = KeywordSearch.getServer();
 
   73     private final long objectId;
 
   77     private final Set<String> keywords = 
new HashSet<>();
 
   79     private int numberPages;
 
   80     private Integer currentPage = 0;
 
   83     private 
boolean isPageInfoLoaded = false;
 
   88     private final TreeMap<Integer, Integer> numberOfHitsPerPage = new TreeMap<>();
 
   93     private final Set<Integer> pages = numberOfHitsPerPage.keySet();
 
   97     private final HashMap<Integer, Integer> currentHitPerPage = new HashMap<>();
 
   99     private QueryResults hits = null; 
 
  100     private BlackboardArtifact artifact;
 
  101     private KeywordSearch.QueryType qt;
 
  102     private 
boolean isLiteral;
 
  115     HighlightedText(
long objectId, QueryResults hits) {
 
  116         this.objectId = objectId;
 
  128     HighlightedText(BlackboardArtifact artifact) 
throws TskCoreException {
 
  129         this.artifact = artifact;
 
  130         BlackboardAttribute attribute = artifact.getAttribute(TSK_ASSOCIATED_ARTIFACT);
 
  131         if (attribute != null) {
 
  132             this.objectId = attribute.getValueLong();
 
  134             this.objectId = artifact.getObjectID();
 
  143     @Messages({
"HighlightedText.query.exception.msg=Could not perform the query to get chunk info and get highlights:"})
 
  144     synchronized private void loadPageInfo() throws TskCoreException, KeywordSearchModuleException, NoOpenCoreException {
 
  145         if (isPageInfoLoaded) {
 
  149         this.numberPages = solrServer.queryNumFileChunks(this.objectId);
 
  151         if (artifact != null) {
 
  152             loadPageInfoFromArtifact();
 
  153         } 
else if (numberPages != 0) {
 
  155             loadPageInfoFromHits();
 
  158             this.numberPages = 1;
 
  159             this.currentPage = 1;
 
  160             numberOfHitsPerPage.put(1, 0);
 
  162             currentHitPerPage.put(1, 0);
 
  163             isPageInfoLoaded = 
true;
 
  173     synchronized private void loadPageInfoFromArtifact() throws TskCoreException, KeywordSearchModuleException, NoOpenCoreException {
 
  174         final String keyword = artifact.getAttribute(TSK_KEYWORD).getValueString();
 
  175         this.keywords.add(keyword);
 
  178         final BlackboardAttribute queryTypeAttribute = artifact.getAttribute(TSK_KEYWORD_SEARCH_TYPE);
 
  179         qt = (queryTypeAttribute != null)
 
  180                 ? KeywordSearch.QueryType.values()[queryTypeAttribute.getValueInt()] : null;
 
  182         Keyword keywordQuery = null;
 
  186                 keywordQuery = 
new Keyword(keyword, 
true, 
true);
 
  189                 String regexp = artifact.getAttribute(TSK_KEYWORD_REGEXP).getValueString();
 
  190                 keywordQuery = 
new Keyword(regexp, 
false, 
false);
 
  193         KeywordSearchQuery chunksQuery = KeywordSearchUtil.getQueryForKeyword(keywordQuery, 
new KeywordList(Arrays.asList(keywordQuery)));
 
  197         chunksQuery.addFilter(
new KeywordQueryFilter(FilterType.CHUNK, 
this.objectId));
 
  199         hits = chunksQuery.performQuery();
 
  200         loadPageInfoFromHits();
 
  206     synchronized private void loadPageInfoFromHits() {
 
  207         isLiteral = hits.getQuery().isLiteral();
 
  209         for (Keyword k : hits.getKeywords()) {
 
  210             for (KeywordHit hit : hits.getResults(k)) {
 
  211                 int chunkID = hit.getChunkId();
 
  212                 if (artifact != null) {
 
  213                     if (chunkID != 0 && this.objectId == hit.getSolrObjectId()) {
 
  214                         String hit1 = hit.getHit();
 
  215                         if (keywords.stream().anyMatch(hit1::contains)) {
 
  216                             numberOfHitsPerPage.put(chunkID, 0); 
 
  217                             currentHitPerPage.put(chunkID, 0); 
 
  222                     if (chunkID != 0 && this.objectId == hit.getSolrObjectId()) {
 
  224                         numberOfHitsPerPage.put(chunkID, 0); 
 
  225                         currentHitPerPage.put(chunkID, 0); 
 
  227                         if (StringUtils.isNotBlank(hit.getHit())) {
 
  228                             this.keywords.add(hit.getHit());
 
  236         this.currentPage = pages.stream().findFirst().orElse(1);
 
  238         isPageInfoLoaded = 
true;
 
  249     static private String constructEscapedSolrQuery(String query) {
 
  250         return LuceneQuery.HIGHLIGHT_FIELD + 
":" + 
"\"" + KeywordSearchUtil.escapeLuceneQuery(query) + 
"\"";
 
  253     private int getIndexOfCurrentPage() {
 
  254         return Iterators.indexOf(pages.iterator(), this.currentPage::equals);
 
  258     public int getNumberPages() {
 
  260         return this.numberPages;
 
  264     public int getCurrentPage() {
 
  265         return this.currentPage;
 
  269     public boolean hasNextPage() {
 
  270         return getIndexOfCurrentPage() < pages.size() - 1;
 
  274     public boolean hasPreviousPage() {
 
  275         return getIndexOfCurrentPage() > 0;
 
  279     public int nextPage() {
 
  281             currentPage = Iterators.get(pages.iterator(), getIndexOfCurrentPage() + 1);
 
  284             throw new IllegalStateException(
"No next page.");
 
  289     public int previousPage() {
 
  290         if (hasPreviousPage()) {
 
  291             currentPage = Iterators.get(pages.iterator(), getIndexOfCurrentPage() - 1);
 
  294             throw new IllegalStateException(
"No previous page.");
 
  299     public boolean hasNextItem() {
 
  300         if (!this.currentHitPerPage.containsKey(currentPage)) {
 
  303         return this.currentHitPerPage.get(currentPage) < this.numberOfHitsPerPage.get(currentPage);
 
  307     public boolean hasPreviousItem() {
 
  308         if (!this.currentHitPerPage.containsKey(currentPage)) {
 
  311         return this.currentHitPerPage.get(currentPage) > 1;
 
  315     public int nextItem() {
 
  316         if (!hasNextItem()) {
 
  317             throw new IllegalStateException(
"No next item.");
 
  319         int cur = currentHitPerPage.get(currentPage) + 1;
 
  320         currentHitPerPage.put(currentPage, cur);
 
  325     public int previousItem() {
 
  326         if (!hasPreviousItem()) {
 
  327             throw new IllegalStateException(
"No previous item.");
 
  329         int cur = currentHitPerPage.get(currentPage) - 1;
 
  330         currentHitPerPage.put(currentPage, cur);
 
  335     public int currentItem() {
 
  336         if (!this.currentHitPerPage.containsKey(currentPage)) {
 
  339         return currentHitPerPage.get(currentPage);
 
  343     public String getText() {
 
  345         String highlightField = 
"";
 
  348             SolrQuery q = 
new SolrQuery();
 
  349             q.setShowDebugInfo(DEBUG); 
 
  351             String contentIdStr = Long.toString(this.objectId);
 
  352             if (numberPages != 0) {
 
  353                 chunkID = Integer.toString(this.currentPage);
 
  354                 contentIdStr += 
"0".equals(chunkID) ? 
"" : 
"_" + chunkID;
 
  356             final String filterQuery = Server.Schema.ID.toString() + 
":" + KeywordSearchUtil.escapeLuceneQuery(contentIdStr);
 
  358             double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion());
 
  360             highlightField = (isLiteral || (indexSchemaVersion < 2.0))
 
  361                     ? LuceneQuery.HIGHLIGHT_FIELD
 
  362                     : Server.Schema.CONTENT_STR.toString();
 
  365                 final String highlightQuery = keywords.stream()
 
  366                         .map(HighlightedText::constructEscapedSolrQuery)
 
  367                         .collect(Collectors.joining(
" "));
 
  369                 q.setQuery(highlightQuery);
 
  370                 q.addField(highlightField);
 
  371                 q.addFilterQuery(filterQuery);
 
  372                 q.addHighlightField(highlightField);
 
  373                 q.setHighlightFragsize(0); 
 
  376                 q.setParam(
"hl.useFastVectorHighlighter", 
"on"); 
 
  377                 q.setParam(
"hl.tag.pre", HIGHLIGHT_PRE); 
 
  378                 q.setParam(
"hl.tag.post", HIGHLIGHT_POST); 
 
  379                 q.setParam(
"hl.fragListBuilder", 
"single"); 
 
  382                 q.setParam(
"hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED); 
 
  388                 q.setQuery(filterQuery);
 
  389                 q.addField(highlightField);
 
  392             QueryResponse response = solrServer.query(q, METHOD.POST);
 
  397             if (response.getResults().size() > 1) {
 
  398                 logger.log(Level.WARNING, 
"Unexpected number of results for Solr highlighting query: {0}", q); 
 
  400             String highlightedContent;
 
  401             Map<String, Map<String, List<String>>> responseHighlight = response.getHighlighting();
 
  403             if (responseHighlight == null) {
 
  404                 highlightedContent = attemptManualHighlighting(response.getResults(), highlightField, keywords);
 
  406                 Map<String, List<String>> responseHighlightID = responseHighlight.get(contentIdStr);
 
  408                 if (responseHighlightID == null) {
 
  409                     highlightedContent = attemptManualHighlighting(response.getResults(), highlightField, keywords);
 
  411                     List<String> contentHighlights = responseHighlightID.get(LuceneQuery.HIGHLIGHT_FIELD);
 
  412                     if (contentHighlights == null) {
 
  413                         highlightedContent = attemptManualHighlighting(response.getResults(), highlightField, keywords);
 
  416                         highlightedContent = contentHighlights.get(0).trim();
 
  420             highlightedContent = insertAnchors(highlightedContent);
 
  422             return "<html><pre>" + highlightedContent + 
"</pre></html>"; 
 
  423         } 
catch (TskCoreException | KeywordSearchModuleException | NoOpenCoreException ex) {
 
  424             logger.log(Level.SEVERE, 
"Error getting highlighted text for Solr doc id " + objectId + 
", chunkID " + chunkID + 
", highlight query: " + highlightField, ex); 
 
  425             return NbBundle.getMessage(this.getClass(), 
"HighlightedMatchesSource.getMarkup.queryFailedMsg");
 
  430     public String toString() {
 
  431         return NbBundle.getMessage(this.getClass(), 
"HighlightedMatchesSource.toString");
 
  435     public boolean isSearchable() {
 
  440     public String getAnchorPrefix() {
 
  441         return ANCHOR_PREFIX;
 
  445     public int getNumberHits() {
 
  446         if (!this.numberOfHitsPerPage.containsKey(
this.currentPage)) {
 
  449         return this.numberOfHitsPerPage.get(this.currentPage);
 
  466     static String attemptManualHighlighting(SolrDocumentList solrDocumentList, String highlightField, Collection<String> keywords) {
 
  467         if (solrDocumentList.isEmpty()) {
 
  468             return NbBundle.getMessage(HighlightedText.class, 
"HighlightedMatchesSource.getMarkup.noMatchMsg");
 
  474         String text = solrDocumentList.get(0).getOrDefault(highlightField, 
"").toString();
 
  481         text = StringEscapeUtils.escapeHtml(text);
 
  483         TreeRangeSet<Integer> highlights = TreeRangeSet.create();
 
  486         for (String keyword : keywords) {
 
  488             final String escapedKeyword = StringEscapeUtils.escapeHtml(keyword);
 
  489             int searchOffset = 0;
 
  490             int hitOffset = StringUtils.indexOfIgnoreCase(text, escapedKeyword, searchOffset);
 
  491             while (hitOffset != -1) {
 
  493                 searchOffset = hitOffset + escapedKeyword.length();
 
  496                 highlights.add(Range.closedOpen(hitOffset, searchOffset));
 
  499                 hitOffset = StringUtils.indexOfIgnoreCase(text, escapedKeyword, searchOffset);
 
  503         StringBuilder highlightedText = 
new StringBuilder(text);
 
  504         int totalHighLightLengthInserted = 0;
 
  506         for (Range<Integer> highlightRange : highlights.asRanges()) {
 
  507             int hStart = highlightRange.lowerEndpoint();
 
  508             int hEnd = highlightRange.upperEndpoint();
 
  511             highlightedText.insert(hStart + totalHighLightLengthInserted, HIGHLIGHT_PRE);
 
  512             totalHighLightLengthInserted += HIGHLIGHT_PRE.length();
 
  513             highlightedText.insert(hEnd + totalHighLightLengthInserted, HIGHLIGHT_POST);
 
  514             totalHighLightLengthInserted += HIGHLIGHT_POST.length();
 
  517         return highlightedText.toString();
 
  528     private String insertAnchors(String searchableContent) {
 
  529         StringBuilder buf = 
new StringBuilder(searchableContent);
 
  530         final String searchToken = HIGHLIGHT_PRE;
 
  531         final int indexSearchTokLen = searchToken.length();
 
  532         final String insertPre = 
"<a name='" + ANCHOR_PREFIX; 
 
  533         final String insertPost = 
"'></a>"; 
 
  535         int searchOffset = 0;
 
  536         int index = buf.indexOf(searchToken, searchOffset);
 
  538             String insertString = insertPre + Integer.toString(count + 1) + insertPost;
 
  539             int insertStringLen = insertString.length();
 
  540             buf.insert(index, insertString);
 
  541             searchOffset = index + indexSearchTokLen + insertStringLen; 
 
  543             index = buf.indexOf(searchToken, searchOffset);
 
  547         this.numberOfHitsPerPage.put(this.currentPage, count);
 
  548         if (this.currentItem() == 0 && this.hasNextItem()) {
 
  552         return buf.toString();