Autopsy 4.22.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
LanguageSpecificContentQueryHelper.java
/*
 * Autopsy Forensic Browser
 *
 * Copyright 2011-2019 Basis Technology Corp.
 * Contact: carrier <at> sleuthkit <dot> org
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.sleuthkit.autopsy.keywordsearch;

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.sleuthkit.autopsy.coreutils.EscapeUtil;
import org.sleuthkit.autopsy.coreutils.Version;
import org.sleuthkit.datamodel.TskException;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;

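/**
 * Helper methods for keyword search queries that also cover the
 * language-specific content fields (currently only CONTENT_JA) in addition to
 * the default text field.
 */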
final class LanguageSpecificContentQueryHelper {

    private LanguageSpecificContentQueryHelper() {}

    private static final List<Server.Schema> QUERY_FIELDS = new ArrayList<>();
    private static final List<Server.Schema> LANGUAGE_SPECIFIC_CONTENT_FIELDS
            = Collections.singletonList(Server.Schema.CONTENT_JA);
    private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);

    static {
        QUERY_FIELDS.add(Server.Schema.TEXT);
        QUERY_FIELDS.addAll(LANGUAGE_SPECIFIC_CONTENT_FIELDS);
    }

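    /**
     * Holds the Solr documents returned by a query, split into full chunks and
     * the mini chunks keyed by their base chunk ID, plus the highlighting
     * (preview snippets) for each document.
     */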
    static class QueryResults {
        List<SolrDocument> chunks = new ArrayList<>();
        Map</* ID */ String, SolrDocument> miniChunks = new HashMap<>();
        // objectId_chunk -> "text" -> List of previews
        Map<String, Map<String, List<String>>> highlighting = new HashMap<>();
    }

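    /**
     * Expand a query string so that it is applied to the default text field
     * and every language-specific content field, joined with OR.
     *
     * @param queryStr the original query string
     * @return the expanded query string
     */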
    static String expandQueryString(final String queryStr) {
        List<String> fieldQueries = new ArrayList<>();
        fieldQueries.add(Server.Schema.TEXT.toString() + ":" + queryStr);
        fieldQueries.addAll(LANGUAGE_SPECIFIC_CONTENT_FIELDS.stream().map(field -> field.toString() + ":" + queryStr).collect(Collectors.toList()));
        return String.join(" OR ", fieldQueries);
    }

    static List<Server.Schema> getQueryFields() {
        return QUERY_FIELDS;
    }

    static void updateQueryResults(QueryResults results, SolrDocument document) {
        String id = (String) document.getFieldValue(Server.Schema.ID.toString());
        if (MiniChunkHelper.isMiniChunkID(id)) {
            results.miniChunks.put(MiniChunkHelper.getBaseChunkID(id), document);
        } else {
            results.chunks.add(document);
        }
    }

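    /**
     * Get highlight snippets for a language-specific field, if any.
     *
     * @param highlight field name -> list of snippets for one document
     * @return the snippets of the first language-specific field that has any,
     *         or Optional.empty() if none
     */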
    static Optional<List<String>> getHighlights(Map<String, List<String>> highlight) {
        for (Server.Schema field : LANGUAGE_SPECIFIC_CONTENT_FIELDS) {
            if (highlight.containsKey(field.toString())) {
                return Optional.of(highlight.get(field.toString()));
            }
        }
        return Optional.empty();
    }

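    /**
     * Merge KeywordHits from the original query with hits found in the
     * language-specific fields. A hit for the same Solr document is replaced
     * by the language-specific hit; remaining language-specific hits are
     * appended at the end.
     *
     * @param matches         hits from the original query
     * @param originalKeyword the keyword that was searched for
     * @param queryResults    results of the language-specific query
     * @return the merged list of hits
     */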
    static List<KeywordHit> mergeKeywordHits(List<KeywordHit> matches, Keyword originalKeyword, QueryResults queryResults) throws KeywordSearchModuleException {
        Map<String, KeywordHit> map = findMatches(originalKeyword, queryResults).stream().collect(Collectors.toMap(KeywordHit::getSolrDocumentId, x -> x));
        List<KeywordHit> merged = new ArrayList<>();

        // first, replace KeywordHits in matches
        for (KeywordHit match : matches) {
            String key = match.getSolrDocumentId();
            if (map.containsKey(key)) {
                merged.add(map.get(key));
                map.remove(key);
            } else {
                merged.add(match);
            }
        }
        // second, add the rest of the KeywordHits from queryResults
        merged.addAll(map.values());

        return merged;
    }

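    /**
     * Add a termfreq function field to the query so that Solr reports how
     * often the parsed terms of the keyword occur in each matching document.
     */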
    static void configureTermfreqQuery(SolrQuery query, String keyword) throws KeywordSearchModuleException, NoOpenCoreException {
        // make a request to Solr to parse the query
        QueryTermHelper.Result queryParserResult = QueryTermHelper.parse(keyword, LANGUAGE_SPECIFIC_CONTENT_FIELDS);
        query.addField(buildTermfreqQuery(keyword, queryParserResult));
    }

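    /**
     * Build the termfreq pseudo-field expression,
     * termfreq:sum(termfreq("field","term"), ...), covering every term the
     * query parser produced for each language-specific field.
     */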
    static String buildTermfreqQuery(String keyword, QueryTermHelper.Result result) {
        List<String> termfreqs = new ArrayList<>();
        for (Map.Entry<String, List<String>> e : result.fieldTermsMap.entrySet()) {
            String field = e.getKey();
            for (String term : e.getValue()) {
                termfreqs.add(String.format("termfreq(\"%s\",\"%s\")", field, KeywordSearchUtil.escapeLuceneQuery(term)));
            }
        }

        // sum of termfreq over all language-specific query fields;
        // only one of these fields can be non-zero.
        return String.format("termfreq:sum(%s)", String.join(",", termfreqs));
    }

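    /**
     * Ask Solr how many times the keyword terms occur in a single content
     * chunk. The termfreq field is built from the first keyword in the set,
     * while all keywords are used to match the chunk.
     *
     * @param keywords  the keywords being searched
     * @param contentID the Solr document ID of the chunk
     * @return the term frequency, or 0 if the chunk did not match
     */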
    static int queryChunkTermfreq(Set<String> keywords, String contentID) throws KeywordSearchModuleException, NoOpenCoreException {
        SolrQuery q = new SolrQuery();
        q.setShowDebugInfo(DEBUG);

        final String filterQuery = Server.Schema.ID.toString() + ":" + KeywordSearchUtil.escapeLuceneQuery(contentID);
        final String highlightQuery = keywords.stream()
                .map(s -> LanguageSpecificContentQueryHelper.expandQueryString(
                        KeywordSearchUtil.quoteQuery(KeywordSearchUtil.escapeLuceneQuery(s))))
                .collect(Collectors.joining(" "));

        q.addFilterQuery(filterQuery);
        q.setQuery(highlightQuery);
        LanguageSpecificContentQueryHelper.configureTermfreqQuery(q, keywords.iterator().next());

        QueryResponse response = KeywordSearch.getServer().query(q, SolrRequest.METHOD.POST);
        SolrDocumentList results = response.getResults();
        if (results.isEmpty()) {
            return 0;
        }

        SolrDocument document = results.get(0);
        return ((Float) document.getFieldValue(Server.Schema.TERMFREQ.toString())).intValue();
    }

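    /**
     * Find the index of the n-th (0-based) occurrence of a pattern in a string.
     *
     * @return the index of the n-th occurrence, or -1 if there are fewer occurrences
     */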
    static int findNthIndexOf(String s, String pattern, int n) {
        int found = 0;
        int idx = -1;
        int len = s.length();
        while (idx < len && found <= n) {
            idx = s.indexOf(pattern, idx + 1);
            if (idx == -1) {
                break;
            }
            found++;
        }

        return idx;
    }

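    /**
     * Create a KeywordHit for every chunk that has a hit outside of its
     * overlapping mini chunk, comparing the termfreq of the chunk with that of
     * its mini chunk so hits in the overlap are not reported twice.
     */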
    private static List<KeywordHit> findMatches(Keyword originalKeyword, QueryResults queryResults) throws KeywordSearchModuleException {
        List<KeywordHit> matches = new ArrayList<>();
        for (SolrDocument document : queryResults.chunks) {
            String docId = (String) document.getFieldValue(Server.Schema.ID.toString());

            try {
                int hitCountInChunk = ((Float) document.getFieldValue(Server.Schema.TERMFREQ.toString())).intValue();
                SolrDocument miniChunk = queryResults.miniChunks.get(docId);
                if (miniChunk == null) {
                    // the last chunk has no mini chunk because there is no overlapping region with the next one
                    matches.add(createKeywordHit(originalKeyword, queryResults.highlighting, docId));
                } else {
                    int hitCountInMiniChunk = ((Float) miniChunk.getFieldValue(Server.Schema.TERMFREQ.toString())).intValue();
                    if (hitCountInMiniChunk < hitCountInChunk) {
                        // there is at least one hit in the base chunk
                        matches.add(createKeywordHit(originalKeyword, queryResults.highlighting, docId));
                    }
                }
            } catch (TskException ex) {
                throw new KeywordSearchModuleException(ex);
            }
        }
        return matches;
    }

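    /**
     * Create a KeywordHit for the given Solr document ID, attaching the first
     * highlight snippet when snippet display is enabled and one is available.
     */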
    private static KeywordHit createKeywordHit(Keyword originalKeyword, Map<String, Map<String, List<String>>> highlightResponse, String docId) throws TskException {
        String snippet = "";
        if (KeywordSearchSettings.getShowSnippets()) {
            List<String> snippetList = getHighlightFieldValue(highlightResponse.get(docId)).orElse(null);
            // list is null if there wasn't a snippet
            if (snippetList != null) {
                snippet = EscapeUtil.unEscapeHtml(snippetList.get(0)).trim();
            }
        }

        return new KeywordHit(docId, snippet, originalKeyword.getSearchTerm());
    }

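    /**
     * Get the snippets of the first language-specific field present in the
     * given per-document highlighting map, or Optional.empty() if there are none.
     */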
    private static Optional<List<String>> getHighlightFieldValue(Map<String, List<String>> highlight) {
        for (Server.Schema field : LANGUAGE_SPECIFIC_CONTENT_FIELDS) {
            if (highlight.containsKey(field.toString())) {
                return Optional.of(highlight.get(field.toString()));
            }
        }
        return Optional.empty();
    }
}
