Autopsy 4.22.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
AccountsText.java
Go to the documentation of this file.
/*
 * Autopsy Forensic Browser
 *
 * Copyright 2011-2018 Basis Technology Corp.
 * Contact: carrier <at> sleuthkit <dot> org
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
19package org.sleuthkit.autopsy.keywordsearch;
20
21import com.google.common.collect.Iterators;
22import java.util.Arrays;
23import java.util.Collection;
24import java.util.HashMap;
25import java.util.HashSet;
26import java.util.Optional;
27import java.util.Set;
28import java.util.TreeMap;
29import java.util.logging.Level;
30import java.util.regex.Matcher;
31import java.util.regex.Pattern;
32import javax.annotation.concurrent.GuardedBy;
33import org.apache.commons.lang3.StringUtils;
34import org.apache.solr.client.solrj.SolrQuery;
35import org.apache.solr.client.solrj.SolrRequest.METHOD;
36import org.apache.solr.client.solrj.response.QueryResponse;
37import org.openide.util.NbBundle;
38import org.sleuthkit.autopsy.coreutils.Logger;
39import org.sleuthkit.autopsy.coreutils.Version;
40import org.sleuthkit.datamodel.BlackboardArtifact;
41import org.sleuthkit.datamodel.BlackboardAttribute;
42import org.sleuthkit.datamodel.TskCoreException;
43
53class AccountsText implements ExtractedText {
54
55 private static final Logger logger = Logger.getLogger(AccountsText.class.getName());
56 private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
57
58 private static final String CCN_REGEX = "(%?)(B?)([0-9][ \\-]*?){12,19}(\\^?)";
59
60 private static final String HIGHLIGHT_PRE = "<span style='background:yellow'>"; //NON-NLS
61 private static final String ANCHOR_NAME_PREFIX = AccountsText.class.getName() + "_";
62
63 private static final String INSERT_PREFIX = "<a name='" + ANCHOR_NAME_PREFIX; //NON-NLS
64 private static final String INSERT_POSTFIX = "'></a>$0"; //$0 will insert current regex match //NON-NLS
65 private static final Pattern ANCHOR_DETECTION_PATTERN = Pattern.compile(HIGHLIGHT_PRE);
66
67 private static final BlackboardAttribute.Type TSK_KEYWORD_SEARCH_DOCUMENT_ID = new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_DOCUMENT_ID);
68 private static final BlackboardAttribute.Type TSK_CARD_NUMBER = new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CARD_NUMBER);
69 private static final BlackboardAttribute.Type TSK_KEYWORD = new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD);
70
71 private static final String FIELD = Server.Schema.CONTENT_STR.toString();
72
73 private final Server solrServer = KeywordSearch.getServer();
74
75 private final long solrObjectId;
76 private final Collection<? extends BlackboardArtifact> artifacts;
77 private final Set<String> accountNumbers = new HashSet<>();
78 private final String title;
79
80 @GuardedBy("this")
81 private boolean isPageInfoLoaded = false;
82 private int numberPagesForFile = 0;
83 private Integer currentPage = 0;
84
88 private final TreeMap<Integer, Integer> numberOfHitsPerPage = new TreeMap<>();
89
94 private final Set<Integer> pages = numberOfHitsPerPage.keySet();
95
99 private final HashMap<Integer, Integer> currentHitPerPage = new HashMap<>();
100
101 AccountsText(long objectID, BlackboardArtifact artifact) {
102 this(objectID, Arrays.asList(artifact));
103 }
104
105 @NbBundle.Messages({
106 "AccountsText.creditCardNumber=Credit Card Number",
107 "AccountsText.creditCardNumbers=Credit Card Numbers"})
108 AccountsText(long objectID, Collection<? extends BlackboardArtifact> artifacts) {
109 this.solrObjectId = objectID;
110 this.artifacts = artifacts;
111 title = artifacts.size() == 1
112 ? Bundle.AccountsText_creditCardNumber()
113 : Bundle.AccountsText_creditCardNumbers();
114 }
115
116 long getObjectId() {
117 return this.solrObjectId;
118 }
119
120 @Override
121 public int getNumberPages() {
122 return this.numberPagesForFile;
123 }
124
125 @Override
126 public int getCurrentPage() {
127 return this.currentPage;
128 }
129
130 @Override
131 public boolean hasNextPage() {
132 return getIndexOfCurrentPage() < pages.size() - 1;
133
134 }
135
136 @Override
137 public boolean hasPreviousPage() {
138 return getIndexOfCurrentPage() > 0;
139 }
140
141 @Override
142 @NbBundle.Messages("AccountsText.nextPage.exception.msg=No next page.")
143 public int nextPage() {
144 if (hasNextPage()) {
145 currentPage = Iterators.get(pages.iterator(), getIndexOfCurrentPage() + 1);
146 return currentPage;
147 } else {
148 throw new IllegalStateException(Bundle.AccountsText_nextPage_exception_msg());
149 }
150 }
151
152 @Override
153 @NbBundle.Messages("AccountsText.previousPage.exception.msg=No previous page.")
154 public int previousPage() {
155 if (hasPreviousPage()) {
156 currentPage = Iterators.get(pages.iterator(), getIndexOfCurrentPage() - 1);
157 return currentPage;
158 } else {
159 throw new IllegalStateException(Bundle.AccountsText_previousPage_exception_msg());
160 }
161 }
162
163 private int getIndexOfCurrentPage() {
164 return Iterators.indexOf(pages.iterator(), this.currentPage::equals);
165 }
166
167 @Override
168 public boolean hasNextItem() {
169 if (this.currentHitPerPage.containsKey(currentPage)) {
170 return this.currentHitPerPage.get(currentPage) < this.numberOfHitsPerPage.get(currentPage);
171 } else {
172 return false;
173 }
174 }
175
176 @Override
177 public boolean hasPreviousItem() {
178 if (this.currentHitPerPage.containsKey(currentPage)) {
179 return this.currentHitPerPage.get(currentPage) > 1;
180 } else {
181 return false;
182 }
183 }
184
185 @Override
186 @NbBundle.Messages("AccountsText.nextItem.exception.msg=No next item.")
187 public int nextItem() {
188 if (hasNextItem()) {
189 return currentHitPerPage.merge(currentPage, 1, Integer::sum);
190 } else {
191 throw new IllegalStateException(Bundle.AccountsText_nextItem_exception_msg());
192 }
193 }
194
195 @Override
196 @NbBundle.Messages("AccountsText.previousItem.exception.msg=No previous item.")
197 public int previousItem() {
198 if (hasPreviousItem()) {
199 return currentHitPerPage.merge(currentPage, -1, Integer::sum);
200 } else {
201 throw new IllegalStateException(Bundle.AccountsText_previousItem_exception_msg());
202 }
203 }
204
205 @Override
206 public int currentItem() {
207 return currentHitPerPage.getOrDefault(currentPage, 0);
208 }
209
214 synchronized private void loadPageInfo() throws IllegalStateException, TskCoreException, KeywordSearchModuleException, NoOpenCoreException {
215 if (isPageInfoLoaded) {
216 return;
217 }
218
219 this.numberPagesForFile = solrServer.queryNumFileChunks(this.solrObjectId);
220
221 boolean needsQuery = false;
222
223 for (BlackboardArtifact artifact : artifacts) {
224 if (solrObjectId != artifact.getObjectID()) {
225 throw new IllegalStateException("not all artifacts are from the same object!");
226 }
227
228 //add both the canonical form and the form in the text as accountNumbers to highlight.
229 BlackboardAttribute attribute = artifact.getAttribute(TSK_KEYWORD);
230 this.accountNumbers.add(attribute.getValueString());
231 attribute = artifact.getAttribute(TSK_CARD_NUMBER);
232 this.accountNumbers.add(attribute.getValueString());
233
234 //if the chunk id is present just use that.
235 Optional<Integer> chunkID =
236 Optional.ofNullable(artifact.getAttribute(TSK_KEYWORD_SEARCH_DOCUMENT_ID))
237 .map(BlackboardAttribute::getValueString)
238 .map(String::trim)
239 .map(kwsdocID -> StringUtils.substringAfterLast(kwsdocID, Server.CHUNK_ID_SEPARATOR))
240 .map(Integer::valueOf);
241 if (chunkID.isPresent()) {
242 numberOfHitsPerPage.put(chunkID.get(), 0);
243 currentHitPerPage.put(chunkID.get(), 0);
244 } else {
245 //otherwise we need to do a query to figure out the paging.
246 needsQuery = true;
247 // we can't break the for loop here because we need to accumulate all the accountNumbers
248 }
249 }
250
251 if (needsQuery) {
252 // Run a query to figure out which chunks for the current object have hits.
253 Keyword queryKeyword = new Keyword(CCN_REGEX, false, false);
254 KeywordSearchQuery chunksQuery = KeywordSearchUtil.getQueryForKeyword(queryKeyword, new KeywordList(Arrays.asList(queryKeyword)));
255 chunksQuery.addFilter(new KeywordQueryFilter(KeywordQueryFilter.FilterType.CHUNK, this.solrObjectId));
256 //load the chunks/pages from the result of the query.
257 loadPageInfoFromHits(chunksQuery.performQuery());
258 }
259
260 this.currentPage = pages.stream().findFirst().orElse(1);
261
262 isPageInfoLoaded = true;
263 }
264
270 synchronized private void loadPageInfoFromHits(QueryResults hits) {
271 //organize the hits by page, filter as needed
272 for (Keyword k : hits.getKeywords()) {
273 for (KeywordHit hit : hits.getResults(k)) {
274 int chunkID = hit.getChunkId();
275 if (chunkID != 0 && this.solrObjectId == hit.getSolrObjectId()) {
276 String hitString = hit.getHit();
277 if (accountNumbers.stream().anyMatch(hitString::contains)) {
278 numberOfHitsPerPage.put(chunkID, 0); //unknown number of matches in the page
279 currentHitPerPage.put(chunkID, 0); //set current hit to 0th
280 }
281 }
282 }
283 }
284 }
285
286 @Override
287 public String getText() {
288 try {
289 loadPageInfo(); //inits once
290
291 SolrQuery q = new SolrQuery();
292 q.setShowDebugInfo(DEBUG); //debug
293
294 String contentIdStr = this.solrObjectId + Server.CHUNK_ID_SEPARATOR + this.currentPage;
295 final String filterQuery = Server.Schema.ID.toString() + ":" + contentIdStr;
296 //set the documentID filter
297 q.setQuery(filterQuery);
298 q.setFields(FIELD);
299
300 QueryResponse queryResponse = solrServer.query(q, METHOD.POST);
301
302 String highlightedText =
303 HighlightedText.attemptManualHighlighting(
304 queryResponse.getResults(),
305 Server.Schema.CONTENT_STR.toString(),
306 accountNumbers
307 ).trim();
308
309 highlightedText = insertAnchors(highlightedText);
310
311 // extracted content (minus highlight tags) is HTML-escaped
312 return "<html><pre>" + highlightedText + "</pre></html>"; //NON-NLS
313 } catch (Exception ex) {
314 logger.log(Level.SEVERE, "Error getting highlighted text for Solr doc id " + this.solrObjectId + ", chunkID " + this.currentPage, ex); //NON-NLS
315 return Bundle.ExtractedText_errorMessage_errorGettingText();
316 }
317 }
318
327 private String insertAnchors(String searchableContent) {
328 /*
329 * use regex matcher to iterate over occurences of HIGHLIGHT_PRE, and
330 * prepend them with an anchor tag.
331 */
332 Matcher m = ANCHOR_DETECTION_PATTERN.matcher(searchableContent);
333 StringBuffer sb = new StringBuffer(searchableContent.length());
334 int count = 0;
335 while (m.find()) {
336 count++;
337 m.appendReplacement(sb, INSERT_PREFIX + count + INSERT_POSTFIX);
338 }
339 m.appendTail(sb);
340 //store total hits for this page, now that we know it
341 this.numberOfHitsPerPage.put(this.currentPage, count);
342 if (this.currentItem() == 0 && this.hasNextItem()) {
343 this.nextItem();
344 }
345 return sb.toString();
346 }
347
348 @Override
349 public String toString() {
350 return title;
351 }
352
353 @Override
354 public boolean isSearchable() {
355 return true;
356 }
357
358 @Override
359 public String getAnchorPrefix() {
360 return ANCHOR_NAME_PREFIX;
361 }
362
363 @Override
364 public int getNumberHits() {
365 return numberOfHitsPerPage.getOrDefault(currentPage, 0);
366 }
367}
synchronized static Logger getLogger(String name)
Definition Logger.java:124
static Version.Type getBuildType()
Definition Version.java:87
QueryResponse query(SolrQuery sq)
Definition Server.java:1710

Copyright © 2012-2024 Sleuth Kit Labs. Generated on:
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.