Autopsy 4.22.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
ExtractedTextViewer.java
Go to the documentation of this file.
1/*
2 * Autopsy Forensic Browser
3 *
4 * Copyright 2011-2023 Basis Technology Corp.
5 * Contact: carrier <at> sleuthkit <dot> org
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19package org.sleuthkit.autopsy.keywordsearch;
20
21import java.awt.Component;
22import java.awt.Cursor;
23import java.awt.event.ActionEvent;
24import java.awt.event.ActionListener;
25import java.beans.PropertyChangeEvent;
26import java.util.ArrayList;
27import java.util.Collection;
28import java.util.EnumSet;
29import java.util.LinkedHashMap;
30import java.util.List;
31import java.util.Map;
32import java.util.logging.Level;
33import org.apache.tika.mime.MimeTypes;
34import org.openide.nodes.Node;
35import org.openide.util.Lookup;
36import org.openide.util.NbBundle;
37import org.openide.util.lookup.ServiceProvider;
38import org.sleuthkit.autopsy.casemodule.Case;
39import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException;
40import org.sleuthkit.autopsy.corecomponentinterfaces.TextViewer;
41import org.sleuthkit.autopsy.coreutils.Logger;
42import org.sleuthkit.autopsy.keywordsearch.AdHocSearchChildFactory.AdHocQueryResult;
43import org.sleuthkit.autopsy.modules.filetypeid.FileTypeDetector;
44import org.sleuthkit.autopsy.textextractors.TextExtractor;
45import org.sleuthkit.autopsy.textextractors.TextExtractorFactory;
46import org.sleuthkit.datamodel.AbstractFile;
47import org.sleuthkit.datamodel.Account;
48import org.sleuthkit.datamodel.BlackboardArtifact;
49import static org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE.TSK_ACCOUNT;
50import static org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE.TSK_KEYWORD_HIT;
51import org.sleuthkit.datamodel.BlackboardAttribute;
52import static org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT;
53import org.sleuthkit.datamodel.Content;
54import org.sleuthkit.datamodel.Report;
55import org.sleuthkit.datamodel.TskCoreException;
56import org.sleuthkit.datamodel.TskData;
57
62@ServiceProvider(service = TextViewer.class, position = 2)
63public class ExtractedTextViewer implements TextViewer {
64
65 private static final Logger logger = Logger.getLogger(ExtractedTextViewer.class.getName());
66
67 private static final BlackboardAttribute.Type TSK_ASSOCIATED_ARTIFACT_TYPE = new BlackboardAttribute.Type(TSK_ASSOCIATED_ARTIFACT);
68 private static final BlackboardAttribute.Type TSK_ACCOUNT_TYPE = new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_ACCOUNT_TYPE);
69
70 private ExtractedContentPanel panel;
71 private volatile Node currentNode = null;
74
75 // cache of last 10 solrHasFullyIndexedContent() requests sent to Solr.
77
85 try {
88 logger.log(Level.SEVERE, "Failed to initialize FileTypeDetector", ex); //NON-NLS
89 }
90
92 // clear the cache when case opens or closes
93 Case.addEventTypeSubscriber(EnumSet.of(Case.Events.CURRENT_CASE), (PropertyChangeEvent evt) -> {
94 solrCache.clearCache();
95 });
96 }
97
103 @Override
104 public void setNode(final Node node) {
105 // Clear the viewer.
106 if (node == null) {
107 currentNode = null;
109 return;
110 }
111
112 /*
113 * This deals with the known bug with an unknown cause where setNode is
114 * sometimes called twice for the same node.
115 */
116 if (node.equals(currentNode)) {
117 return;
118 } else {
119 currentNode = node;
120 }
121
122 /*
123 * Assemble a collection of all of the indexed text "sources" for the
124 * node.
125 */
126 List<ExtractedText> sources = new ArrayList<>();
127 Lookup nodeLookup = node.getLookup();
128
133 AdHocQueryResult adHocQueryResult = nodeLookup.lookup(AdHocQueryResult.class);
134 AbstractFile file = nodeLookup.lookup(AbstractFile.class);
135 BlackboardArtifact artifact = nodeLookup.lookup(BlackboardArtifact.class);
136 Report report = nodeLookup.lookup(Report.class);
137
138 /*
139 * First, get text with highlighted hits if this node is for a search
140 * result.
141 */
142 ExtractedText highlightedHitText = null;
143 if (adHocQueryResult != null) {
144 /*
145 * The node is an ad hoc search result node.
146 */
147 highlightedHitText = new HighlightedText(adHocQueryResult.getSolrObjectId(), adHocQueryResult.getResults());
148 } else if (artifact != null) {
149 if (artifact.getArtifactTypeID() == TSK_KEYWORD_HIT.getTypeID()) {
150 /*
151 * The node is a keyword hit artifact node.
152 */
153 try {
154 highlightedHitText = new HighlightedText(artifact);
155 } catch (TskCoreException ex) {
156 logger.log(Level.SEVERE, "Failed to create HighlightedText for " + artifact, ex); //NON-NLS
157 }
158 } else if (artifact.getArtifactTypeID() == TSK_ACCOUNT.getTypeID() && file != null) {
159 try {
160 BlackboardAttribute attribute = artifact.getAttribute(TSK_ACCOUNT_TYPE);
161 if (attribute != null && Account.Type.CREDIT_CARD.getTypeName().equals(attribute.getValueString())) {
162 /*
163 * The node is a credit card account node.
164 */
165 highlightedHitText = getAccountsText(file, nodeLookup);
166 }
167 } catch (TskCoreException ex) {
168 logger.log(Level.SEVERE, "Failed to create AccountsText for " + file, ex); //NON-NLS
169 }
170 }
171 }
172 if (highlightedHitText != null) {
173 sources.add(highlightedHitText);
174 }
175
176 /*
177 * Next, add the "raw" (not highlighted) text, if any, for any file
178 * associated with the node.
179 */
180 ExtractedText rawContentText = null;
181 if (file != null) {
182
183 // see if Solr has fully indexed this file
184 if (solrHasFullyIndexedContent(file.getId())) {
185 rawContentText = new SolrIndexedText(file, file.getId());
186 sources.add(rawContentText);
187 } else {
188 // Solr does not have fully indexed content.
189 // see if it's a file type for which we can extract text
190 if (ableToExtractTextFromFile(file)) {
191 try {
192 rawContentText = new FileReaderExtractedText(file);
193 sources.add(rawContentText);
195 // do nothing
196 }
197 }
198 }
199 }
200
201 /*
202 * Add the "raw" (not highlighted) text, if any, for any report
203 * associated with the node.
204 */
205 if (report != null) {
206 // see if Solr has fully indexed this file
207 if (solrHasFullyIndexedContent(report.getId())) {
208 rawContentText = new SolrIndexedText(report, report.getId());
209 sources.add(rawContentText);
210 }
211 }
212
213 /*
214 * Finally, add the "raw" (not highlighted) text, if any, for any
215 * artifact associated with the node.
216 */
217 ExtractedText rawArtifactText = null;
218 try {
219 rawArtifactText = getRawArtifactText(artifact);
220 if (rawArtifactText != null) {
221 sources.add(rawArtifactText);
222 }
223 } catch (TskCoreException | NoCurrentCaseException ex) {
224 logger.log(Level.SEVERE, "Error creating RawText for " + file, ex); //NON-NLS
225 }
226
227 // Now set the default source to be displayed.
228 if (highlightedHitText != null) {
229 currentSource = highlightedHitText;
230 } else if (rawArtifactText != null) {
231 currentSource = rawArtifactText;
232 } else {
233 currentSource = rawContentText;
234 }
235
236 // Push the text sources into the panel.
237 for (ExtractedText source : sources) {
238 int currentPage = source.getCurrentPage();
239 if (currentPage == 0 && source.hasNextPage()) {
240 source.nextPage();
241 }
242 }
243 panel.updateControls(currentSource);
244
245 String contentName = "";
246 if (file != null) {
247 contentName = file.getName();
248 }
249 setPanel(contentName, sources);
250
251 }
252
253 private ExtractedText getRawArtifactText(BlackboardArtifact artifact) throws TskCoreException, NoCurrentCaseException {
254 ExtractedText rawArtifactText = null;
255 if (null != artifact) {
256 /*
257 * For keyword hit artifacts, add the text of the artifact that hit,
258 * not the hit artifact; otherwise add the text for the artifact.
259 */
260 if (artifact.getArtifactTypeID() == TSK_KEYWORD_HIT.getTypeID()
261 || artifact.getArtifactTypeID() == TSK_ACCOUNT.getTypeID()) {
262
263 BlackboardAttribute attribute = artifact.getAttribute(TSK_ASSOCIATED_ARTIFACT_TYPE);
264 if (attribute != null) {
265 long artifactId = attribute.getValueLong();
266 BlackboardArtifact associatedArtifact = Case.getCurrentCaseThrows().getSleuthkitCase().getBlackboardArtifact(artifactId);
267 if (solrHasFullyIndexedContent(associatedArtifact.getArtifactID())) {
268 rawArtifactText = new SolrIndexedText(associatedArtifact, associatedArtifact.getArtifactID());
269 }
270 }
271
272 } else {
273 if (solrHasFullyIndexedContent(artifact.getArtifactID())) {
274 rawArtifactText = new SolrIndexedText(artifact, artifact.getArtifactID());
275 }
276 }
277 }
278 return rawArtifactText;
279 }
280
281 static private ExtractedText getAccountsText(Content content, Lookup nodeLookup) throws TskCoreException {
282 /*
283 * get all the credit card artifacts
284 */
285 //if the node had artifacts in the lookup use them, other wise look up all credit card artifacts for the content.
286 Collection<? extends BlackboardArtifact> artifacts = nodeLookup.lookupAll(BlackboardArtifact.class);
287 artifacts = (artifacts == null || artifacts.isEmpty())
288 ? content.getArtifacts(TSK_ACCOUNT)
289 : artifacts;
290
291 return new AccountsText(content.getId(), artifacts);
292 }
293
294 private void scrollToCurrentHit() {
295 final ExtractedText source = panel.getSelectedSource();
296 if (source == null || !source.isSearchable()) {
297 return;
298 }
299
300 panel.scrollToAnchor(source.getAnchorPrefix() + Integer.toString(source.currentItem()));
301 }
302
303 @Override
304 public String getTitle() {
305 return NbBundle.getMessage(this.getClass(), "ExtractedContentViewer.getTitle");
306 }
307
308 @Override
309 public String getToolTip() {
310 return NbBundle.getMessage(this.getClass(), "ExtractedContentViewer.toolTip");
311 }
312
313 @Override
315 return new ExtractedTextViewer();
316 }
317
318 @Override
319 public synchronized Component getComponent() {
320 if (panel == null) {
321 panel = new ExtractedContentPanel();
322 panel.addPrevMatchControlListener(new PrevFindActionListener());
323 panel.addNextMatchControlListener(new NextFindActionListener());
324 panel.addPrevPageControlListener(new PrevPageActionListener());
325 panel.addNextPageControlListener(new NextPageActionListener());
326 panel.addSourceComboControlListener(new SourceChangeActionListener());
327 }
328 return panel;
329 }
330
331 @Override
332 public void resetComponent() {
333 panel.resetDisplay();
334 currentNode = null;
335 currentSource = null;
336 panel.updateControls(currentSource);
337 }
338
339 @Override
340 public boolean isSupported(Node node) {
341 if (node == null) {
342 return false;
343 }
344
345 /*
346 * If the lookup of the node contains an ad hoc search result object,
347 * then there must be indexed text that produced the hit.
348 */
349 AdHocQueryResult adHocQueryResult = node.getLookup().lookup(AdHocQueryResult.class);
350 if (adHocQueryResult != null) {
351 return true;
352 }
353
354 /*
355 * If the lookup of the node contains either a keyword hit artifact or a
356 * credit card account artifact from a credit card account numbers
357 * search, then there must be indexed text that produced the hit(s).
358 */
359 BlackboardArtifact artifact = node.getLookup().lookup(BlackboardArtifact.class);
360 if (artifact != null) {
361 final int artifactTypeID = artifact.getArtifactTypeID();
362 if (artifactTypeID == TSK_KEYWORD_HIT.getTypeID()) {
363 return true;
364 } else if (artifactTypeID == TSK_ACCOUNT.getTypeID()) {
365 try {
366 BlackboardAttribute attribute = artifact.getAttribute(TSK_ACCOUNT_TYPE);
367 if (attribute != null && Account.Type.CREDIT_CARD.getTypeName().equals(attribute.getValueString())) {
368 return true;
369 }
370 } catch (TskCoreException ex) {
371 /*
372 * If there was an error checking the account type, fall
373 * back to the check below for the file associated with the
374 * account (if there is one).
375 */
376 logger.log(Level.SEVERE, "Error getting TSK_ACCOUNT_TYPE attribute from artifact " + artifact.getArtifactID(), ex);
377 }
378 }
379 }
380
381 /*
382 * If the lookup of the node contains a file, check to see if there is
383 * indexed text for the file. Note that there should be a file in the
384 * lookup of all nodes except artifact nodes that are associated with a
385 * data source instead of a file.
386 */
387 AbstractFile file = node.getLookup().lookup(AbstractFile.class);
388 if (file != null) {
389
390 // see if Solr has fully indexed this file
391 if (solrHasFullyIndexedContent(file.getId())) {
392 return true;
393 }
394
395 // Solr does not have fully indexed content.
396 // see if it's a file type for which we can extract text
397 if (ableToExtractTextFromFile(file)) {
398 return true;
399 }
400 }
401
402 /*
403 * If the lookup of the node contains an artifact that is neither a
404 * keyword hit artifact nor a credit card account artifact, and the
405 * artifact is not associated with a file, check to see if there is
406 * indexed text for the artifact.
407 */
408 if (artifact != null) {
409 return solrHasFullyIndexedContent(artifact.getArtifactID());
410 }
411
412 /*
413 * If the lookup of the node contains no artifacts or file but does
414 * contain a report, check to see if there is indexed text for the
415 * report.
416 */
417 Report report = node.getLookup().lookup(Report.class);
418 if (report != null) {
419 return solrHasFullyIndexedContent(report.getId());
420 }
421
422 /*
423 * If the lookup of the node contains neither ad hoc search results, nor
424 * artifacts, nor a file, nor a report, there is no indexed text.
425 */
426 return false;
427 }
428
429 @Override
430 public int isPreferred(Node node) {
431 return 4;
432 }
433
442 private void setPanel(String contentName, List<ExtractedText> sources) {
443 if (panel != null) {
444 panel.setSources(contentName, sources);
445 }
446 }
447
460 private boolean solrHasFullyIndexedContent(Long objectId) {
461
462 // check if we have cached this decision
463 if (solrCache.containsKey(objectId)) {
464 return solrCache.getCombination(objectId);
465 }
466
467 final Server solrServer = KeywordSearch.getServer();
468 if (solrServer.coreIsOpen() == false) {
469 solrCache.putCombination(objectId, false);
470 return false;
471 }
472
473 // verify that all of the chunks in the file have been indexed.
474 try {
475 boolean isFullyIndexed = solrServer.queryIsFullyIndexed(objectId);
476 solrCache.putCombination(objectId, isFullyIndexed);
477 return isFullyIndexed;
479 logger.log(Level.SEVERE, "Error querying Solr server", ex); //NON-NLS
480 solrCache.putCombination(objectId, false);
481 return false;
482 }
483 }
484
494 private boolean ableToExtractTextFromFile(AbstractFile file) {
495
496 TskData.TSK_DB_FILES_TYPE_ENUM fileType = file.getType();
497
498 if (fileType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.VIRTUAL_DIR)) {
499 return false;
500 }
501
502 if ((fileType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS)
503 || fileType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS))
504 || (fileType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.CARVED))) {
505 return false;
506 }
507
508 final long size = file.getSize();
509 if (file.isDir() || size == 0) {
510 return false;
511 }
512
513 String mimeType = fileTypeDetector.getMIMEType(file).trim().toLowerCase();
514
515 if (KeywordSearchIngestModule.ARCHIVE_MIME_TYPES.contains(mimeType)) {
516 return false;
517 }
518
519 if (MimeTypes.OCTET_STREAM.equals(mimeType)) {
520 return false;
521 }
522
523 // Often there is an exception when trying to initialize a reader,
524 // thus making that specific file "unsupported". The only way to identify
525 // this situation is to initialize the reader.
526 try {
527 FileReaderExtractedText tmp = new FileReaderExtractedText(file);
529 return false;
530 }
531
532 return true;
533 }
534
538 private class NextFindActionListener implements ActionListener {
539
540 @Override
541 public void actionPerformed(ActionEvent e) {
542 ExtractedText source = panel.getSelectedSource();
543 if (source == null) {
544 // reset
545 panel.updateControls(null);
546 return;
547 }
548 final boolean hasNextItem = source.hasNextItem();
549 final boolean hasNextPage = source.hasNextPage();
550 int indexVal;
551 if (hasNextItem || hasNextPage) {
552 if (!hasNextItem) {
553 //flip the page
554 nextPage();
555 indexVal = source.currentItem();
556 } else {
557 indexVal = source.nextItem();
558 }
559
560 //scroll
561 panel.scrollToAnchor(source.getAnchorPrefix() + Integer.toString(indexVal));
562
563 //update display
564 panel.updateCurrentMatchDisplay(source.currentItem());
565 panel.updateTotaMatcheslDisplay(source.getNumberHits());
566
567 //update controls if needed
568 if (!source.hasNextItem() && !source.hasNextPage()) {
569 panel.enableNextMatchControl(false);
570 }
571 if (source.hasPreviousItem() || source.hasPreviousPage()) {
572 panel.enablePrevMatchControl(true);
573 }
574 }
575 }
576 }
577
581 private class PrevFindActionListener implements ActionListener {
582
583 @Override
584 public void actionPerformed(ActionEvent e) {
585 ExtractedText source = panel.getSelectedSource();
586 final boolean hasPreviousItem = source.hasPreviousItem();
587 final boolean hasPreviousPage = source.hasPreviousPage();
588 int indexVal;
589 if (hasPreviousItem || hasPreviousPage) {
590 if (!hasPreviousItem) {
591 //flip the page
592 previousPage();
593 indexVal = source.currentItem();
594 } else {
595 indexVal = source.previousItem();
596 }
597
598 //scroll
599 panel.scrollToAnchor(source.getAnchorPrefix() + Integer.toString(indexVal));
600
601 //update display
602 panel.updateCurrentMatchDisplay(source.currentItem());
603 panel.updateTotaMatcheslDisplay(source.getNumberHits());
604
605 //update controls if needed
606 if (!source.hasPreviousItem() && !source.hasPreviousPage()) {
607 panel.enablePrevMatchControl(false);
608 }
609 if (source.hasNextItem() || source.hasNextPage()) {
610 panel.enableNextMatchControl(true);
611 }
612 }
613 }
614 }
615
619 private class SourceChangeActionListener implements ActionListener {
620
621 @Override
622 public void actionPerformed(ActionEvent e) {
623 currentSource = panel.getSelectedSource();
624
625 if (currentSource == null) {
626 //TODO might need to reset something
627 return;
628 }
629
630 panel.updateControls(currentSource);
631 }
632 }
633
634 private void nextPage() {
635 // we should never have gotten here -- reset
636 if (currentSource == null) {
637 panel.updateControls(null);
638 return;
639 }
640
641 if (currentSource.hasNextPage()) {
642 currentSource.nextPage();
643
644 //set new text
645 panel.setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR));
646 panel.refreshCurrentMarkup();
647 panel.setCursor(null);
648
649 //update display
650 panel.updateCurrentPageDisplay(currentSource.getCurrentPage());
651
652 //scroll to current selection
654
655 //update controls if needed
656 if (!currentSource.hasNextPage()) {
657 panel.enableNextPageControl(false);
658 }
659 if (currentSource.hasPreviousPage()) {
660 panel.enablePrevPageControl(true);
661 }
662
663 panel.updateSearchControls(currentSource);
664 }
665 }
666
667 private void previousPage() {
668 // reset, we should have never gotten here if null
669 if (currentSource == null) {
670 panel.updateControls(null);
671 return;
672 }
673
674 if (currentSource.hasPreviousPage()) {
675 currentSource.previousPage();
676
677 //set new text
678 panel.setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR));
679 panel.refreshCurrentMarkup();
680 panel.setCursor(null);
681
682 //update display
683 panel.updateCurrentPageDisplay(currentSource.getCurrentPage());
684
685 //scroll to current selection
687
688 //update controls if needed
689 if (!currentSource.hasPreviousPage()) {
690 panel.enablePrevPageControl(false);
691 }
692 if (currentSource.hasNextPage()) {
693 panel.enableNextPageControl(true);
694 }
695
696 panel.updateSearchControls(currentSource);
697
698 }
699 }
700
704 private class NextPageActionListener implements ActionListener {
705
706 @Override
707 public void actionPerformed(ActionEvent e) {
708 nextPage();
709 }
710 }
711
715 private class PrevPageActionListener implements ActionListener {
716
717 @Override
718 public void actionPerformed(ActionEvent e) {
719 previousPage();
720 }
721 }
722
728
729 private static final int CACHE_SIZE = 10;
730 private final LinkedHashMap<Long, Boolean> cache;
731
733 this.cache = new LinkedHashMap<Long, Boolean>(CACHE_SIZE, 0.75f, true) {
734 @Override
735 protected boolean removeEldestEntry(Map.Entry<Long, Boolean> eldest) {
736 return size() > CACHE_SIZE;
737 }
738 };
739 }
740
741 public void putCombination(long key, boolean value) {
742 cache.put(key, value);
743 }
744
745 public Boolean getCombination(long key) {
746 return cache.get(key);
747 }
748
749 public void clearCache() {
750 cache.clear();
751 }
752
753 public boolean containsKey(long key) {
754 return cache.containsKey(key);
755 }
756 }
757}
static void addEventTypeSubscriber(Set< Events > eventTypes, PropertyChangeListener subscriber)
Definition Case.java:712
synchronized static Logger getLogger(String name)
Definition Logger.java:124
void setPanel(String contentName, List< ExtractedText > sources)
ExtractedText getRawArtifactText(BlackboardArtifact artifact)
static final BlackboardAttribute.Type TSK_ASSOCIATED_ARTIFACT_TYPE
static ExtractedText getAccountsText(Content content, Lookup nodeLookup)
boolean queryIsFullyIndexed(long contentID)
Definition Server.java:1648

Copyright © 2012-2024 Sleuth Kit Labs. Generated on:
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.