Autopsy 4.22.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
EmbeddedFileExtractorIngestModule.java
Go to the documentation of this file.
1/*
2 * Autopsy Forensic Browser
3 *
4 * Copyright 2015-2020 Basis Technology Corp.
5 * Contact: carrier <at> sleuthkit <dot> org
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19package org.sleuthkit.autopsy.modules.embeddedfileextractor;
20
21import java.io.File;
22import java.nio.file.Paths;
23import java.util.HashMap;
24import java.util.Map;
25import java.util.concurrent.ConcurrentHashMap;
26import java.util.logging.Level;
27import javax.annotation.concurrent.GuardedBy;
28import org.openide.util.NbBundle;
29import org.sleuthkit.autopsy.casemodule.Case;
30import org.sleuthkit.datamodel.AbstractFile;
31import org.sleuthkit.datamodel.TskData;
32import org.sleuthkit.autopsy.ingest.IngestModule.ProcessResult;
33import org.sleuthkit.autopsy.ingest.IngestJobContext;
34import org.sleuthkit.autopsy.modules.filetypeid.FileTypeDetector;
35import net.sf.sevenzipjbinding.SevenZipNativeInitializationException;
36import org.sleuthkit.autopsy.apputils.ApplicationLoggers;
37import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException;
38import java.util.logging.Logger;
39import org.sleuthkit.autopsy.ingest.FileIngestModuleAdapter;
40import org.sleuthkit.autopsy.ingest.IngestModuleReferenceCounter;
41import org.sleuthkit.autopsy.modules.embeddedfileextractor.SevenZipExtractor.Archive;
42import org.sleuthkit.autopsy.threadutils.TaskRetryUtil;
43
48@NbBundle.Messages({
49 "CannotCreateOutputFolder=Unable to create output folder.",
50 "CannotRunFileTypeDetection=Unable to run file type detection.",
51 "UnableToInitializeLibraries=Unable to initialize 7Zip libraries.",
52 "EmbeddedFileExtractorIngestModule.NoOpenCase.errMsg=No open case available.",
53 "EmbeddedFileExtractorIngestModule.UnableToGetMSOfficeExtractor.errMsg=Unable to get MSOfficeEmbeddedContentExtractor."
54})
55public final class EmbeddedFileExtractorIngestModule extends FileIngestModuleAdapter {
56
57 private static final String TASK_RETRY_STATS_LOG_NAME = "task_retry_stats";
59 private static final Object execMapLock = new Object();
60 @GuardedBy("execMapLock")
61 private static final Map<Long, FileTaskExecutor> fileTaskExecsByJob = new HashMap<>();
62 //Outer concurrent hashmap with keys of JobID, inner concurrentHashmap with keys of objectID
63 private static final ConcurrentHashMap<Long, ConcurrentHashMap<Long, Archive>> mapOfDepthTrees = new ConcurrentHashMap<>();
65 private DocumentEmbeddedContentExtractor documentExtractor;
66 private SevenZipExtractor archiveExtractor;
68 private long jobId;
69
74 EmbeddedFileExtractorIngestModule() {
75 }
76
77 @Override
78 @NbBundle.Messages({
79 "EmbeddedFileExtractor_make_output_dir_err=Failed to create module output directory for Embedded File Extractor"
80 })
81 public void startUp(IngestJobContext context) throws IngestModuleException {
82 jobId = context.getJobId();
83
84 /*
85 * Construct absolute and relative paths to the output directory. The
86 * output directory is a subdirectory of the ModuleOutput folder in the
87 * case directory and is named for the module.
88 *
89 * The absolute path is used to write the extracted (derived) files to
90 * local storage.
91 *
92 * The relative path is relative to the case folder and is used in the
93 * case database for extracted (derived) file paths.
94 *
95 */
96 Case currentCase = Case.getCurrentCase();
97 String moduleDirAbsolute = Paths.get(currentCase.getModuleDirectory(), EmbeddedFileExtractorModuleFactory.getOutputFolderName()).toString();
98 String moduleDirRelative = Paths.get(currentCase.getModuleOutputDirectoryRelativePath(), EmbeddedFileExtractorModuleFactory.getOutputFolderName()).toString();
99
100 if (refCounter.incrementAndGet(jobId) == 1) {
101
102 /*
103 * Construct a per ingest job executor that will be used for calling
104 * java.io.File methods as tasks with retries. Retries are employed
105 * here due to observed issues with hangs when attempting these
106 * operations on case directories stored on a certain type of
107 * network file system. See the FileTaskExecutor class header docs
108 * for more details.
109 */
110 FileTaskExecutor fileTaskExecutor = new FileTaskExecutor(context);
111 synchronized (execMapLock) {
112 fileTaskExecsByJob.put(jobId, fileTaskExecutor);
113 }
114
115 try {
116 File extractionDirectory = new File(moduleDirAbsolute);
117 if (!fileTaskExecutor.exists(extractionDirectory)) {
118 fileTaskExecutor.mkdirs(extractionDirectory);
119 }
120 } catch (FileTaskExecutor.FileTaskFailedException | InterruptedException ex) {
121 /*
122 * The exception message is localized because ingest module
123 * start up exceptions are displayed to the user when running
124 * with the RCP GUI.
125 */
126 throw new IngestModuleException(Bundle.EmbeddedFileExtractor_make_output_dir_err(), ex);
127 }
128
129 /*
130 * Construct a hash map to keep track of depth in archives while
131 * processing archive files.
132 *
133 * TODO (Jira-7119): A ConcurrentHashMap of ConcurrentHashMaps is
134 * almost certainly the wrong data structure here. ConcurrentHashMap
135 * is intended to efficiently provide snapshots to multiple threads.
136 * A thread may not see the current state.
137 */
138 mapOfDepthTrees.put(jobId, new ConcurrentHashMap<>());
139 }
140
141 try {
144 throw new IngestModuleException(Bundle.CannotRunFileTypeDetection(), ex);
145 }
146
147 try {
148 archiveExtractor = new SevenZipExtractor(context, fileTypeDetector, moduleDirRelative, moduleDirAbsolute, fileTaskExecsByJob.get(jobId));
149 } catch (SevenZipNativeInitializationException ex) {
150 /*
151 * The exception message is localized because ingest module start up
152 * exceptions are displayed to the user when running with the RCP
153 * GUI.
154 */
155 throw new IngestModuleException(Bundle.UnableToInitializeLibraries(), ex);
156 }
157
158 try {
159 documentExtractor = new DocumentEmbeddedContentExtractor(context, fileTypeDetector, moduleDirRelative, moduleDirAbsolute, fileTaskExecsByJob.get(jobId));
160 } catch (NoCurrentCaseException ex) {
161 /*
162 * The exception message is localized because ingest module start up
163 * exceptions are displayed to the user when running with the RCP
164 * GUI.
165 */
166 throw new IngestModuleException(Bundle.EmbeddedFileExtractorIngestModule_UnableToGetMSOfficeExtractor_errMsg(), ex);
167 }
168 }
169
170 @Override
171 public ProcessResult process(AbstractFile abstractFile) {
172 /*
173 * Skip unallocated space files.
174 */
175 if ((abstractFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS))
176 || (abstractFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.SLACK))) {
177 return ProcessResult.OK;
178 }
179
180 /*
181 * Skip known files.
182 */
183 if (abstractFile.getKnown().equals(TskData.FileKnown.KNOWN)) {
184 return ProcessResult.OK;
185 }
186
187 /*
188 * Skip directories, etc.
189 */
190 if (!abstractFile.isFile()) {
191 return ProcessResult.OK;
192 }
193
194 /*
195 * Attempt embedded file extraction for the file if it is a supported
196 * type/format.
197 */
198 if (archiveExtractor.isSevenZipExtractionSupported(abstractFile)) {
199 archiveExtractor.unpack(abstractFile, mapOfDepthTrees.get(jobId));
200 } else if (documentExtractor.isContentExtractionSupported(abstractFile)) {
201 documentExtractor.extractEmbeddedContent(abstractFile);
202 }
203 return ProcessResult.OK;
204 }
205
206 @Override
207 public void shutDown() {
208 if (refCounter.decrementAndGet(jobId) == 0) {
209 mapOfDepthTrees.remove(jobId);
210 FileTaskExecutor fileTaskExecutor;
211 synchronized (execMapLock) {
212 fileTaskExecutor = fileTaskExecsByJob.remove(jobId);
213 }
214 fileTaskExecutor.shutDown();
215 taskStatsLogger.log(Level.INFO, String.format("total tasks: %d, total task timeouts: %d, total task retries: %d, total task failures: %d (ingest job ID = %d)", TaskRetryUtil.getTotalTasksCount(), TaskRetryUtil.getTotalTaskAttemptTimeOutsCount(), TaskRetryUtil.getTotalTaskRetriesCount(), TaskRetryUtil.getTotalFailedTasksCount(), jobId));
216 }
217 }
218
227 static String getUniqueName(AbstractFile file) {
228 return Long.toString(file.getId());
229 }
230
231}
synchronized static Logger getLogger(String logName)
static final ConcurrentHashMap< Long, ConcurrentHashMap< Long, Archive > > mapOfDepthTrees

Copyright © 2012-2024 Sleuth Kit Labs. Generated on:
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.