Autopsy 4.22.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
SevenZipExtractor.java
Go to the documentation of this file.
1/*
2 * Autopsy Forensic Browser
3 *
4 * Copyright 2015-2021 Basis Technology Corp.
5 * Contact: carrier <at> sleuthkit <dot> org
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19package org.sleuthkit.autopsy.modules.embeddedfileextractor;
20
21import java.io.File;
22import java.io.FileOutputStream;
23import java.io.IOException;
24import java.nio.charset.Charset;
25import java.nio.file.Path;
26import java.nio.file.Paths;
27import java.util.ArrayList;
28import java.util.Arrays;
29import java.util.Collection;
30import java.util.Collections;
31import java.util.Date;
32import java.util.HashMap;
33import java.util.List;
34import java.util.Map;
35import java.util.concurrent.ConcurrentHashMap;
36import java.util.logging.Level;
37import net.sf.sevenzipjbinding.ArchiveFormat;
38import static net.sf.sevenzipjbinding.ArchiveFormat.RAR;
39import net.sf.sevenzipjbinding.ExtractAskMode;
40import net.sf.sevenzipjbinding.ExtractOperationResult;
41import net.sf.sevenzipjbinding.IArchiveExtractCallback;
42import net.sf.sevenzipjbinding.ICryptoGetTextPassword;
43import net.sf.sevenzipjbinding.IInArchive;
44import net.sf.sevenzipjbinding.ISequentialOutStream;
45import net.sf.sevenzipjbinding.PropID;
46import net.sf.sevenzipjbinding.SevenZip;
47import net.sf.sevenzipjbinding.SevenZipException;
48import net.sf.sevenzipjbinding.SevenZipNativeInitializationException;
49import org.apache.tika.Tika;
50import org.apache.tika.parser.txt.CharsetDetector;
51import org.apache.tika.parser.txt.CharsetMatch;
52import org.netbeans.api.progress.ProgressHandle;
53import org.openide.util.NbBundle;
54import org.openide.util.NbBundle.Messages;
55import org.sleuthkit.autopsy.casemodule.Case;
56import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException;
57import org.sleuthkit.autopsy.casemodule.services.FileManager;
58import org.sleuthkit.autopsy.coreutils.FileUtil;
59import org.sleuthkit.autopsy.coreutils.Logger;
60import org.sleuthkit.autopsy.coreutils.MessageNotifyUtil;
61import org.sleuthkit.autopsy.modules.encryptiondetection.EncryptionDetectionModuleFactory;
62import org.sleuthkit.autopsy.ingest.IngestJobContext;
63import org.sleuthkit.autopsy.ingest.IngestMessage;
64import org.sleuthkit.autopsy.ingest.IngestMonitor;
65import org.sleuthkit.autopsy.ingest.IngestServices;
66import org.sleuthkit.autopsy.ingest.ModuleContentEvent;
67import org.sleuthkit.autopsy.modules.embeddedfileextractor.FileTaskExecutor.FileTaskFailedException;
68import org.sleuthkit.autopsy.modules.filetypeid.FileTypeDetector;
69import org.sleuthkit.datamodel.AbstractFile;
70import org.sleuthkit.datamodel.Blackboard;
71import org.sleuthkit.datamodel.BlackboardArtifact;
72import org.sleuthkit.datamodel.BlackboardAttribute;
73import static org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE.TSK_COMMENT;
74import static org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DESCRIPTION;
75import static org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE.TSK_SET_NAME;
76import org.sleuthkit.datamodel.Content;
77import org.sleuthkit.datamodel.DerivedFile;
78import org.sleuthkit.datamodel.EncodedFileOutputStream;
79import org.sleuthkit.datamodel.ReadContentInputStream;
80import org.sleuthkit.datamodel.Score;
81import org.sleuthkit.datamodel.SleuthkitCase.CaseDbTransaction;
82import org.sleuthkit.datamodel.TskCoreException;
83import org.sleuthkit.datamodel.TskData;
84
89class SevenZipExtractor {
90
// Logger for this class.
91 private static final Logger logger = Logger.getLogger(SevenZipExtractor.class.getName());
92
// Module name passed as the source of blackboard attributes, posted artifacts and ingest messages.
93 private static final String MODULE_NAME = EmbeddedFileExtractorModuleFactory.getModuleName();
94
95 //encryption type strings
// ENCRYPTION_FILE_LEVEL is reported when only some items were encrypted;
// ENCRYPTION_FULL when every item examined was encrypted (see unpack()).
96 private static final String ENCRYPTION_FILE_LEVEL = NbBundle.getMessage(EmbeddedFileExtractorIngestModule.class,
97 "EmbeddedFileExtractorIngestModule.ArchiveExtractor.encryptionFileLevel");
98 private static final String ENCRYPTION_FULL = EncryptionDetectionModuleFactory.PASSWORD_PROTECT_MESSAGE;
99
100 //zip bomb detection
// MAX_DEPTH: an archive whose recorded depth equals this value is flagged as a zip bomb.
// MAX_COMPRESSION_RATIO: an item whose size / packed size reaches this value is flagged.
// MIN_COMPRESSION_RATIO_SIZE: items smaller than this skip the compression ratio check.
// MIN_FREE_DISK_SPACE: an item is skipped if extracting it would leave less estimated free space than this.
101 private static final int MAX_DEPTH = 4;
102 private static final int MAX_COMPRESSION_RATIO = 600;
103 private static final long MIN_COMPRESSION_RATIO_SIZE = 500 * 1000000L;
104 private static final long MIN_FREE_DISK_SPACE = 1 * 1000 * 1000000L; //1GB
105
// Collaborators; all but 'services' are supplied through the constructor.
106 private IngestServices services = IngestServices.getInstance();
107 private final IngestJobContext context;
108 private final FileTypeDetector fileTypeDetector;
109 private final FileTaskExecutor fileTaskExecutor;
110
// Module output directory, as a relative path and as an absolute path.
111 private String moduleDirRelative;
112 private String moduleDirAbsolute;
113
// Blackboard of the current case; assigned at the start of unpack().
114 private Blackboard blackboard;
115
// Per-archive state that unpack() assigns for the archive currently being processed.
116 private ProgressHandle progress;
117 private int numItems;
118 private String currentArchiveName;
119
/**
 * MIME types of the archive formats this extractor will attempt to unpack.
 * {@link #toString()} returns the MIME type string of the constant.
 */
enum SupportedArchiveExtractionFormats {

    ZIP("application/zip"), //NON-NLS
    SEVENZ("application/x-7z-compressed"), //NON-NLS
    GZIP("application/gzip"), //NON-NLS
    XGZIP("application/x-gzip"), //NON-NLS
    XBZIP2("application/x-bzip2"), //NON-NLS
    XTAR("application/x-tar"), //NON-NLS
    XGTAR("application/x-gtar"), //NON-NLS
    XRAR("application/x-rar-compressed"); //NON-NLS

    private final String mimeType;

    /**
     * Creates a constant bound to the given MIME type.
     *
     * @param mimeType The MIME type string this constant represents.
     */
    SupportedArchiveExtractionFormats(final String mimeType) {
        this.mimeType = mimeType;
    }

    /**
     * @return The MIME type string of this format.
     */
    @Override
    public String toString() {
        return this.mimeType;
    }
    // TODO Expand to support more formats after upgrading Tika
}
146
167 SevenZipExtractor(IngestJobContext context, FileTypeDetector fileTypeDetector, String moduleDirRelative, String moduleDirAbsolute, FileTaskExecutor fileTaskExecutor) throws SevenZipNativeInitializationException {
168 if (!SevenZip.isInitializedSuccessfully()) {
169 throw new SevenZipNativeInitializationException("SevenZip has not been previously initialized.");
170 }
171 this.context = context;
172 this.fileTypeDetector = fileTypeDetector;
173 this.moduleDirRelative = moduleDirRelative;
174 this.moduleDirAbsolute = moduleDirAbsolute;
175 this.fileTaskExecutor = fileTaskExecutor;
176 }
177
186 boolean isSevenZipExtractionSupported(AbstractFile file) {
187 String fileMimeType = fileTypeDetector.getMIMEType(file);
188 for (SupportedArchiveExtractionFormats mimeType : SupportedArchiveExtractionFormats.values()) {
189 if (checkForIngestCancellation(file)) {
190 break;
191 }
192 if (mimeType.toString().equals(fileMimeType)) {
193 return true;
194 }
195 }
196 return false;
197 }
198
199 boolean isSevenZipExtractionSupported(String mimeType) {
201 if (mimeType.contains(supportedMimeType.toString())) {
202 return true;
203 }
204 }
205 return false;
206 }
207
218 private boolean checkForIngestCancellation(AbstractFile file) {
219 if (fileTaskExecutor != null && context != null && context.fileIngestIsCancelled()) {
220 logger.log(Level.INFO, "Ingest was cancelled. Results extracted from the following archive file may be incomplete. Name: {0}Object ID: {1}", new Object[]{file.getName(), file.getId()});
221 return true;
222 }
223 return false;
224 }
225
249 private boolean isZipBombArchiveItemCheck(AbstractFile archiveFile, IInArchive inArchive, int inArchiveItemIndex, ConcurrentHashMap<Long, Archive> depthMap, String escapedFilePath) {
250 //If a file is corrupted as a result of reconstructing it from unallocated space, then
251 //7zip does a poor job estimating the original uncompressed file size.
252 //As a result, many corrupted files have wonky compression ratios and could flood the UI
253 //with false zip bomb notifications. The decision was made to skip compression ratio checks
254 //for unallocated zip files. Instead, we let the depth be an indicator of a zip bomb.
255 //Gzip archives compress a single file. They may have a sparse file,
256 //and that file could be much larger, however it won't be the exponential growth seen with more dangerous zip bombs.
257 //In addition a fair number of browser cache files will be gzip archives,
258 //and their file sizes are frequently retrieved incorrectly so ignoring gzip files is a reasonable decision.
259 if (archiveFile.isMetaFlagSet(TskData.TSK_FS_META_FLAG_ENUM.UNALLOC) || archiveFile.getMIMEType().equalsIgnoreCase(SupportedArchiveExtractionFormats.XGZIP.toString())) {
260 return false;
261 }
262
263 try {
264 final Long archiveItemSize = (Long) inArchive.getProperty(
265 inArchiveItemIndex, PropID.SIZE);
266
267 //skip the check for small files
268 if (archiveItemSize == null || archiveItemSize < MIN_COMPRESSION_RATIO_SIZE) {
269 return false;
270 }
271
272 final Long archiveItemPackedSize = (Long) inArchive.getProperty(
273 inArchiveItemIndex, PropID.PACKED_SIZE);
274
275 if (archiveItemPackedSize == null || archiveItemPackedSize <= 0) {
276 logger.log(Level.WARNING, "Cannot getting compression ratio, cannot detect if zipbomb: {0}, item: {1}", //NON-NLS
277 new Object[]{archiveFile.getName(), (String) inArchive.getProperty(inArchiveItemIndex, PropID.PATH)}); //NON-NLS
278 return false;
279 }
280
281 int cRatio = (int) (archiveItemSize / archiveItemPackedSize);
282
283 if (cRatio >= MAX_COMPRESSION_RATIO) {
284 Archive rootArchive = depthMap.get(depthMap.get(archiveFile.getId()).getRootArchiveId());
285 String details = NbBundle.getMessage(SevenZipExtractor.class,
286 "EmbeddedFileExtractorIngestModule.ArchiveExtractor.isZipBombCheck.warnDetails",
287 cRatio, FileUtil.escapeFileName(getArchiveFilePath(rootArchive.getArchiveFile())));
288
289 flagRootArchiveAsZipBomb(rootArchive, archiveFile, details, escapedFilePath);
290 return true;
291 } else {
292 return false;
293 }
294
295 } catch (SevenZipException ex) {
296 logger.log(Level.WARNING, "Error getting archive item size and cannot detect if zipbomb. ", ex); //NON-NLS
297 return false;
298 }
299 }
300
312 private void flagRootArchiveAsZipBomb(Archive rootArchive, AbstractFile archiveFile, String details, String escapedFilePath) {
313 rootArchive.flagAsZipBomb();
314 logger.log(Level.INFO, details);
315
316 String setName = "Possible Zip Bomb";
317 try {
318 Collection<BlackboardAttribute> attributes = Arrays.asList(
319 new BlackboardAttribute(
320 TSK_SET_NAME, MODULE_NAME,
321 setName),
322 new BlackboardAttribute(
323 TSK_DESCRIPTION, MODULE_NAME,
324 Bundle.SevenZipExtractor_zipBombArtifactCreation_text(archiveFile.getName())),
325 new BlackboardAttribute(
326 TSK_COMMENT, MODULE_NAME,
327 details));
328
329 if (!blackboard.artifactExists(archiveFile, BlackboardArtifact.Type.TSK_INTERESTING_ITEM, attributes)) {
330 BlackboardArtifact artifact = rootArchive.getArchiveFile().newAnalysisResult(
331 BlackboardArtifact.Type.TSK_INTERESTING_ITEM, Score.SCORE_LIKELY_NOTABLE,
332 null, setName, null,
333 attributes)
334 .getAnalysisResult();
335
336 try {
337 /*
338 * post the artifact which will index the artifact for
339 * keyword search, and fire an event to notify UI of this
340 * new artifact
341 */
342 blackboard.postArtifact(artifact, MODULE_NAME, context.getJobId());
343
344 String msg = NbBundle.getMessage(SevenZipExtractor.class,
345 "EmbeddedFileExtractorIngestModule.ArchiveExtractor.isZipBombCheck.warnMsg", archiveFile.getName(), escapedFilePath);//NON-NLS
346
347 services.postMessage(IngestMessage.createWarningMessage(MODULE_NAME, msg, details));
348
349 } catch (Blackboard.BlackboardException ex) {
350 logger.log(Level.SEVERE, "Unable to index blackboard artifact " + artifact.getArtifactID(), ex); //NON-NLS
351 MessageNotifyUtil.Notify.error(
352 Bundle.SevenZipExtractor_indexError_message(), artifact.getDisplayName());
353 }
354 }
355 } catch (TskCoreException ex) {
356 logger.log(Level.SEVERE, "Error creating blackboard artifact for Zip Bomb Detection for file: " + escapedFilePath, ex); //NON-NLS
357 }
358 }
359
368 private ArchiveFormat get7ZipOptions(AbstractFile archiveFile) {
369 // try to get the file type from the BB
370 String detectedFormat;
371 detectedFormat = archiveFile.getMIMEType();
372
373 if (detectedFormat == null) {
374 logger.log(Level.WARNING, "Could not detect format for file: {0}", archiveFile); //NON-NLS
375
376 // if we don't have attribute info then use file extension
377 String extension = archiveFile.getNameExtension();
378 if ("rar".equals(extension)) //NON-NLS
379 {
380 // for RAR files we need to open them explicitly as RAR. Otherwise, if there is a ZIP archive inside RAR archive
381 // it will be opened incorrectly when using 7zip's built-in auto-detect functionality
382 return RAR;
383 }
384
385 // Otherwise open the archive using 7zip's built-in auto-detect functionality
386 return null;
387 } else if (detectedFormat.contains("application/x-rar-compressed")) //NON-NLS
388 {
389 // for RAR files we need to open them explicitly as RAR. Otherwise, if there is a ZIP archive inside RAR archive
390 // it will be opened incorrectly when using 7zip's built-in auto-detect functionality
391 return RAR;
392 }
393
394 // Otherwise open the archive using 7zip's built-in auto-detect functionality
395 return null;
396 }
397
408 private long getRootArchiveId(AbstractFile file) throws TskCoreException {
409 long id = file.getId();
410 Content parentContent = file.getParent();
411 while (parentContent != null) {
412 id = parentContent.getId();
413 parentContent = parentContent.getParent();
414 }
415 return id;
416 }
417
437 private List<AbstractFile> getAlreadyExtractedFiles(AbstractFile archiveFile, String archiveFilePath) throws TskCoreException, InterruptedException, FileTaskExecutor.FileTaskFailedException {
438 /*
439 * TODO (Jira-7145): Is this logic correct?
440 */
441 List<AbstractFile> extractedFiles = new ArrayList<>();
442 File outputDirectory = new File(moduleDirAbsolute, EmbeddedFileExtractorIngestModule.getUniqueName(archiveFile));
443 if (archiveFile.hasChildren() && fileTaskExecutor.exists(outputDirectory)) {
444 Case currentCase = Case.getCurrentCase();
445 FileManager fileManager = currentCase.getServices().getFileManager();
446 extractedFiles.addAll(fileManager.findFilesByParentPath(getRootArchiveId(archiveFile), archiveFilePath));
447 }
448 return extractedFiles;
449 }
450
458 private String getArchiveFilePath(AbstractFile archiveFile) {
459 return archiveFile.getParentPath() + archiveFile.getName();
460 }
461
471 private boolean makeExtractedFilesDirectory(String uniqueArchiveFileName) {
472 boolean success = true;
473 Path rootDirectoryPath = Paths.get(moduleDirAbsolute, uniqueArchiveFileName);
474 File rootDirectory = rootDirectoryPath.toFile();
475 try {
476 if (!fileTaskExecutor.exists(rootDirectory)) {
477 success = fileTaskExecutor.mkdirs(rootDirectory);
478 }
479 } catch (SecurityException | FileTaskFailedException | InterruptedException ex) {
480 logger.log(Level.SEVERE, String.format("Error creating root extracted files directory %s", rootDirectory), ex); //NON-NLS
481 success = false;
482 }
483 return success;
484 }
485
498 private String getPathInArchive(IInArchive archive, int inArchiveItemIndex, AbstractFile archiveFile) throws SevenZipException {
499 String pathInArchive = (String) archive.getProperty(inArchiveItemIndex, PropID.PATH);
500
501 if (pathInArchive == null || pathInArchive.isEmpty()) {
502 //some formats (.tar.gz) may not be handled correctly -- file in archive has no name/path
503 //handle this for .tar.gz and tgz but assuming the child is tar,
504 //otherwise, unpack using itemNumber as name
505
506 //TODO this should really be signature based, not extension based
507 String archName = archiveFile.getName();
508 int dotI = archName.lastIndexOf(".");
509 String useName = null;
510 if (dotI != -1) {
511 String base = archName.substring(0, dotI);
512 String ext = archName.substring(dotI);
513 int colonIndex = ext.lastIndexOf(":");
514 if (colonIndex != -1) {
515 // If alternate data stream is found, fix the name
516 // so Windows doesn't choke on the colon character.
517 ext = ext.substring(0, colonIndex);
518 }
519 switch (ext) {
520 case ".gz": //NON-NLS
521 useName = base;
522 break;
523 case ".tgz": //NON-NLS
524 useName = base + ".tar"; //NON-NLS
525 break;
526 case ".bz2": //NON-NLS
527 useName = base;
528 break;
529 }
530 }
531 if (useName == null) {
532 pathInArchive = "/" + archName + "/" + Integer.toString(inArchiveItemIndex);
533 } else {
534 pathInArchive = "/" + useName;
535 }
536 }
537 return pathInArchive;
538 }
539
540 private byte[] getPathBytesInArchive(IInArchive archive, int inArchiveItemIndex, AbstractFile archiveFile) throws SevenZipException {
541 return (byte[]) archive.getProperty(inArchiveItemIndex, PropID.PATH_BYTES);
542 }
543
544 /*
545 * Get the String that will represent the key for the hashmap which keeps
546 * track of existing files from an AbstractFile
547 */
548 private String getKeyAbstractFile(AbstractFile fileInDatabase) {
549 return fileInDatabase == null ? null : fileInDatabase.getParentPath() + fileInDatabase.getName();
550 }
551
552 /*
553 * Get the String that will represent the key for the hashmap which keeps
554 * track of existing files from an unpacked node and the archiveFilePath
555 */
556 private String getKeyFromUnpackedNode(UnpackedTree.UnpackedNode node, String archiveFilePath) {
557 return node == null ? null : archiveFilePath + "/" + node.getFileName();
558 }
559
567 void unpack(AbstractFile archiveFile, ConcurrentHashMap<Long, Archive> depthMap) {
568 unpack(archiveFile, depthMap, null);
569 }
570
582 @Messages({"SevenZipExtractor.indexError.message=Failed to index encryption detected artifact for keyword search.",
583 "# {0} - rootArchive",
584 "SevenZipExtractor.zipBombArtifactCreation.text=Zip Bomb Detected {0}"})
585 boolean unpack(AbstractFile archiveFile, ConcurrentHashMap<Long, Archive> depthMap, String password) {
586 boolean unpackSuccessful = true; //initialized to true change to false if any files fail to extract and
587 boolean hasEncrypted = false;
588 boolean fullEncryption = true;
589 boolean progressStarted = false;
590 final String archiveFilePath = getArchiveFilePath(archiveFile);
591 final String escapedArchiveFilePath = FileUtil.escapeFileName(archiveFilePath);
592 HashMap<String, ZipFileStatusWrapper> statusMap = new HashMap<>();
593 List<AbstractFile> unpackedFiles = Collections.<AbstractFile>emptyList();
594
595 currentArchiveName = archiveFile.getName();
596
597 SevenZipContentReadStream stream = null;
598 progress = ProgressHandle.createHandle(Bundle.EmbeddedFileExtractorIngestModule_ArchiveExtractor_moduleName());
599 //recursion depth check for zip bomb
600 Archive parentAr;
601 try {
602 blackboard = Case.getCurrentCaseThrows().getSleuthkitCase().getBlackboard();
603 } catch (NoCurrentCaseException ex) {
604 logger.log(Level.INFO, "Exception while getting open case.", ex); //NON-NLS
605 unpackSuccessful = false;
606 return unpackSuccessful;
607 }
608 if (checkForIngestCancellation(archiveFile)) {
609 return false;
610 }
611 try {
612 List<AbstractFile> existingFiles = getAlreadyExtractedFiles(archiveFile, archiveFilePath);
613 for (AbstractFile file : existingFiles) {
614 statusMap.put(getKeyAbstractFile(file), new ZipFileStatusWrapper(file, ZipFileStatus.EXISTS));
615 }
616 } catch (TskCoreException | FileTaskFailedException | InterruptedException ex) {
617 logger.log(Level.SEVERE, String.format("Error checking if %s has already been processed, skipping", escapedArchiveFilePath), ex); //NON-NLS
618 unpackSuccessful = false;
619 return unpackSuccessful;
620 }
621 if (checkForIngestCancellation(archiveFile)) {
622 return false;
623 }
624 parentAr = depthMap.get(archiveFile.getId());
625 if (parentAr == null) {
626 parentAr = new Archive(0, archiveFile.getId(), archiveFile);
627 depthMap.put(archiveFile.getId(), parentAr);
628 } else {
629 Archive rootArchive = depthMap.get(parentAr.getRootArchiveId());
630 if (rootArchive.isFlaggedAsZipBomb()) {
631 //skip this archive as the root archive has already been determined to contain a zip bomb
632 unpackSuccessful = false;
633 return unpackSuccessful;
634 } else if (parentAr.getDepth() == MAX_DEPTH) {
635 String details = NbBundle.getMessage(SevenZipExtractor.class,
636 "EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.warnDetails.zipBomb",
637 parentAr.getDepth(), FileUtil.escapeFileName(getArchiveFilePath(rootArchive.getArchiveFile())));
638 flagRootArchiveAsZipBomb(rootArchive, archiveFile, details, escapedArchiveFilePath);
639 unpackSuccessful = false;
640 return unpackSuccessful;
641 }
642 }
643 if (checkForIngestCancellation(archiveFile)) {
644 return false;
645 }
646 IInArchive inArchive = null;
647 try {
648 stream = new SevenZipContentReadStream(new ReadContentInputStream(archiveFile));
649 // for RAR files we need to open them explicitly as RAR. Otherwise, if there is a ZIP archive inside RAR archive
650 // it will be opened incorrectly when using 7zip's built-in auto-detect functionality.
651 // All other archive formats are still opened using 7zip built-in auto-detect functionality.
652 ArchiveFormat options = get7ZipOptions(archiveFile);
653 if (checkForIngestCancellation(archiveFile)) {
654 return false;
655 }
656 if (password == null) {
657 inArchive = SevenZip.openInArchive(options, stream);
658 } else {
659 inArchive = SevenZip.openInArchive(options, stream, password);
660 }
661 numItems = inArchive.getNumberOfItems();
662 progress.start(numItems);
663 progressStarted = true;
664 if (checkForIngestCancellation(archiveFile)) {
665 return false;
666 }
667 //setup the archive local root folder
668 final String uniqueArchiveFileName = FileUtil.escapeFileName(EmbeddedFileExtractorIngestModule.getUniqueName(archiveFile));
669 if (!makeExtractedFilesDirectory(uniqueArchiveFileName)) {
670 return false;
671 }
672
673 //initialize tree hierarchy to keep track of unpacked file structure
674 SevenZipExtractor.UnpackedTree unpackedTree = new SevenZipExtractor.UnpackedTree(moduleDirRelative + "/" + uniqueArchiveFileName, archiveFile);
675
676 long freeDiskSpace;
677 try {
678 freeDiskSpace = services.getFreeDiskSpace();
679 } catch (NullPointerException ex) {
680 //If ingest has not been run at least once getFreeDiskSpace() will throw a null pointer exception
681 //currently getFreeDiskSpace always returns DISK_FREE_SPACE_UNKNOWN
682 freeDiskSpace = IngestMonitor.DISK_FREE_SPACE_UNKNOWN;
683 }
684
685 Map<Integer, InArchiveItemDetails> archiveDetailsMap = new HashMap<>();
686 for (int inArchiveItemIndex = 0; inArchiveItemIndex < numItems; inArchiveItemIndex++) {
687 if (checkForIngestCancellation(archiveFile)) {
688 return false;
689 }
690 progress.progress(String.format("%s: Analyzing archive metadata and creating local files (%d of %d)", currentArchiveName, inArchiveItemIndex + 1, numItems), 0);
691 if (isZipBombArchiveItemCheck(archiveFile, inArchive, inArchiveItemIndex, depthMap, escapedArchiveFilePath)) {
692 unpackSuccessful = false;
693 return unpackSuccessful;
694 }
695
696 String pathInArchive = getPathInArchive(inArchive, inArchiveItemIndex, archiveFile);
697 byte[] pathBytesInArchive = getPathBytesInArchive(inArchive, inArchiveItemIndex, archiveFile);
698 UnpackedTree.UnpackedNode unpackedNode = unpackedTree.addNode(pathInArchive, pathBytesInArchive);
699 if (checkForIngestCancellation(archiveFile)) {
700 return false;
701 }
702 final boolean isEncrypted = (Boolean) inArchive.getProperty(inArchiveItemIndex, PropID.ENCRYPTED);
703
704 if (isEncrypted && password == null) {
705 logger.log(Level.WARNING, "Skipping encrypted file in archive: {0}", pathInArchive); //NON-NLS
706 hasEncrypted = true;
707 unpackSuccessful = false;
708 continue;
709 } else {
710 fullEncryption = false;
711 }
712
713 // NOTE: item size may return null in case of certain
714 // archiving formats. Eg: BZ2
715 //check if unpacking this file will result in out of disk space
716 //this is additional to zip bomb prevention mechanism
717 Long archiveItemSize = (Long) inArchive.getProperty(
718 inArchiveItemIndex, PropID.SIZE);
719 if (freeDiskSpace != IngestMonitor.DISK_FREE_SPACE_UNKNOWN && archiveItemSize != null && archiveItemSize > 0) { //if free space is known and file is not empty.
720 String archiveItemPath = (String) inArchive.getProperty(
721 inArchiveItemIndex, PropID.PATH);
722 long newDiskSpace = freeDiskSpace - archiveItemSize;
723 if (newDiskSpace < MIN_FREE_DISK_SPACE) {
724 String msg = NbBundle.getMessage(SevenZipExtractor.class,
725 "EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.notEnoughDiskSpace.msg",
726 escapedArchiveFilePath, archiveItemPath);
727 String details = NbBundle.getMessage(SevenZipExtractor.class,
728 "EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.notEnoughDiskSpace.details");
729 services.postMessage(IngestMessage.createErrorMessage(MODULE_NAME, msg, details));
730 logger.log(Level.INFO, "Skipping archive item due to insufficient disk space: {0}, {1}", new String[]{escapedArchiveFilePath, archiveItemPath}); //NON-NLS
731 logger.log(Level.INFO, "Available disk space: {0}", new Object[]{freeDiskSpace}); //NON-NLS
732 unpackSuccessful = false;
733 continue; //skip this file
734 } else {
735 //update est. disk space during this archive, so we don't need to poll for every file extracted
736 freeDiskSpace = newDiskSpace;
737 }
738 }
739 if (checkForIngestCancellation(archiveFile)) {
740 return false;
741 }
742 final String uniqueExtractedName = FileUtil.escapeFileName(uniqueArchiveFileName + File.separator + (inArchiveItemIndex / 1000) + File.separator + inArchiveItemIndex);
743 final String localAbsPath = moduleDirAbsolute + File.separator + uniqueExtractedName;
744 final String localRelPath = moduleDirRelative + File.separator + uniqueExtractedName;
745
746 //create local dirs and empty files before extracted
747 //cannot rely on files in top-bottom order
748 File localFile = new File(localAbsPath);
749 boolean localFileExists;
750 try {
751 if ((Boolean) inArchive.getProperty(inArchiveItemIndex, PropID.IS_FOLDER)) {
752 localFileExists = findOrCreateDirectory(localFile);
753 } else {
754 localFileExists = findOrCreateEmptyFile(localFile);
755 }
756 } catch (FileTaskFailedException | InterruptedException ex) {
757 localFileExists = false;
758 logger.log(Level.SEVERE, String.format("Error fiding or creating %s", localFile.getAbsolutePath()), ex); //NON-NLS
759 }
760 if (checkForIngestCancellation(archiveFile)) {
761 return false;
762 }
763 // skip the rest of this loop if we couldn't create the file
764 //continue will skip details from being added to the map
765 if (!localFileExists) {
766 logger.log(Level.SEVERE, String.format("Skipping %s because it could not be created", localFile.getAbsolutePath())); //NON-NLS
767 continue;
768 }
769
770 //Store archiveItemIndex with local paths and unpackedNode reference.
771 //Necessary for the extract call back to write the current archive
772 //file to the correct disk location and to correctly update it's
773 //corresponding unpackedNode
774 archiveDetailsMap.put(inArchiveItemIndex, new InArchiveItemDetails(
775 unpackedNode, localAbsPath, localRelPath));
776 }
777
778 int[] extractionIndices = getExtractableFilesFromDetailsMap(archiveDetailsMap);
779 if (checkForIngestCancellation(archiveFile)) {
780 return false;
781 }
782 StandardIArchiveExtractCallback archiveCallBack
784 inArchive, archiveFile, progress,
785 archiveDetailsMap, password, freeDiskSpace);
786
787 //According to the documentation, indices in sorted order are optimal
788 //for efficiency. Hence, the HashMap and linear processing of
789 //inArchiveItemIndex. False indicates non-test mode
790 inArchive.extract(extractionIndices, false, archiveCallBack);
791 if (checkForIngestCancellation(archiveFile)) {
792 return false;
793 }
794 unpackSuccessful &= archiveCallBack.wasSuccessful();
795
796 archiveDetailsMap = null;
797
798 // add them to the DB. We wait until the end so that we have the metadata on all of the
799 // intermediate nodes since the order is not guaranteed
800 try {
801 unpackedTree.updateOrAddFileToCaseRec(statusMap, archiveFilePath, parentAr, archiveFile, depthMap);
802 unpackedTree.commitCurrentTransaction();
803 } catch (TskCoreException | NoCurrentCaseException ex) {
804 logger.log(Level.SEVERE, "Error populating complete derived file hierarchy from the unpacked dir structure", ex); //NON-NLS
805 //TODO decide if anything to cleanup, for now bailing
806 unpackedTree.rollbackCurrentTransaction();
807 }
808
809 if (checkForIngestCancellation(archiveFile)) {
810 return false;
811 }
812
813 // Get the new files to be added to the case.
814 unpackedFiles = unpackedTree.getAllFileObjects();
815 } catch (SevenZipException | IllegalArgumentException ex) {
816 logger.log(Level.WARNING, "Error unpacking file: " + archiveFile, ex); //NON-NLS
817 //inbox message
818
819 // print a message if the file is allocated
820 if (archiveFile.isMetaFlagSet(TskData.TSK_FS_META_FLAG_ENUM.ALLOC)) {
821 String msg = NbBundle.getMessage(SevenZipExtractor.class,
822 "EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.errUnpacking.msg",
823 currentArchiveName);
824 String details = NbBundle.getMessage(SevenZipExtractor.class,
825 "EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.errUnpacking.details",
826 escapedArchiveFilePath, ex.getMessage());
827 services.postMessage(IngestMessage.createErrorMessage(MODULE_NAME, msg, details));
828 }
829 } finally {
830 if (inArchive != null) {
831 try {
832 inArchive.close();
833 } catch (SevenZipException e) {
834 logger.log(Level.SEVERE, "Error closing archive: " + archiveFile, e); //NON-NLS
835 }
836 }
837
838 if (stream != null) {
839 try {
840 stream.close();
841 } catch (IOException ex) {
842 logger.log(Level.SEVERE, "Error closing stream after unpacking archive: " + archiveFile, ex); //NON-NLS
843 }
844 }
845
846 //close progress bar
847 if (progressStarted) {
848 progress.finish();
849 }
850 }
851 if (checkForIngestCancellation(archiveFile)) {
852 return false;
853 }
854 //create artifact and send user message
855 if (hasEncrypted) {
856 String encryptionType = fullEncryption ? ENCRYPTION_FULL : ENCRYPTION_FILE_LEVEL;
857 try {
858 BlackboardArtifact artifact = archiveFile.newAnalysisResult(
859 new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_ENCRYPTION_DETECTED),
860 Score.SCORE_NOTABLE,
861 null, null, encryptionType,
862 Arrays.asList(new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_COMMENT, MODULE_NAME, encryptionType)))
863 .getAnalysisResult();
864
865 try {
866 /*
867 * post the artifact which will index the artifact for
868 * keyword search, and fire an event to notify UI of this
869 * new artifact
870 */
871 blackboard.postArtifact(artifact, MODULE_NAME, context.getJobId());
872 } catch (Blackboard.BlackboardException ex) {
873 logger.log(Level.SEVERE, "Unable to post blackboard artifact " + artifact.getArtifactID(), ex); //NON-NLS
874 MessageNotifyUtil.Notify.error(
875 Bundle.SevenZipExtractor_indexError_message(), artifact.getDisplayName());
876 }
877
878 } catch (TskCoreException ex) {
879 logger.log(Level.SEVERE, "Error creating blackboard artifact for encryption detected for file: " + escapedArchiveFilePath, ex); //NON-NLS
880 }
881
882 String msg = NbBundle.getMessage(SevenZipExtractor.class,
883 "EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.encrFileDetected.msg");
884 String details = NbBundle.getMessage(SevenZipExtractor.class,
885 "EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.encrFileDetected.details",
886 currentArchiveName, MODULE_NAME);
887 services.postMessage(IngestMessage.createWarningMessage(MODULE_NAME, msg, details));
888 }
889
890 // adding unpacked extracted derived files to the job after closing relevant resources.
891 if (!unpackedFiles.isEmpty()) {
892 //currently sending a single event for all new files
893 services.fireModuleContentEvent(new ModuleContentEvent(archiveFile));
894 if (context != null) {
895 context.addFilesToJob(unpackedFiles);
896 }
897 }
898
899 return unpackSuccessful;
900 }
901
909 private boolean findOrCreateDirectory(File directory) throws FileTaskFailedException, InterruptedException {
910 if (!fileTaskExecutor.exists(directory)) {
911 return fileTaskExecutor.mkdirs(directory);
912 } else {
913 return true;
914 }
915 }
916
924 private boolean findOrCreateEmptyFile(File file) throws FileTaskFailedException, InterruptedException {
925 if (!fileTaskExecutor.exists(file)) {
926 fileTaskExecutor.mkdirs(file.getParentFile());
927 return fileTaskExecutor.createNewFile(file);
928 } else {
929 return true;
930 }
931 }
932
933 private Charset detectFilenamesCharset(List<byte[]> byteDatas) {
934 Charset detectedCharset = null;
935 CharsetDetector charsetDetector = new CharsetDetector();
936 int byteSum = 0;
937 int fileNum = 0;
938 for (byte[] byteData : byteDatas) {
939 fileNum++;
940 byteSum += byteData.length;
941 // Only read ~1000 bytes of filenames in this directory
942 if (byteSum >= 1000) {
943 break;
944 }
945 }
946 byte[] allBytes = new byte[byteSum];
947 int start = 0;
948 for (int i = 0; i < fileNum; i++) {
949 byte[] byteData = byteDatas.get(i);
950 System.arraycopy(byteData, 0, allBytes, start, byteData.length);
951 start += byteData.length;
952 }
953 charsetDetector.setText(allBytes);
954 CharsetMatch cm = charsetDetector.detect();
955 if (cm != null && cm.getConfidence() >= 90 && Charset.isSupported(cm.getName())) {
956 detectedCharset = Charset.forName(cm.getName());
957 }
958 return detectedCharset;
959 }
960
965 private int[] getExtractableFilesFromDetailsMap(
966 Map<Integer, InArchiveItemDetails> archiveDetailsMap) {
967
968 Integer[] wrappedExtractionIndices = archiveDetailsMap.keySet()
969 .toArray(new Integer[archiveDetailsMap.size()]);
970
971 return Arrays.stream(wrappedExtractionIndices)
972 .mapToInt(Integer::intValue)
973 .toArray();
974
975 }
976
984 private final static class UnpackStream implements ISequentialOutStream {
985
986 private EncodedFileOutputStream output;
987 private String localAbsPath;
988 private int bytesWritten;
989 private static final Tika tika = new Tika();
990 private String mimeType = "";
991
992 UnpackStream(String localAbsPath) throws IOException {
993 this.output = new EncodedFileOutputStream(new FileOutputStream(localAbsPath), TskData.EncodingType.XOR1);
994 this.localAbsPath = localAbsPath;
995 this.bytesWritten = 0;
996 }
997
998 public void setNewOutputStream(String localAbsPath) throws IOException {
999 this.output.close();
1000 this.output = new EncodedFileOutputStream(new FileOutputStream(localAbsPath), TskData.EncodingType.XOR1);
1001 this.localAbsPath = localAbsPath;
1002 this.bytesWritten = 0;
1003 this.mimeType = "";
1004 }
1005
1006 public int getSize() {
1007 return bytesWritten;
1008 }
1009
1010 @Override
1011 public int write(byte[] bytes) throws SevenZipException {
1012 try {
1013 // Detect MIME type now while the file is in memory
1014 if (bytesWritten == 0) {
1015 mimeType = tika.detect(bytes);
1016 }
1017 output.write(bytes);
1018 this.bytesWritten += bytes.length;
1019 } catch (IOException ex) {
1020 throw new SevenZipException(
1021 NbBundle.getMessage(SevenZipExtractor.class,
1022 "EmbeddedFileExtractorIngestModule.ArchiveExtractor.UnpackStream.write.exception.msg",
1023 localAbsPath), ex);
1024 }
1025 return bytes.length;
1026 }
1027
1028 public String getMIMEType() {
1029 return mimeType;
1030 }
1031
1032 public void close() throws IOException {
1033 try (EncodedFileOutputStream out = output) {
1034 out.flush();
1035 }
1036 }
1037
1038 }
1039
1043 private static class InArchiveItemDetails {
1044
1046 private final String localAbsPath;
1047 private final String localRelPath;
1048
1049 InArchiveItemDetails(
1050 SevenZipExtractor.UnpackedTree.UnpackedNode unpackedNode,
1051 String localAbsPath, String localRelPath) {
1052 this.unpackedNode = unpackedNode;
1053 this.localAbsPath = localAbsPath;
1054 this.localRelPath = localRelPath;
1055 }
1056
1060
1061 public String getLocalAbsPath() {
1062 return localAbsPath;
1063 }
1064
1065 public String getLocalRelPath() {
1066 return localRelPath;
1067 }
1068 }
1069
1074 private static class StandardIArchiveExtractCallback
1075 implements IArchiveExtractCallback, ICryptoGetTextPassword {
1076
1077 private final AbstractFile archiveFile;
1078 private final IInArchive inArchive;
1080 private final Map<Integer, InArchiveItemDetails> archiveDetailsMap;
1081 private final ProgressHandle progressHandle;
1082
1084
1086 private long modTimeInSeconds;
1088
1089 private boolean isFolder;
1090 private final String password;
1091
1092 private boolean unpackSuccessful = true;
1093
/**
 * Creates an extract callback bound to one opened archive.
 *
 * @param inArchive         The opened archive whose item properties are
 *                          queried by the callback methods.
 * @param archiveFile       The archive file being unpacked.
 * @param progressHandle    The progress handle updated as items are
 *                          prepared.
 * @param archiveDetailsMap Map from in-archive item index to the details
 *                          (tree node and local paths) of that item.
 * @param password          The value returned from
 *                          cryptoGetTextPassword().
 * @param freeDiskSpace     Not read by this constructor body.
 */
1094 StandardIArchiveExtractCallback(IInArchive inArchive,
1095 AbstractFile archiveFile, ProgressHandle progressHandle,
1096 Map<Integer, InArchiveItemDetails> archiveDetailsMap,
1097 String password, long freeDiskSpace) {
1098 this.inArchive = inArchive;
1099 this.progressHandle = progressHandle;
1100 this.archiveFile = archiveFile;
1101 this.archiveDetailsMap = archiveDetailsMap;
1102 this.password = password;
1103 }
1104
1119 @Override
1120 public ISequentialOutStream getStream(int inArchiveItemIndex,
1121 ExtractAskMode mode) throws SevenZipException {
1122
1123 this.inArchiveItemIndex = inArchiveItemIndex;
1124
1125 isFolder = (Boolean) inArchive
1126 .getProperty(inArchiveItemIndex, PropID.IS_FOLDER);
1127 if (isFolder || mode != ExtractAskMode.EXTRACT) {
1128 return null;
1129 }
1130
1131 final String localAbsPath = archiveDetailsMap.get(
1132 inArchiveItemIndex).getLocalAbsPath();
1133
1134 //If the Unpackstream has been allocated, then set the Outputstream
1135 //to another file rather than creating a new unpack stream. The 7Zip
1136 //binding has a memory leak, so creating new unpack streams will not be
1137 //dereferenced. As a fix, we create one UnpackStream, and mutate its state,
1138 //so that there only exists one 8192 byte buffer in memory per archive.
1139 try {
1140 if (unpackStream != null) {
1141 unpackStream.setNewOutputStream(localAbsPath);
1142 } else {
1143 unpackStream = new UnpackStream(localAbsPath);
1144 }
1145 } catch (IOException ex) {
1146 logger.log(Level.WARNING, String.format("Error opening or setting new stream " //NON-NLS
1147 + "for archive file at %s", localAbsPath), ex.getMessage()); //NON-NLS
1148 return null;
1149 }
1150
1151 return unpackStream;
1152 }
1153
/**
 * Reads the creation, last-access and last-modification times of the
 * current archive item (the index stored by getStream), converts each from
 * milliseconds to whole seconds (0 when the property is absent), and
 * reports the archive name and item path to the progress handle.
 *
 * NOTE(review): the progressHandle.progress(...) call at the end of this
 * method is cut off after its first argument in this listing (the line
 * numbered 1180 is missing). Restore the remaining argument(s) from the
 * original source before compiling.
 *
 * @param mode The extract mode supplied by the binding; not read here.
 *
 * @throws SevenZipException Propagated from inArchive.getProperty.
 */
1162 @Override
1163 public void prepareOperation(ExtractAskMode mode) throws SevenZipException {
1164 final Date createTime = (Date) inArchive.getProperty(
1165 inArchiveItemIndex, PropID.CREATION_TIME);
1166 final Date accessTime = (Date) inArchive.getProperty(
1167 inArchiveItemIndex, PropID.LAST_ACCESS_TIME);
1168 final Date writeTime = (Date) inArchive.getProperty(
1169 inArchiveItemIndex, PropID.LAST_MODIFICATION_TIME);
1170
// Date.getTime() is in milliseconds; the stored values are in seconds.
1171 createTimeInSeconds = createTime == null ? 0L
1172 : createTime.getTime() / 1000;
1173 modTimeInSeconds = writeTime == null ? 0L
1174 : writeTime.getTime() / 1000;
1175 accessTimeInSeconds = accessTime == null ? 0L
1176 : accessTime.getTime() / 1000;
1177
1178 progressHandle.progress(archiveFile.getName() + ": "
1179 + (String) inArchive.getProperty(inArchiveItemIndex, PropID.PATH),
1181
1182 }
1183
1192 @Override
1193 public void setOperationResult(ExtractOperationResult result) throws SevenZipException {
1194
1196 = archiveDetailsMap.get(inArchiveItemIndex).getUnpackedNode();
1197 final String localRelPath = archiveDetailsMap.get(
1198 inArchiveItemIndex).getLocalRelPath();
1199 if (isFolder) {
1200 unpackedNode.addDerivedInfo(0,
1201 !(Boolean) inArchive.getProperty(inArchiveItemIndex, PropID.IS_FOLDER),
1203 localRelPath);
1204 return;
1205 } else {
1206 unpackedNode.setMimeType(unpackStream.getMIMEType());
1207 }
1208
1209 final String localAbsPath = archiveDetailsMap.get(
1210 inArchiveItemIndex).getLocalAbsPath();
1211 if (result != ExtractOperationResult.OK) {
1212 if (archiveFile.isMetaFlagSet(TskData.TSK_FS_META_FLAG_ENUM.UNALLOC)) {
1213 logger.log(Level.WARNING, "Extraction of : {0} encountered error {1} (file is unallocated and may be corrupt)", //NON-NLS
1214 new Object[]{localAbsPath, result});
1215 } else {
1216 logger.log(Level.WARNING, "Extraction of : {0} encountered error {1}", //NON-NLS
1217 new Object[]{localAbsPath, result});
1218 }
1219 unpackSuccessful = false;
1220 }
1221
1222 //record derived data in unode, to be traversed later after unpacking the archive
1223 unpackedNode.addDerivedInfo(unpackStream.getSize(),
1224 !(Boolean) inArchive.getProperty(inArchiveItemIndex, PropID.IS_FOLDER),
1226
1227 try {
1228 unpackStream.close();
1229 } catch (IOException e) {
1230 logger.log(Level.WARNING, "Error closing unpack stream for file: {0}", localAbsPath); //NON-NLS
1231 }
1232 }
1233
/**
 * No-op: the total reported by the binding is not used.
 *
 * @param value Ignored.
 */
1234 @Override
1235 public void setTotal(long value) throws SevenZipException {
1236 //Not necessary for extract, left intentionally blank
1237 }
1238
/**
 * No-op: the completed amount reported by the binding is not used.
 *
 * @param value Ignored.
 */
1239 @Override
1240 public void setCompleted(long value) throws SevenZipException {
1241 //Not necessary for extract, left intentionally blank
1242 }
1243
/**
 * Returns the password that was passed to this callback's constructor.
 *
 * @return The stored password; may be whatever the constructor received,
 *         including null.
 */
1251 @Override
1252 public String cryptoGetTextPassword() throws SevenZipException {
1253 return password;
1254 }
1255
/**
 * Reports whether every item seen so far was extracted without error.
 *
 * @return False once setOperationResult() has received a result other
 *         than OK for a file; true otherwise.
 */
1256 public boolean wasSuccessful() {
1257 return unpackSuccessful;
1258 }
1259 }
1260
1268 private class UnpackedTree {
1269
1270 final UnpackedNode rootNode;
1271 private int nodesProcessed = 0;
1272
1273 // It is significantly faster to add the DerivedFiles to the case on a transaction,
1274 // but we don't want to hold the transaction (and case write lock) for the entire
1275 // stage. Instead, we use the same transaction for MAX_TRANSACTION_SIZE database operations
1276 // and then commit that transaction and start a new one, giving at least a short window
1277 // for other processes.
1278 private CaseDbTransaction currentTransaction = null;
1279 private long transactionCounter = 0;
1280 private final static long MAX_TRANSACTION_SIZE = 1000;
1281
1288 UnpackedTree(String localPathRoot, AbstractFile archiveFile) {
1289 this.rootNode = new UnpackedNode();
1290 this.rootNode.setFile(archiveFile);
1291 this.rootNode.setFileName(archiveFile.getName());
1292 this.rootNode.setLocalRelPath(localPathRoot);
1293 }
1294
1304 UnpackedNode addNode(String filePath, byte[] filePathBytes) {
1305 String[] toks = filePath.split("[\\/\\\\]");
1306 List<String> tokens = new ArrayList<>();
1307 for (int i = 0; i < toks.length; ++i) {
1308 if (!toks[i].isEmpty()) {
1309 tokens.add(toks[i]);
1310 }
1311 }
1312
1313 List<byte[]> byteTokens;
1314 if (filePathBytes == null) {
1315 return addNode(rootNode, tokens, null);
1316 } else {
1317 byteTokens = new ArrayList<>(tokens.size());
1318 int last = 0;
1319 for (int i = 0; i < filePathBytes.length; i++) {
1320 if (filePathBytes[i] == '/') {
1321 int len = i - last;
1322 if (len > 0) {
1323 byte[] arr = new byte[len];
1324 System.arraycopy(filePathBytes, last, arr, 0, len);
1325 byteTokens.add(arr);
1326 }
1327 last = i + 1;
1328 }
1329 }
1330 int len = filePathBytes.length - last;
1331 if (len > 0) {
1332 byte[] arr = new byte[len];
1333 System.arraycopy(filePathBytes, last, arr, 0, len);
1334 byteTokens.add(arr);
1335 }
1336
1337 if (tokens.size() != byteTokens.size()) {
1338 String rootFileInfo = "(unknown)";
1339 if (rootNode.getFile() != null) {
1340 rootFileInfo = rootNode.getFile().getParentPath() + rootNode.getFile().getName()
1341 + "(ID: " + rootNode.getFile().getId() + ")";
1342 }
1343 logger.log(Level.WARNING, "Could not map path bytes to path string while extracting archive {0} (path string: \"{1}\", bytes: {2})",
1344 new Object[]{rootFileInfo, this.rootNode.getFile().getId(), filePath, bytesToString(filePathBytes)});
1345 return addNode(rootNode, tokens, null);
1346 }
1347 }
1348
1349 return addNode(rootNode, tokens, byteTokens);
1350 }
1351
1359 private String bytesToString(byte[] bytes) {
1360 StringBuilder result = new StringBuilder();
1361 for (byte b : bytes) {
1362 result.append(String.format("%02x", b));
1363 }
1364 return result.toString();
1365 }
1366
1377 List<String> tokenPath, List<byte[]> tokenPathBytes) {
1378 // we found all of the tokens
1379 if (tokenPath.isEmpty()) {
1380 return parent;
1381 }
1382
1383 // get the next name in the path and look it up
1384 String childName = tokenPath.remove(0);
1385 byte[] childNameBytes = null;
1386 if (tokenPathBytes != null) {
1387 childNameBytes = tokenPathBytes.remove(0);
1388 }
1389 UnpackedNode child = parent.getChild(childName);
1390 // create new node
1391 if (child == null) {
1392 child = new UnpackedNode(childName, parent);
1393 child.setFileNameBytes(childNameBytes);
1394 parent.addChild(child);
1395 }
1396
1397 // go down one more level
1398 return addNode(child, tokenPath, tokenPathBytes);
1399 }
1400
1407 List<AbstractFile> getRootFileObjects() {
1408 List<AbstractFile> ret = new ArrayList<>();
1409 rootNode.getChildren().forEach((child) -> {
1410 ret.add(child.getFile());
1411 });
1412 return ret;
1413 }
1414
1421 List<AbstractFile> getAllFileObjects() {
1422 List<AbstractFile> ret = new ArrayList<>();
1423 rootNode.getChildren().forEach((child) -> {
1424 getAllFileObjectsRec(ret, child);
1425 });
1426 return ret;
1427 }
1428
1429 private void getAllFileObjectsRec(List<AbstractFile> list, UnpackedNode parent) {
1430 list.add(parent.getFile());
1431 parent.getChildren().forEach((child) -> {
1432 getAllFileObjectsRec(list, child);
1433 });
1434 }
1435
/**
 * Entry point for adding or updating the unpacked files in the case: runs
 * the recursive overload on every direct child of the root node.
 *
 * NOTE(review): fileManager is passed to the recursive overload but is not
 * declared in this listing (the line numbered 1441 is missing). Restore
 * its declaration from the original source.
 *
 * @param statusMap       Map of file key to status wrapper for files
 *                        already known.
 * @param archiveFilePath Path of the archive, used to build file keys.
 * @param parentAr        The Archive object for the archive being unpacked.
 * @param archiveFile     The archive file being unpacked.
 * @param depthMap        Map of object id to Archive, updated for nested
 *                        archives.
 *
 * @throws TskCoreException       Propagated from the recursive overload.
 * @throws NoCurrentCaseException Declared by this method; no visible
 *                                statement in this listing throws it.
 */
1440 void updateOrAddFileToCaseRec(HashMap<String, ZipFileStatusWrapper> statusMap, String archiveFilePath, Archive parentAr, AbstractFile archiveFile, ConcurrentHashMap<Long, Archive> depthMap) throws TskCoreException, NoCurrentCaseException {
1442 for (UnpackedNode child : rootNode.getChildren()) {
1443 updateOrAddFileToCaseRec(child, fileManager, statusMap, archiveFilePath, parentAr, archiveFile, depthMap);
1444 }
1445 }
1446
1464 private void updateOrAddFileToCaseRec(UnpackedNode node, FileManager fileManager, HashMap<String, ZipFileStatusWrapper> statusMap, String archiveFilePath, Archive parentAr, AbstractFile archiveFile, ConcurrentHashMap<Long, Archive> depthMap) throws TskCoreException {
1465 DerivedFile df;
1466 progress.progress(String.format("%s: Adding/updating files in case database (%d of %d)", currentArchiveName, ++nodesProcessed, numItems));
1467 try {
1468 String nameInDatabase = getKeyFromUnpackedNode(node, archiveFilePath);
1469 ZipFileStatusWrapper existingFile = nameInDatabase == null ? null : statusMap.get(nameInDatabase);
1470 if (existingFile == null) {
1471 df = Case.getCurrentCaseThrows().getSleuthkitCase().addDerivedFile(node.getFileName(), node.getLocalRelPath(), node.getSize(),
1472 node.getCtime(), node.getCrtime(), node.getAtime(), node.getMtime(),
1473 node.isIsFile(), node.getParent().getFile(), "", MODULE_NAME,
1474 "", "", TskData.EncodingType.XOR1, getCurrentTransaction());
1475 statusMap.put(getKeyAbstractFile(df), new ZipFileStatusWrapper(df, ZipFileStatus.EXISTS));
1476 } else {
1477 String key = getKeyAbstractFile(existingFile.getFile());
1478 if (existingFile.getStatus() == ZipFileStatus.EXISTS && existingFile.getFile().getSize() < node.getSize()) {
1479 existingFile.setStatus(ZipFileStatus.UPDATE);
1480 statusMap.put(key, existingFile);
1481 }
1482 if (existingFile.getStatus() == ZipFileStatus.UPDATE) {
1483 //if the we are updating a file and its mime type was octet-stream we want to re-type it
1484 String mimeType = existingFile.getFile().getMIMEType().equalsIgnoreCase("application/octet-stream") ? null : existingFile.getFile().getMIMEType();
1485 df = Case.getCurrentCaseThrows().getSleuthkitCase().updateDerivedFile((DerivedFile) existingFile.getFile(), node.getLocalRelPath(), node.getSize(),
1486 node.getCtime(), node.getCrtime(), node.getAtime(), node.getMtime(),
1487 node.isIsFile(), mimeType, "", MODULE_NAME,
1488 "", "", TskData.EncodingType.XOR1, existingFile.getFile().getParent(), getCurrentTransaction());
1489 } else {
1490 //ALREADY CURRENT - SKIP
1491 statusMap.put(key, new ZipFileStatusWrapper(existingFile.getFile(), ZipFileStatus.SKIP));
1492 df = (DerivedFile) existingFile.getFile();
1493 }
1494 }
1495 node.setFile(df);
1496 } catch (TskCoreException | NoCurrentCaseException ex) {
1497 logger.log(Level.SEVERE, "Error adding a derived file to db:" + node.getFileName(), ex); //NON-NLS
1498 throw new TskCoreException(
1499 NbBundle.getMessage(SevenZipExtractor.class, "EmbeddedFileExtractorIngestModule.ArchiveExtractor.UnpackedTree.exception.msg",
1500 node.getFileName()), ex);
1501 }
1502
1503 // Determine encoding of children
1504 if (node.getChildren().size() > 0) {
1505 String names = "";
1506 ArrayList<byte[]> byteDatas = new ArrayList<>();
1507 for (UnpackedNode child : node.getChildren()) {
1508 byte[] childBytes = child.getFileNameBytes();
1509 if (childBytes != null) {
1510 byteDatas.add(childBytes);
1511 }
1512 names += child.getFileName();
1513 }
1514 Charset detectedCharset = detectFilenamesCharset(byteDatas);
1515
1516 // If a charset was detected, transcode filenames accordingly
1517 if (detectedCharset != null && detectedCharset.canEncode()) {
1518 for (UnpackedNode child : node.getChildren()) {
1519 byte[] childBytes = child.getFileNameBytes();
1520 if (childBytes != null) {
1521 String decodedName = new String(childBytes, detectedCharset);
1522 child.setFileName(decodedName);
1523 }
1524 }
1525 }
1526 }
1527
1528 // Check for zip bombs
1529 if (isSevenZipExtractionSupported(node.getMimeType())) {
1530 Archive child = new Archive(parentAr.getDepth() + 1, parentAr.getRootArchiveId(), archiveFile);
1531 parentAr.addChild(child);
1532 depthMap.put(node.getFile().getId(), child);
1533 }
1534
1535 //recurse adding the children if this file was incomplete the children presumably need to be added
1536 for (UnpackedNode child : node.getChildren()) {
1537 updateOrAddFileToCaseRec(child, fileManager, statusMap, getKeyFromUnpackedNode(node, archiveFilePath), parentAr, archiveFile, depthMap);
1538 }
1539 }
1540
/**
 * Returns the case database transaction currently in use by this tree.
 *
 * NOTE(review): this listing has lost several lines of the body (the
 * lines numbered 1554, 1557-1559 and 1562 are missing), including the
 * body of the null check and the header of a second block. The class
 * comment says a transaction is reused for MAX_TRANSACTION_SIZE operations
 * and then committed and replaced, so presumably that logic lived here;
 * restore it from the original source.
 *
 * @return The current transaction.
 *
 * @throws TskCoreException Declared by this method; the throwing
 *                          statements are not visible in this listing.
 */
1551 private CaseDbTransaction getCurrentTransaction() throws TskCoreException {
1552
1553 if (currentTransaction == null) {
1555 }
1556
1560 }
1561
1563 return currentTransaction;
1564 }
1565
/**
 * Starts a new case database transaction.
 *
 * NOTE(review): the statements inside the try block are missing from this
 * listing (the lines numbered 1573-1574); only the exception translation
 * is visible. Restore them from the original source.
 *
 * @throws TskCoreException If there is no current case (the
 *                          NoCurrentCaseException is replaced, without
 *                          being attached as the cause).
 */
1571 private void startTransaction() throws TskCoreException {
1572 try {
1575 } catch (NoCurrentCaseException ex) {
1576 throw new TskCoreException("Case is closed");
1577 }
1578 }
1579
1585 private void commitCurrentTransaction() throws TskCoreException {
1586 if (currentTransaction != null) {
1587 currentTransaction.commit();
1588 currentTransaction = null;
1589 }
1590 }
1591
// NOTE(review): the method header that opens this body is missing from
// this listing (the line numbered 1595); restore it from the original
// source. The visible body rolls back the current transaction, if any,
// and clears the reference to it.
1596 if (currentTransaction != null) {
1597 try {
1598 currentTransaction.rollback();
1599 currentTransaction = null;
1600 } catch (TskCoreException ex) {
// A failed rollback is deliberately swallowed; note that the reference is
// not cleared in that case.
1601 // Ignored
1602 }
1603 }
1604 }
1605
1609 private class UnpackedNode {
1610
1611 private String fileName;
1612 private byte[] fileNameBytes;
1613 private AbstractFile file;
1614 private final List<UnpackedNode> children = new ArrayList<>();
1615 private String localRelPath = "";
1616 private long size;
1617 private long ctime, crtime, atime, mtime;
1618 private boolean isFile;
1619 private String mimeType = "";
1620 private UnpackedNode parent;
1621
1622 //root constructor
1623 UnpackedNode() {
1624 }
1625
1626 //child node constructor
1628 this.fileName = fileName;
1629 this.parent = parent;
1630 this.localRelPath = parent.getLocalRelPath() + File.separator + fileName;
1631 }
1632
1633 long getCtime() {
1634 return ctime;
1635 }
1636
1637 long getCrtime() {
1638 return crtime;
1639 }
1640
1641 long getAtime() {
1642 return atime;
1643 }
1644
1645 long getMtime() {
1646 return mtime;
1647 }
1648
1649 void setFileName(String fileName) {
1650 this.fileName = fileName;
1651 }
1652
1658 void addChild(UnpackedNode child) {
1659 children.add(child);
1660 }
1661
1668 List<UnpackedNode> getChildren() {
1669 return children;
1670 }
1671
1677 UnpackedNode getParent() {
1678 return parent;
1679 }
1680
1681 void addDerivedInfo(long size,
1682 boolean isFile,
1683 long ctime, long crtime, long atime, long mtime, String relLocalPath) {
1684 this.size = size;
1685 this.isFile = isFile;
1686 this.ctime = ctime;
1687 this.crtime = crtime;
1688 this.atime = atime;
1689 this.mtime = mtime;
1690 this.localRelPath = relLocalPath;
1691 }
1692
1693 void setFile(AbstractFile file) {
1694 this.file = file;
1695 }
1696
1697 void setMimeType(String mimeType) {
1698 this.mimeType = mimeType;
1699 }
1700
1701 String getMimeType() {
1702 return mimeType;
1703 }
1704
1712 UnpackedNode getChild(String childFileName) {
1713 UnpackedNode ret = null;
1714 for (UnpackedNode child : children) {
1715 if (child.getFileName().equals(childFileName)) {
1716 ret = child;
1717 break;
1718 }
1719 }
1720 return ret;
1721 }
1722
1723 String getFileName() {
1724 return fileName;
1725 }
1726
1727 AbstractFile getFile() {
1728 return file;
1729 }
1730
1731 String getLocalRelPath() {
1732 return localRelPath;
1733 }
1734
1741 void setLocalRelPath(String localRelativePath) {
1742 localRelPath = localRelativePath;
1743 }
1744
1745 long getSize() {
1746 return size;
1747 }
1748
1749 boolean isIsFile() {
1750 return isFile;
1751 }
1752
1753 void setFileNameBytes(byte[] fileNameBytes) {
1754 if (fileNameBytes != null) {
1755 this.fileNameBytes = Arrays.copyOf(fileNameBytes, fileNameBytes.length);
1756 }
1757 }
1758
1759 byte[] getFileNameBytes() {
1760 if (fileNameBytes == null) {
1761 return null;
1762 }
1763 return Arrays.copyOf(fileNameBytes, fileNameBytes.length);
1764 }
1765 }
1766 }
1767
1772 static class Archive {
1773
1774 //depth will be 0 for the root archive unpack was called on, and increase as unpack recurses down through archives contained within
1775 private final int depth;
1776 private final List<Archive> children;
1777 private final long rootArchiveId;
1778 private boolean flaggedAsZipBomb = false;
1779 private final AbstractFile archiveFile;
1780
1793 Archive(int depth, long rootArchiveId, AbstractFile archiveFile) {
1794 this.children = new ArrayList<>();
1795 this.depth = depth;
1796 this.rootArchiveId = rootArchiveId;
1797 this.archiveFile = archiveFile;
1798 }
1799
1806 void addChild(Archive child) {
1807 children.add(child);
1808 }
1809
1814 synchronized void flagAsZipBomb() {
1815 flaggedAsZipBomb = true;
1816 }
1817
1823 synchronized boolean isFlaggedAsZipBomb() {
1824 return flaggedAsZipBomb;
1825 }
1826
1832 AbstractFile getArchiveFile() {
1833 return archiveFile;
1834 }
1835
1841 long getRootArchiveId() {
1842 return rootArchiveId;
1843 }
1844
1850 long getObjectId() {
1851 return archiveFile.getId();
1852 }
1853
1861 int getDepth() {
1862 return depth;
1863 }
1864 }
1865
1870 private final class ZipFileStatusWrapper {
1871
1872 private final AbstractFile abstractFile;
1874
1882 private ZipFileStatusWrapper(AbstractFile file, ZipFileStatus status) {
1883 abstractFile = file;
1884 zipStatus = status;
1885 }
1886
1892 private AbstractFile getFile() {
1893 return abstractFile;
1894 }
1895
1902 return zipStatus;
1903 }
1904
1910 private void setStatus(ZipFileStatus status) {
1911 zipStatus = status;
1912 }
1913
1914 }
1915
/**
 * The state of a file that is already known for an archive, used when
 * deciding whether to update it, skip it, or compare it against the newly
 * extracted copy.
 */
1920 private enum ZipFileStatus {
1921 UPDATE, //Should be updated //NON-NLS
1922 SKIP, //File is current and can be skipped //NON-NLS
1923 EXISTS //File exists but it is unknown if it is current //NON-NLS
1924 }
1925}
UnpackedNode addNode(UnpackedNode parent, List< String > tokenPath, List< byte[]> tokenPathBytes)
void updateOrAddFileToCaseRec(UnpackedNode node, FileManager fileManager, HashMap< String, ZipFileStatusWrapper > statusMap, String archiveFilePath, Archive parentAr, AbstractFile archiveFile, ConcurrentHashMap< Long, Archive > depthMap)

Copyright © 2012-2024 Sleuth Kit Labs. Generated on:
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.