19 package org.sleuthkit.autopsy.modules.embeddedfileextractor;
22 import java.io.FileOutputStream;
23 import java.io.IOException;
24 import java.lang.IllegalArgumentException;
25 import java.lang.IndexOutOfBoundsException;
26 import java.lang.NullPointerException;
27 import java.nio.file.Paths;
28 import java.util.ArrayList;
29 import java.util.List;
30 import java.util.logging.Level;
31 import org.apache.poi.POIXMLException;
32 import org.apache.poi.hwpf.usermodel.Picture;
33 import org.apache.poi.hslf.usermodel.HSLFPictureData;
34 import org.apache.poi.hslf.usermodel.HSLFSlideShow;
35 import org.apache.poi.hssf.record.RecordInputStream.LeftoverDataException;
36 import org.apache.poi.hssf.usermodel.HSSFWorkbook;
37 import org.apache.poi.hwpf.HWPFDocument;
38 import org.apache.poi.hwpf.model.PicturesTable;
39 import org.apache.poi.sl.usermodel.PictureData.PictureType;
40 import org.apache.poi.ss.usermodel.Workbook;
41 import org.apache.poi.util.RecordFormatException;
42 import org.apache.poi.xslf.usermodel.XMLSlideShow;
43 import org.apache.poi.xslf.usermodel.XSLFPictureData;
44 import org.apache.poi.xssf.usermodel.XSSFWorkbook;
45 import org.apache.poi.xwpf.usermodel.XWPFDocument;
46 import org.apache.poi.xwpf.usermodel.XWPFPictureData;
47 import org.openide.util.NbBundle;
/**
 * Extracts images embedded in office documents. The recognised formats are
 * listed in SupportedImageExtractionFormats. Extracted images are written
 * below the module output directory and added to the case as derived files
 * of the source document (see extractImage).
 */
61 class ImageExtractor {
// File manager of the current case; used to add extracted images as derived files.
63 private final FileManager fileManager;
// Ingest services; used to fire a ModuleContentEvent after extraction.
64 private final IngestServices services;
65 private static final Logger logger = Logger.getLogger(ImageExtractor.class.getName());
// Ingest job context; newly derived files are added to this job.
66 private final IngestJobContext context;
// Unique name of the document being processed; set by extractImage and read when building output paths.
67 private String parentFileName;
// Name prefix for images that this class names itself (PPT, XLS and XLSX extraction).
68 private final String UNKNOWN_NAME_PREFIX =
"image_";
// Used by isImageExtractionSupported to obtain a file's MIME type.
69 private final FileTypeDetector fileTypeDetector;
// Module output directory in relative and absolute form; both are supplied to the constructor.
71 private String moduleDirRelative;
72 private String moduleDirAbsolute;
/**
 * Document formats from which images can be extracted, each paired with the
 * MIME type string associated with that format.
 */
77 enum SupportedImageExtractionFormats {
79 DOC(
"application/msword"),
80 DOCX(
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"),
81 PPT(
"application/vnd.ms-powerpoint"),
82 PPTX(
"application/vnd.openxmlformats-officedocument.presentationml.presentation"),
83 XLS(
"application/vnd.ms-excel"),
84 XLSX(
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
// MIME type string given to the constant's constructor.
86 private final String mimeType;
// Stores the MIME type for the constant being constructed.
88 SupportedImageExtractionFormats(
final String mimeType) {
89 this.mimeType = mimeType;
// NOTE(review): the body of toString() is not visible in this excerpt.
// isImageExtractionSupported compares its result with a detected MIME type,
// so it presumably returns mimeType - confirm.
93 public String toString() {
// Format matched by the most recent successful isImageExtractionSupported check;
// extractImage switches on it to pick the extraction routine.
98 private SupportedImageExtractionFormats abstractFileExtractionFormat;
/**
 * Creates an extractor. The file manager is taken from the current case and
 * the ingest services from IngestServices.getInstance(); the remaining
 * collaborators are supplied by the caller.
 *
 * @param context           ingest job context that derived files are added to
 * @param fileTypeDetector  detector used to obtain file MIME types
 * @param moduleDirRelative module output directory, relative form (read by getFileRelativePath)
 * @param moduleDirAbsolute module output directory, absolute form (read by getOutputFolderPath)
 */
100 ImageExtractor(IngestJobContext context, FileTypeDetector fileTypeDetector, String moduleDirRelative, String moduleDirAbsolute) {
102 this.fileManager = Case.getCurrentCase().getServices().getFileManager();
103 this.services = IngestServices.getInstance();
104 this.context = context;
105 this.fileTypeDetector = fileTypeDetector;
106 this.moduleDirRelative = moduleDirRelative;
107 this.moduleDirAbsolute = moduleDirAbsolute;
/**
 * Checks whether the detected MIME type of the file equals the string form
 * of one of the SupportedImageExtractionFormats constants. On a match the
 * constant is stored in abstractFileExtractionFormat.
 *
 * @param abstractFile file to check
 * @return NOTE(review): the return statements are not visible in this
 *         excerpt; presumably true on a match and false otherwise,
 *         including when type detection fails - confirm.
 */
119 boolean isImageExtractionSupported(AbstractFile abstractFile) {
121 String abstractFileMimeType = fileTypeDetector.getFileType(abstractFile);
// Compare against every supported format and remember the one that matches.
122 for (SupportedImageExtractionFormats s : SupportedImageExtractionFormats.values()) {
123 if (s.toString().equals(abstractFileMimeType)) {
124 abstractFileExtractionFormat = s;
129 }
// A failure while detecting the file type is logged at SEVERE level.
catch (TskCoreException ex) {
130 logger.log(Level.SEVERE,
"Error executing FileTypeDetector.getFileType()", ex);
/**
 * Extracts the images embedded in the given document, adds each of them to
 * the case as a derived file of that document and, when the list of
 * extracted images is not empty, fires a ModuleContentEvent for the document
 * and adds the derived files to the ingest job. The extraction routine is
 * chosen from abstractFileExtractionFormat, which
 * isImageExtractionSupported assigns.
 *
 * @param abstractFile document to extract images from
 */
144 void extractImage(AbstractFile abstractFile) {
149 List<ExtractedImage> listOfExtractedImages = null;
150 List<AbstractFile> listOfExtractedImageAbstractFiles = null;
// Remember the document's unique name; the path helpers read it from this field.
151 this.parentFileName = EmbeddedFileExtractorIngestModule.getUniqueName(abstractFile);
// A document that already has children and whose output folder exists is
// reported as already processed.
// NOTE(review): the format-specific helpers null-check the result of
// getOutputFolderPath, but here it is passed straight to new File - confirm
// it cannot be null on this path.
154 if (abstractFile.hasChildren()) {
156 if (
new File(getOutputFolderPath(parentFileName)).exists()) {
157 logger.log(Level.INFO,
"File already has been processed as it has children and local unpacked file, skipping: {0}", abstractFile.getName());
161 }
catch (TskCoreException e) {
162 logger.log(Level.SEVERE, String.format(
"Error checking if file already has been processed, skipping: %s", parentFileName), e);
// Dispatch to the helper for the detected format (the case labels are not
// visible in this excerpt).
165 switch (abstractFileExtractionFormat) {
167 listOfExtractedImages = extractImagesFromDoc(abstractFile);
170 listOfExtractedImages = extractImagesFromDocx(abstractFile);
173 listOfExtractedImages = extractImagesFromPpt(abstractFile);
176 listOfExtractedImages = extractImagesFromPptx(abstractFile);
179 listOfExtractedImages = extractImagesFromXls(abstractFile);
182 listOfExtractedImages = extractImagesFromXlsx(abstractFile);
// A null result from the helper is handled before the list is iterated
// (the handling itself is not visible in this excerpt).
188 if (listOfExtractedImages == null) {
192 listOfExtractedImageAbstractFiles =
new ArrayList<>();
// Add every extracted image to the case as a derived file of abstractFile,
// flagged with the XOR1 encoding that writeExtractedImage also uses.
// NOTE(review): getAtime() is passed twice in the time arguments below; the
// ExtractedImage constructor takes ctime, crtime, atime, mtime, so the last
// one is presumably meant to be the modified time - confirm against
// addDerivedFile's parameter order.
193 for (ExtractedImage extractedImage : listOfExtractedImages) {
195 listOfExtractedImageAbstractFiles.add(fileManager.addDerivedFile(extractedImage.getFileName(), extractedImage.getLocalPath(), extractedImage.getSize(),
196 extractedImage.getCtime(), extractedImage.getCrtime(), extractedImage.getAtime(), extractedImage.getAtime(),
197 true, abstractFile, null, EmbeddedFileExtractorModuleFactory.getModuleName(), null, null, TskData.EncodingType.XOR1));
198 }
// A failure to add one image is logged at SEVERE level.
catch (TskCoreException ex) {
199 logger.log(Level.SEVERE, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.extractImage.addToDB.exception.msg"), ex);
// Only when images were extracted: announce the changed document and hand
// the new derived files to the ingest job.
202 if (!listOfExtractedImages.isEmpty()) {
203 services.fireModuleContentEvent(
new ModuleContentEvent(abstractFile));
204 context.addFilesToJob(listOfExtractedImageAbstractFiles);
/**
 * Extracts the pictures of a DOC file through POI's HWPFDocument and its
 * PicturesTable, and writes each picture into the output folder of the
 * parent file.
 *
 * @param af the DOC file
 * @return one ExtractedImage per picture processed by the loop.
 *         NOTE(review): the outcomes for a failed load, an empty picture
 *         list, a null output folder and a failed picture read are not
 *         visible in this excerpt - confirm what the caller receives.
 */
216 private List<ExtractedImage> extractImagesFromDoc(AbstractFile af) {
217 List<Picture> listOfAllPictures;
// NOTE(review): neither the ReadContentInputStream nor the document is closed
// in the visible code - confirm whether that is handled elsewhere.
220 HWPFDocument doc =
new HWPFDocument(
new ReadContentInputStream(af));
221 PicturesTable pictureTable = doc.getPicturesTable();
222 listOfAllPictures = pictureTable.getAllPictures();
223 }
// Handler for these specific exception types; its body is not visible in this excerpt.
catch (IOException | IllegalArgumentException |
224 IndexOutOfBoundsException | NullPointerException ex) {
243 }
// Anything else thrown while loading the document is logged at SEVERE level.
catch (Throwable ex) {
245 logger.log(Level.SEVERE, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.docContainer.init.err", af.getName()), ex);
249 String outputFolderPath;
250 if (listOfAllPictures.isEmpty()) {
// The output folder is derived from the parent file name set by extractImage.
253 outputFolderPath = getOutputFolderPath(this.parentFileName);
255 if (outputFolderPath == null) {
258 List<ExtractedImage> listOfExtractedImages =
new ArrayList<>();
// Each picture is written under the file name POI suggests for it; the
// recorded size is the one the picture reports.
260 for (Picture picture : listOfAllPictures) {
261 String fileName = picture.suggestFullFileName();
263 data = picture.getContent();
264 }
catch (Exception ex) {
267 writeExtractedImage(Paths.get(outputFolderPath, fileName).toString(), data);
269 listOfExtractedImages.add(
new ExtractedImage(fileName, getFileRelativePath(fileName), picture.getSize(), af));
272 return listOfExtractedImages;
/**
 * Extracts the pictures of a DOCX file through POI's XWPFDocument and writes
 * each picture into the output folder of the parent file.
 *
 * @param af the DOCX file
 * @return one ExtractedImage per picture processed by the loop.
 *         NOTE(review): the outcomes for a failed load, an empty picture
 *         list, a null output folder and a failed picture read are not
 *         visible in this excerpt - confirm what the caller receives.
 */
283 private List<ExtractedImage> extractImagesFromDocx(AbstractFile af) {
284 List<XWPFPictureData> listOfAllPictures = null;
// NOTE(review): neither the ReadContentInputStream nor the document is closed
// in the visible code - confirm whether that is handled elsewhere.
287 XWPFDocument docx =
new XWPFDocument(
new ReadContentInputStream(af));
288 listOfAllPictures = docx.getAllPictures();
289 }
// Handler for these specific exception types; its body is not visible in this excerpt.
catch (POIXMLException | IOException ex) {
297 }
// Anything else thrown while loading the document is logged at SEVERE level.
catch (Throwable ex) {
299 logger.log(Level.SEVERE, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.docxContainer.init.err", af.getName()), ex);
305 String outputFolderPath;
306 if (listOfAllPictures.isEmpty()) {
// The output folder is derived from the parent file name set by extractImage.
309 outputFolderPath = getOutputFolderPath(this.parentFileName);
311 if (outputFolderPath == null) {
314 List<ExtractedImage> listOfExtractedImages =
new ArrayList<>();
// Each picture keeps the file name stored in the document.
316 for (XWPFPictureData xwpfPicture : listOfAllPictures) {
317 String fileName = xwpfPicture.getFileName();
319 data = xwpfPicture.getData();
320 }
catch (Exception ex) {
323 writeExtractedImage(Paths.get(outputFolderPath, fileName).toString(), data);
// The recorded size comes from a second getData() call rather than from the
// data variable assigned above.
324 listOfExtractedImages.add(
new ExtractedImage(fileName, getFileRelativePath(fileName), xwpfPicture.getData().length, af));
326 return listOfExtractedImages;
/**
 * Extracts the pictures of a PPT file through POI's HSLFSlideShow and writes
 * each picture into the output folder of the parent file under a generated
 * name.
 *
 * @param af the PPT file
 * @return one ExtractedImage per picture processed by the loop.
 *         NOTE(review): the outcomes for a failed load, an empty picture
 *         list, a null output folder and a failed picture read are not
 *         visible in this excerpt - confirm what the caller receives.
 */
337 private List<ExtractedImage> extractImagesFromPpt(AbstractFile af) {
338 List<HSLFPictureData> listOfAllPictures = null;
// NOTE(review): neither the ReadContentInputStream nor the slide show is
// closed in the visible code - confirm whether that is handled elsewhere.
341 HSLFSlideShow ppt =
new HSLFSlideShow(
new ReadContentInputStream(af));
342 listOfAllPictures = ppt.getPictureData();
343 }
// Handler for these specific exception types; its body is not visible in this excerpt.
catch (IOException | IllegalArgumentException |
344 IndexOutOfBoundsException ex) {
359 }
// Anything else thrown while loading the slide show is logged at SEVERE level.
catch (Throwable ex) {
361 logger.log(Level.SEVERE, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.pptContainer.init.err", af.getName()), ex);
367 String outputFolderPath;
368 if (listOfAllPictures.isEmpty()) {
// The output folder is derived from the parent file name set by extractImage.
371 outputFolderPath = getOutputFolderPath(this.parentFileName);
373 if (outputFolderPath == null) {
380 List<ExtractedImage> listOfExtractedImages =
new ArrayList<>();
382 for (HSLFPictureData pictureData : listOfAllPictures) {
386 PictureType type = pictureData.getType();
// The name is built from the shared prefix, i and ext.
// NOTE(review): i and ext are defined in lines not visible in this excerpt;
// presumably a running counter and an extension chosen from the picture
// type - confirm.
407 String imageName = UNKNOWN_NAME_PREFIX + i + ext;
409 data = pictureData.getData();
410 }
catch (Exception ex) {
413 writeExtractedImage(Paths.get(outputFolderPath, imageName).toString(), data);
// The recorded size comes from a second getData() call rather than from the
// data variable assigned above.
414 listOfExtractedImages.add(
new ExtractedImage(imageName, getFileRelativePath(imageName), pictureData.getData().length, af));
417 return listOfExtractedImages;
/**
 * Extracts the pictures of a PPTX file through POI's XMLSlideShow and writes
 * each picture into the output folder of the parent file.
 *
 * @param af the PPTX file
 * @return one ExtractedImage per picture processed by the loop.
 *         NOTE(review): the outcomes for a failed load, an empty picture
 *         list, a null output folder and a failed picture read are not
 *         visible in this excerpt - confirm what the caller receives.
 */
428 private List<ExtractedImage> extractImagesFromPptx(AbstractFile af) {
429 List<XSLFPictureData> listOfAllPictures = null;
// NOTE(review): neither the ReadContentInputStream nor the slide show is
// closed in the visible code - confirm whether that is handled elsewhere.
432 XMLSlideShow pptx =
new XMLSlideShow(
new ReadContentInputStream(af));
433 listOfAllPictures = pptx.getPictureData();
434 }
// Handler for these specific exception types; its body is not visible in this excerpt.
catch (POIXMLException | IOException ex) {
442 }
// Anything else thrown while loading the slide show is logged at SEVERE level.
catch (Throwable ex) {
444 logger.log(Level.SEVERE, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.pptxContainer.init.err", af.getName()), ex);
450 String outputFolderPath;
451 if (listOfAllPictures.isEmpty()) {
// The output folder is derived from the parent file name set by extractImage.
454 outputFolderPath = getOutputFolderPath(this.parentFileName);
456 if (outputFolderPath == null) {
460 List<ExtractedImage> listOfExtractedImages =
new ArrayList<>();
// Each picture keeps the file name stored in the presentation.
462 for (XSLFPictureData xslsPicture : listOfAllPictures) {
466 String fileName = xslsPicture.getFileName();
468 data = xslsPicture.getData();
469 }
catch (Exception ex) {
472 writeExtractedImage(Paths.get(outputFolderPath, fileName).toString(), data);
// The recorded size comes from a second getData() call rather than from the
// data variable assigned above.
473 listOfExtractedImages.add(
new ExtractedImage(fileName, getFileRelativePath(fileName), xslsPicture.getData().length, af));
477 return listOfExtractedImages;
/**
 * Extracts the pictures of an XLS file through POI's HSSFWorkbook and writes
 * each picture into the output folder of the parent file under a generated
 * name.
 *
 * @param af the XLS file
 * @return one ExtractedImage per picture processed by the loop.
 *         NOTE(review): the outcomes for a failed load, an empty picture
 *         list, a null output folder and a failed picture read are not
 *         visible in this excerpt - confirm what the caller receives.
 */
489 private List<ExtractedImage> extractImagesFromXls(AbstractFile af) {
490 List<? extends
org.apache.poi.ss.usermodel.PictureData> listOfAllPictures = null;
// NOTE(review): neither the ReadContentInputStream nor the workbook is closed
// in the visible code - confirm whether that is handled elsewhere.
493 Workbook xls =
new HSSFWorkbook(
new ReadContentInputStream(af));
494 listOfAllPictures = xls.getAllPictures();
495 }
// Handler for these specific exception types; its body is not visible in this excerpt.
catch (IOException | LeftoverDataException |
496 RecordFormatException | IllegalArgumentException |
497 IndexOutOfBoundsException ex) {
521 }
// Anything else thrown while loading the workbook is logged at SEVERE level.
// NOTE(review): unlike the sibling helpers, af.getName() is passed to the
// bundle message and also appended after it, so the name may be logged
// twice - confirm against the bundle entry.
catch (Throwable ex) {
523 logger.log(Level.SEVERE, String.format(
"%s%s", NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.xlsContainer.init.err", af.getName()), af.getName()), ex);
529 String outputFolderPath;
530 if (listOfAllPictures.isEmpty()) {
// The output folder is derived from the parent file name set by extractImage.
533 outputFolderPath = getOutputFolderPath(this.parentFileName);
535 if (outputFolderPath == null) {
540 List<ExtractedImage> listOfExtractedImages =
new ArrayList<>();
// The name is the shared prefix, then i, then a dot and the extension POI suggests.
// NOTE(review): i is defined in lines not visible in this excerpt;
// presumably a running counter - confirm.
542 for (
org.apache.poi.ss.usermodel.PictureData pictureData : listOfAllPictures) {
543 String imageName = UNKNOWN_NAME_PREFIX + i +
"." + pictureData.suggestFileExtension();
545 data = pictureData.getData();
546 }
catch (Exception ex) {
549 writeExtractedImage(Paths.get(outputFolderPath, imageName).toString(), data);
// The recorded size comes from a second getData() call rather than from the
// data variable assigned above.
550 listOfExtractedImages.add(
new ExtractedImage(imageName, getFileRelativePath(imageName), pictureData.getData().length, af));
553 return listOfExtractedImages;
/**
 * Extracts the pictures of an XLSX file through POI's XSSFWorkbook and
 * writes each picture into the output folder of the parent file under a
 * generated name.
 *
 * @param af the XLSX file
 * @return one ExtractedImage per picture processed by the loop.
 *         NOTE(review): the outcomes for a failed load, an empty picture
 *         list, a null output folder and a failed picture read are not
 *         visible in this excerpt - confirm what the caller receives.
 */
565 private List<ExtractedImage> extractImagesFromXlsx(AbstractFile af) {
566 List<? extends
org.apache.poi.ss.usermodel.PictureData> listOfAllPictures = null;
// NOTE(review): neither the ReadContentInputStream nor the workbook is closed
// in the visible code - confirm whether that is handled elsewhere.
569 Workbook xlsx =
new XSSFWorkbook(
new ReadContentInputStream(af));
570 listOfAllPictures = xlsx.getAllPictures();
571 }
// Handler for these specific exception types; its body is not visible in this excerpt.
catch (POIXMLException | IOException ex) {
579 }
// Anything else thrown while loading the workbook is logged at SEVERE level.
catch (Throwable ex) {
581 logger.log(Level.SEVERE, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.xlsxContainer.init.err", af.getName()), ex);
587 String outputFolderPath;
588 if (listOfAllPictures.isEmpty()) {
// The output folder is derived from the parent file name set by extractImage.
591 outputFolderPath = getOutputFolderPath(this.parentFileName);
593 if (outputFolderPath == null) {
598 List<ExtractedImage> listOfExtractedImages =
new ArrayList<>();
// The name is the shared prefix, then i, then a dot and the extension POI suggests.
// NOTE(review): i is defined in lines not visible in this excerpt;
// presumably a running counter - confirm.
600 for (
org.apache.poi.ss.usermodel.PictureData pictureData : listOfAllPictures) {
601 String imageName = UNKNOWN_NAME_PREFIX + i +
"." + pictureData.suggestFileExtension();
603 data = pictureData.getData();
604 }
catch (Exception ex) {
607 writeExtractedImage(Paths.get(outputFolderPath, imageName).toString(), data);
// The recorded size comes from a second getData() call rather than from the
// data variable assigned above.
608 listOfExtractedImages.add(
new ExtractedImage(imageName, getFileRelativePath(imageName), pictureData.getData().length, af));
611 return listOfExtractedImages;
/**
 * Opens the given path for writing through an EncodedFileOutputStream
 * configured with TskData.EncodingType.XOR1. The stream is declared in a
 * try-with-resources header, so it is closed when the block ends. An
 * IOException is caught and logged at WARNING level.
 *
 * NOTE(review): the statement that writes data to the stream is not visible
 * in this excerpt.
 *
 * @param outputPath path of the file to create
 * @param data       image bytes
 */
622 private void writeExtractedImage(String outputPath, byte[] data) {
623 try (EncodedFileOutputStream fos =
new EncodedFileOutputStream(
new FileOutputStream(outputPath), TskData.EncodingType.XOR1)) {
625 }
catch (IOException ex) {
626 logger.log(Level.WARNING,
"Could not write to the provided location: " + outputPath, ex);
/**
 * Builds the output folder path for a parent file as moduleDirAbsolute,
 * File.separator and parentFileName, and creates the folder (with any
 * missing parent directories) when it does not exist yet.
 *
 * @param parentFileName unique name of the parent file
 * @return the output folder path. NOTE(review): what is returned after a
 *         SecurityException has been logged is not visible in this excerpt;
 *         callers null-check the result, so presumably null - confirm.
 */
639 private String getOutputFolderPath(String parentFileName) {
640 String outputFolderPath = moduleDirAbsolute + File.separator + parentFileName;
641 File outputFilePath =
new File(outputFolderPath);
642 if (!outputFilePath.exists()) {
// NOTE(review): the boolean result of mkdirs() is not checked, so a failure
// that does not throw goes unnoticed here.
644 outputFilePath.mkdirs();
645 }
// Lack of permission to create the folder is logged at WARNING level.
catch (SecurityException ex) {
646 logger.log(Level.WARNING, NbBundle.getMessage(
this.getClass(),
"EmbeddedFileExtractorIngestModule.ImageExtractor.getOutputFolderPath.exception.msg", parentFileName), ex);
650 return outputFolderPath;
/**
 * Builds the relative path of an extracted file from moduleDirRelative, the
 * current parentFileName and the given file name. Each part is preceded by a
 * literal forward slash; File.separator is not used here.
 *
 * @param fileName name of the extracted file
 * @return the relative path, starting with a forward slash
 */
662 private String getFileRelativePath(String fileName) {
664 return "/" + moduleDirRelative +
"/" + this.parentFileName +
"/" + fileName;
685 ExtractedImage(String fileName, String localPath,
long size, AbstractFile parentFile) {
689 ExtractedImage(String fileName, String localPath,
long size,
long ctime,
long crtime,
long atime,
long mtime, AbstractFile parentFile) {