Autopsy  3.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
ImageExtractor.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2015 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.modules.embeddedfileextractor;
20 
21 import java.io.File;
22 import java.io.FileOutputStream;
23 import java.io.IOException;
24 import java.nio.file.Paths;
25 import java.util.ArrayList;
26 import java.util.List;
27 import java.util.logging.Level;
28 import org.apache.poi.OldFileFormatException;
29 import org.apache.poi.hslf.model.Picture;
30 import org.apache.poi.hslf.usermodel.PictureData;
31 import org.apache.poi.hslf.usermodel.SlideShow;
32 import org.apache.poi.hssf.usermodel.HSSFWorkbook;
33 import org.apache.poi.hwpf.HWPFDocument;
34 import org.apache.poi.hwpf.model.PicturesTable;
35 import org.apache.poi.ss.usermodel.Workbook;
36 import org.apache.poi.xslf.usermodel.XMLSlideShow;
37 import org.apache.poi.xslf.usermodel.XSLFPictureData;
38 import org.apache.poi.xssf.usermodel.XSSFWorkbook;
39 import org.apache.poi.xwpf.usermodel.XWPFDocument;
40 import org.apache.poi.xwpf.usermodel.XWPFPictureData;
41 import org.openide.util.NbBundle;
52 
53 class ImageExtractor {
54 
55  private final FileManager fileManager;
56  private final IngestServices services;
57  private static final Logger logger = Logger.getLogger(ImageExtractor.class.getName());
58  private final IngestJobContext context;
59  private String parentFileName;
60  private final String UNKNOWN_NAME_PREFIX = "image_";
61  private final FileTypeDetector fileTypeDetector;
62 
63  private String moduleDirRelative;
64  private String moduleDirAbsolute;
/**
 * Document formats from which embedded images can be extracted. Each constant
 * carries the MIME type it is matched against; {@link #toString()} returns
 * that MIME type rather than the constant name.
 */
enum SupportedImageExtractionFormats {

    DOC("application/msword"),
    DOCX("application/vnd.openxmlformats-officedocument.wordprocessingml.document"),
    PPT("application/vnd.ms-powerpoint"),
    PPTX("application/vnd.openxmlformats-officedocument.presentationml.presentation"),
    XLS("application/vnd.ms-excel"),
    XLSX("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
    // TODO Expand to support more formats

    /** MIME type string associated with this format. */
    private final String mime;

    SupportedImageExtractionFormats(final String mime) {
        this.mime = mime;
    }

    /**
     * @return the MIME type of this format
     */
    @Override
    public String toString() {
        return mime;
    }
}
89  private SupportedImageExtractionFormats abstractFileExtractionFormat;
90 
91  ImageExtractor(IngestJobContext context, FileTypeDetector fileTypeDetector, String moduleDirRelative, String moduleDirAbsolute) {
92 
93  this.fileManager = Case.getCurrentCase().getServices().getFileManager();
94  this.services = IngestServices.getInstance();
95  this.context = context;
96  this.fileTypeDetector = fileTypeDetector;
97  this.moduleDirRelative = moduleDirRelative;
98  this.moduleDirAbsolute = moduleDirAbsolute;
99  }
100 
109  boolean isImageExtractionSupported(AbstractFile abstractFile) {
110  try {
111  String abstractFileMimeType = fileTypeDetector.getFileType(abstractFile);
112  for (SupportedImageExtractionFormats s : SupportedImageExtractionFormats.values()) {
113  if (s.toString().equals(abstractFileMimeType)) {
114  abstractFileExtractionFormat = s;
115  return true;
116  }
117  }
118  return false;
119  } catch (TskCoreException ex) {
120  logger.log(Level.WARNING, "Error executing FileTypeDetector.getFileType()", ex); // NON-NLS
121  return false;
122  }
123  }
124 
134  void extractImage(AbstractFile abstractFile) {
135  //
136  // switchcase for different supported formats
137  // process abstractFile according to the format by calling appropriate methods.
138 
139  List<ExtractedImage> listOfExtractedImages = null;
140  List<AbstractFile> listOfExtractedImageAbstractFiles = null;
141  this.parentFileName = EmbeddedFileExtractorIngestModule.getUniqueName(abstractFile);
142  //check if already has derived files, skip
143  try {
144  if (abstractFile.hasChildren()) {
145  //check if local unpacked dir exists
146  if (new File(getOutputFolderPath(parentFileName)).exists()) {
147  logger.log(Level.INFO, "File already has been processed as it has children and local unpacked file, skipping: {0}", abstractFile.getName()); //NON-NLS
148  return;
149  }
150  }
151  } catch (TskCoreException e) {
152  logger.log(Level.INFO, "Error checking if file already has been processed, skipping: {0}", parentFileName); //NON-NLS
153  return;
154  }
155  switch (abstractFileExtractionFormat) {
156  case DOC:
157  listOfExtractedImages = extractImagesFromDoc(abstractFile);
158  break;
159  case DOCX:
160  listOfExtractedImages = extractImagesFromDocx(abstractFile);
161  break;
162  case PPT:
163  listOfExtractedImages = extractImagesFromPpt(abstractFile);
164  break;
165  case PPTX:
166  listOfExtractedImages = extractImagesFromPptx(abstractFile);
167  break;
168  case XLS:
169  listOfExtractedImages = extractImagesFromXls(abstractFile);
170  break;
171  case XLSX:
172  listOfExtractedImages = extractImagesFromXlsx(abstractFile);
173  break;
174  default:
175  break;
176  }
177 
178  if (listOfExtractedImages == null) {
179  return;
180  }
181  // the common task of adding abstractFile to derivedfiles is performed.
182  listOfExtractedImageAbstractFiles = new ArrayList<>();
183  for (ExtractedImage extractedImage : listOfExtractedImages) {
184  try {
185  listOfExtractedImageAbstractFiles.add(fileManager.addDerivedFile(extractedImage.getFileName(), extractedImage.getLocalPath(), extractedImage.getSize(),
186  extractedImage.getCtime(), extractedImage.getCrtime(), extractedImage.getAtime(), extractedImage.getAtime(),
187  true, abstractFile, null, EmbeddedFileExtractorModuleFactory.getModuleName(), null, null));
188  } catch (TskCoreException ex) {
189  logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.extractImage.addToDB.exception.msg"), ex); //NON-NLS
190  }
191  }
192  if (!listOfExtractedImages.isEmpty()) {
193  services.fireModuleContentEvent(new ModuleContentEvent(abstractFile));
194  context.addFilesToJob(listOfExtractedImageAbstractFiles);
195  }
196  }
197 
205  private List<ExtractedImage> extractImagesFromDoc(AbstractFile af) {
206  List<ExtractedImage> listOfExtractedImages;
207  HWPFDocument doc = null;
208  try {
209  doc = new HWPFDocument(new ReadContentInputStream(af));
210  } catch (Throwable ex) {
211  // instantiating POI containers throw RuntimeExceptions
212  logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.docContainer.init.err", af.getName())); //NON-NLS
213  return null;
214  }
215  PicturesTable pictureTable = doc.getPicturesTable();
216  List<org.apache.poi.hwpf.usermodel.Picture> listOfAllPictures = pictureTable.getAllPictures();
217  String outputFolderPath;
218  if (listOfAllPictures.isEmpty()) {
219  return null;
220  } else {
221  outputFolderPath = getOutputFolderPath(this.parentFileName);
222  }
223  if (outputFolderPath == null) {
224  return null;
225  }
226  listOfExtractedImages = new ArrayList<>();
227  for (org.apache.poi.hwpf.usermodel.Picture picture : listOfAllPictures) {
228  String fileName = picture.suggestFullFileName();
229  writeExtractedImage(Paths.get(outputFolderPath, fileName).toString(), picture.getContent());
230  // TODO Extract more info from the Picture viz ctime, crtime, atime, mtime
231  listOfExtractedImages.add(new ExtractedImage(fileName, getFileRelativePath(fileName), picture.getSize(), af));
232  }
233 
234  return listOfExtractedImages;
235  }
236 
244  private List<ExtractedImage> extractImagesFromDocx(AbstractFile af) {
245  List<ExtractedImage> listOfExtractedImages;
246  XWPFDocument docx = null;
247  try {
248  docx = new XWPFDocument(new ReadContentInputStream(af));
249  } catch (Throwable ex) {
250  // instantiating POI containers throw RuntimeExceptions
251  logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.docxContainer.init.err", af.getName())); //NON-NLS
252  return null;
253  }
254  List<XWPFPictureData> listOfAllPictures = docx.getAllPictures();
255 
256  // if no images are extracted from the PPT, return null, else initialize
257  // the output folder for image extraction.
258  String outputFolderPath;
259  if (listOfAllPictures.isEmpty()) {
260  return null;
261  } else {
262  outputFolderPath = getOutputFolderPath(this.parentFileName);
263  }
264  if (outputFolderPath == null) {
265  logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg", af.getName()));
266  return null;
267  }
268  listOfExtractedImages = new ArrayList<>();
269  for (XWPFPictureData xwpfPicture : listOfAllPictures) {
270  String fileName = xwpfPicture.getFileName();
271  writeExtractedImage(Paths.get(outputFolderPath, fileName).toString(), xwpfPicture.getData());
272  listOfExtractedImages.add(new ExtractedImage(fileName, getFileRelativePath(fileName), xwpfPicture.getData().length, af));
273  }
274  return listOfExtractedImages;
275  }
276 
284  private List<ExtractedImage> extractImagesFromPpt(AbstractFile af) {
285  List<ExtractedImage> listOfExtractedImages;
286  SlideShow ppt = null;
287  try {
288  ppt = new SlideShow(new ReadContentInputStream(af));
289  } catch (Throwable ex) {
290  // instantiating POI containers throw RuntimeExceptions
291  logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.pptContainer.init.err", af.getName())); //NON-NLS
292  return null;
293  }
294 
295  //extract all pictures contained in the presentation
296  PictureData[] listOfAllPictures = ppt.getPictureData();
297 
298  // if no images are extracted from the PPT, return null, else initialize
299  // the output folder for image extraction.
300  String outputFolderPath;
301  if (listOfAllPictures.length == 0) {
302  return null;
303  } else {
304  outputFolderPath = getOutputFolderPath(this.parentFileName);
305  }
306  if (outputFolderPath == null) {
307  logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg", af.getName()));
308  return null;
309  }
310 
311  // extract the images to the above initialized outputFolder.
312  // extraction path - outputFolder/image_number.ext
313  int i = 0;
314  listOfExtractedImages = new ArrayList<>();
315  for (PictureData pictureData : listOfAllPictures) {
316 
317  // Get image extension, generate image name, write image to the module
318  // output folder, add it to the listOfExtractedImageAbstractFiles
319  int type = pictureData.getType();
320  String ext;
321  switch (type) {
322  case Picture.JPEG:
323  ext = ".jpg"; //NON-NLS
324  break;
325  case Picture.PNG:
326  ext = ".png"; //NON-NLS
327  break;
328  case Picture.WMF:
329  ext = ".wmf"; //NON-NLS
330  break;
331  case Picture.EMF:
332  ext = ".emf"; //NON-NLS
333  break;
334  case Picture.PICT:
335  ext = ".pict"; //NON-NLS
336  break;
337  default:
338  continue;
339  }
340  String imageName = UNKNOWN_NAME_PREFIX + i + ext; //NON-NLS
341  writeExtractedImage(Paths.get(outputFolderPath, imageName).toString(), pictureData.getData());
342  listOfExtractedImages.add(new ExtractedImage(imageName, getFileRelativePath(imageName), pictureData.getData().length, af));
343  i++;
344  }
345  return listOfExtractedImages;
346  }
347 
355  private List<ExtractedImage> extractImagesFromPptx(AbstractFile af) {
356  List<ExtractedImage> listOfExtractedImages;
357  XMLSlideShow pptx;
358  try {
359  pptx = new XMLSlideShow(new ReadContentInputStream(af));
360  } catch (Throwable ex) {
361  // instantiating POI containers throw RuntimeExceptions
362  logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.pptxContainer.init.err", af.getName())); //NON-NLS
363  return null;
364  }
365  List<XSLFPictureData> listOfAllPictures = pptx.getAllPictures();
366 
367  // if no images are extracted from the PPT, return null, else initialize
368  // the output folder for image extraction.
369  String outputFolderPath;
370  if (listOfAllPictures.isEmpty()) {
371  return null;
372  } else {
373  outputFolderPath = getOutputFolderPath(this.parentFileName);
374  }
375  if (outputFolderPath == null) {
376  logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg", af.getName()));
377  return null;
378  }
379 
380  listOfExtractedImages = new ArrayList<>();
381  for (XSLFPictureData xslsPicture : listOfAllPictures) {
382 
383  // get image file name, write it to the module outputFolder, and add
384  // it to the listOfExtractedImageAbstractFiles.
385  String fileName = xslsPicture.getFileName();
386  writeExtractedImage(Paths.get(outputFolderPath, fileName).toString(), xslsPicture.getData());
387  listOfExtractedImages.add(new ExtractedImage(fileName, getFileRelativePath(fileName), xslsPicture.getData().length, af));
388 
389  }
390 
391  return listOfExtractedImages;
392 
393  }
394 
402  private List<ExtractedImage> extractImagesFromXls(AbstractFile af) {
403  List<ExtractedImage> listOfExtractedImages;
404 
405  Workbook xls;
406  try {
407  xls = new HSSFWorkbook(new ReadContentInputStream(af));
408  } catch (Throwable ex) {
409  // instantiating POI containers throw RuntimeExceptions
410  logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.xlsContainer.init.err", af.getName()) + af.getName()); //NON-NLS
411  return null;
412  }
413 
414  List<? extends org.apache.poi.ss.usermodel.PictureData> listOfAllPictures = xls.getAllPictures();
415  // if no images are extracted from the PPT, return null, else initialize
416  // the output folder for image extraction.
417  String outputFolderPath;
418  if (listOfAllPictures.isEmpty()) {
419  return null;
420  } else {
421  outputFolderPath = getOutputFolderPath(this.parentFileName);
422  }
423  if (outputFolderPath == null) {
424  logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg", af.getName()));
425  return null;
426  }
427 
428  int i = 0;
429  listOfExtractedImages = new ArrayList<>();
430  for (org.apache.poi.ss.usermodel.PictureData pictureData : listOfAllPictures) {
431  String imageName = UNKNOWN_NAME_PREFIX + i + "." + pictureData.suggestFileExtension(); //NON-NLS
432  writeExtractedImage(Paths.get(outputFolderPath, imageName).toString(), pictureData.getData());
433  listOfExtractedImages.add(new ExtractedImage(imageName, getFileRelativePath(imageName), pictureData.getData().length, af));
434  i++;
435  }
436  return listOfExtractedImages;
437 
438  }
439 
447  private List<ExtractedImage> extractImagesFromXlsx(AbstractFile af) {
448  List<ExtractedImage> listOfExtractedImages;
449  Workbook xlsx;
450  try {
451  xlsx = new XSSFWorkbook(new ReadContentInputStream(af));
452  } catch (Throwable ex) {
453  // instantiating POI containers throw RuntimeExceptions
454  logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.xlsxContainer.init.err", af.getName())); //NON-NLS
455  return null;
456  }
457 
458  List<? extends org.apache.poi.ss.usermodel.PictureData> listOfAllPictures = xlsx.getAllPictures();
459  // if no images are extracted from the PPT, return null, else initialize
460  // the output folder for image extraction.
461  String outputFolderPath;
462  if (listOfAllPictures.isEmpty()) {
463  return null;
464  } else {
465  outputFolderPath = getOutputFolderPath(this.parentFileName);
466  }
467  if (outputFolderPath == null) {
468  logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg", af.getName()));
469  return null;
470  }
471 
472  int i = 0;
473  listOfExtractedImages = new ArrayList<>();
474  for (org.apache.poi.ss.usermodel.PictureData pictureData : listOfAllPictures) {
475  String imageName = UNKNOWN_NAME_PREFIX + i + "." + pictureData.suggestFileExtension();
476  writeExtractedImage(Paths.get(outputFolderPath, imageName).toString(), pictureData.getData());
477  listOfExtractedImages.add(new ExtractedImage(imageName, getFileRelativePath(imageName), pictureData.getData().length, af));
478  i++;
479  }
480  return listOfExtractedImages;
481 
482  }
483 
491  private void writeExtractedImage(String outputPath, byte[] data) {
492  try (FileOutputStream fos = new FileOutputStream(outputPath)) {
493  fos.write(data);
494  } catch (IOException ex) {
495  logger.log(Level.WARNING, "Could not write to the provided location: " + outputPath, ex); //NON-NLS
496  }
497  }
498 
507  private String getOutputFolderPath(String parentFileName) {
508  String outputFolderPath = moduleDirAbsolute + File.separator + parentFileName;
509  File outputFilePath = new File(outputFolderPath);
510  if (!outputFilePath.exists()) {
511  try {
512  outputFilePath.mkdirs();
513  } catch (SecurityException ex) {
514  logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.getOutputFolderPath.exception.msg", parentFileName), ex);
515  return null;
516  }
517  }
518  return outputFolderPath;
519  }
520 
529  private String getFileRelativePath(String fileName) {
530  // Used explicit FWD slashes to maintain DB consistency across operating systems.
531  return "/" + moduleDirRelative + "/" + this.parentFileName + "/" + fileName; //NON-NLS
532  }
533 
539  private static class ExtractedImage {
540  //String fileName, String localPath, long size, long ctime, long crtime,
541  //long atime, long mtime, boolean isFile, AbstractFile parentFile, String rederiveDetails, String toolName, String toolVersion, String otherDetails
542 
543  private final String fileName;
544  private final String localPath;
545  private final long size;
546  private final long ctime;
547  private final long crtime;
548  private final long atime;
549  private final long mtime;
550  private final AbstractFile parentFile;
551 
552  ExtractedImage(String fileName, String localPath, long size, AbstractFile parentFile) {
553  this(fileName, localPath, size, 0, 0, 0, 0, parentFile);
554  }
555 
556  ExtractedImage(String fileName, String localPath, long size, long ctime, long crtime, long atime, long mtime, AbstractFile parentFile) {
557  this.fileName = fileName;
558  this.localPath = localPath;
559  this.size = size;
560  this.ctime = ctime;
561  this.crtime = crtime;
562  this.atime = atime;
563  this.mtime = mtime;
564  this.parentFile = parentFile;
565  }
566 
567  public String getFileName() {
568  return fileName;
569  }
570 
571  public String getLocalPath() {
572  return localPath;
573  }
574 
575  public long getSize() {
576  return size;
577  }
578 
579  public long getCtime() {
580  return ctime;
581  }
582 
583  public long getCrtime() {
584  return crtime;
585  }
586 
587  public long getAtime() {
588  return atime;
589  }
590 
591  public long getMtime() {
592  return mtime;
593  }
594 
596  return parentFile;
597  }
598  }
599 }

Copyright © 2012-2015 Basis Technology. Generated on: Mon Oct 19 2015
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.