19 package org.sleuthkit.autopsy.thunderbirdparser;
 
   21 import java.io.BufferedInputStream;
 
   22 import java.io.CharConversionException;
 
   24 import java.io.FileInputStream;
 
   25 import java.io.FileNotFoundException;
 
   26 import java.io.IOException;
 
   27 import java.io.InputStream;
 
   28 import java.nio.charset.Charset;
 
   29 import java.nio.charset.CharsetEncoder;
 
   30 import java.nio.charset.IllegalCharsetNameException;
 
   31 import java.nio.charset.StandardCharsets;
 
   32 import java.nio.charset.UnsupportedCharsetException;
 
   33 import java.util.ArrayList;
 
   34 import java.util.Iterator;
 
   35 import java.util.List;
 
   36 import java.util.logging.Level;
 
   39 import org.apache.james.mime4j.dom.Message;
 
   40 import org.apache.james.mime4j.mboxiterator.CharBufferWrapper;
 
   41 import org.apache.james.mime4j.mboxiterator.MboxIterator;
 
   42 import org.apache.tika.parser.txt.CharsetDetector;
 
   43 import org.apache.tika.parser.txt.CharsetMatch;
 
   44 import org.apache.commons.validator.routines.EmailValidator;
 
   45 import org.apache.james.mime4j.mboxiterator.MboxIterator.Builder;
 
   46 import org.openide.util.NbBundle;
 
   52 class MboxParser 
extends MimeJ4MessageParser implements Iterator<EmailMessage> {
 
   // Logger for this class, named after MboxParser.
   54     private static final Logger logger = Logger.getLogger(MboxParser.class.getName());
 
   // Iterator that hasNext() and next() delegate to. Starts out null and is
   // assigned by createIterator once an mbox iterator has been built.
   56     private Iterator<EmailMessage> emailIterator = null;
 
   // mime4j iterator over the mbox file; assigned in createIterator and
   // released by close().
   58     private MboxIterator mboxIterable;
 
   /**
    * Creates a parser for the given local path.
    *
    * The constructor is private; instances are handed out by the static
    * methods of this class, which also set up the message iterator.
    *
    * @param localPath Path value forwarded to setLocalPath.
    */
   private MboxParser(String localPath) {
       setLocalPath(localPath);
   }
 
   64     static boolean isValidMimeTypeMbox(byte[] buffer, AbstractFile abstractFile) {
 
   65         String mboxHeaderLine = 
new String(buffer);
 
   66         if (mboxHeaderLine.startsWith(
"From ")) {
 
   67             String mimeType = abstractFile.getMIMEType();
 
   70             if (mimeType == null || mimeType.isEmpty()) {
 
   71                 FileTypeDetector fileTypeDetector = null;
 
   73                     fileTypeDetector = 
new FileTypeDetector();
 
   74                 } 
catch (FileTypeDetector.FileTypeDetectorInitException ex) {
 
   75                     logger.log(Level.WARNING, String.format(
"Unable to create file type detector for determining MIME type for file %s with id of %d", abstractFile.getName(), abstractFile.getId()));
 
   78                 mimeType = fileTypeDetector.getMIMEType(abstractFile);
 
   80             if (mimeType.equalsIgnoreCase(
"application/mbox")) {
 
   97     static MboxParser getThreadInfoIterator(String localPath, File mboxFile) {
 
   98         MboxParser parser = 
new MboxParser(localPath);
 
   99         parser.createIterator(mboxFile, 0, 
false);
 
  113     static MboxParser getEmailIterator(String localPath, File mboxFile, 
long fileID) {
 
  114         MboxParser parser = 
new MboxParser(localPath);
 
  115         parser.createIterator(mboxFile, fileID, 
true);
 
  /**
   * Sets up the message iterator for an mbox file.
   *
   * Candidate encoders come from getPossibleEncoders. For each candidate an
   * MboxIterator is configured with the "From " separator pattern and the
   * candidate's charset; a non-null result is wrapped in an
   * MBoxEmailIterator and stored in emailIterator.
   *
   * An IOException is logged and also recorded through addErrorMessage.
   *
   * NOTE(review): this listing is missing lines inside the method (see the
   * inline notes). Restore them from the canonical source before relying
   * on this text.
   *
   * @param mboxFile The mbox file to read.
   * @param fileID   Id handed to MBoxEmailIterator and used in the log message.
   * @param wholeMsg Flag handed to MBoxEmailIterator.
   */
  128     private void createIterator(File mboxFile, 
long fileID, 
boolean wholeMsg) {
 
  130         List<CharsetEncoder> encoders = getPossibleEncoders(mboxFile);
 
  134         for (CharsetEncoder encoder : encoders) {
 
  // NOTE(review): the catch clauses below need a try opener that is not
  // visible here, and the builder chain shows neither its starting call
  // nor its terminating call.
  136                 mboxIterable = MboxIterator
 
  // A message starts at a line beginning with "From ".
  139                         .fromLine(
"^From .*\r?\n")
 
  140                         .charset(encoder.charset())
 
  142                 if (mboxIterable != null) {
 
  143                     emailIterator = 
new MBoxEmailIterator(mboxIterable.iterator(), encoder, fileID, wholeMsg);
 
  // NOTE(review): the bodies of the next two catch clauses are not visible
  // in this listing.
  146             } 
catch (CharConversionException | UnsupportedCharsetException ex) {
 
  148             } 
catch (IllegalArgumentException ex) {
 
  150             } 
catch (IOException ex) {
 
  // Reading the file failed: log it with the cause and record an error
  // message on the parser.
  151                 logger.log(Level.WARNING, String.format(
"Failed to open mbox file: %s %d", mboxFile.getName(), fileID), ex); 
 
  152                 addErrorMessage(NbBundle.getMessage(
this.getClass(), 
"MboxParser.parse.errMsg.failedToReadFile"));
 
  158     public boolean hasNext() {
 
  159         return emailIterator != null && emailIterator.hasNext();
 
  163     public EmailMessage next() {
 
  164         return emailIterator != null ? emailIterator.next() : null;
 
  168     public void close() throws IOException{
 
  169         if(mboxIterable != null) {
 
  170             mboxIterable.close();
 
  182     private List<CharsetEncoder> getPossibleEncoders(File mboxFile) {
 
  184         List<CharsetEncoder> possibleEncoders = 
new ArrayList<>();
 
  186         possibleEncoders.add(StandardCharsets.ISO_8859_1.newEncoder());
 
  187         possibleEncoders.add(StandardCharsets.US_ASCII.newEncoder());
 
  188         possibleEncoders.add(StandardCharsets.UTF_16.newEncoder());
 
  189         possibleEncoders.add(StandardCharsets.UTF_16BE.newEncoder());
 
  190         possibleEncoders.add(StandardCharsets.UTF_16LE.newEncoder());
 
  191         possibleEncoders.add(StandardCharsets.UTF_8.newEncoder());
 
  194             is = 
new BufferedInputStream(
new FileInputStream(mboxFile));
 
  195         } 
catch (FileNotFoundException ex) {
 
  196             logger.log(Level.WARNING, 
"Failed to find mbox file while detecting charset"); 
 
  197             return possibleEncoders;
 
  201             CharsetDetector detector = 
new CharsetDetector();
 
  202             detector.setText(is);
 
  203             CharsetMatch[] matches = detector.detectAll();
 
  204             for (CharsetMatch match : matches) {
 
  206                     possibleEncoders.add(Charset.forName(match.getName()).newEncoder());
 
  207                 } 
catch (UnsupportedCharsetException | IllegalCharsetNameException ex) {
 
  211             return possibleEncoders;
 
  212         } 
catch (IOException | IllegalArgumentException ex) {
 
  213             logger.log(Level.WARNING, 
"Failed to detect charset of mbox file.", ex); 
 
  214             return possibleEncoders;
 
  218             } 
catch (IOException ex) {
 
  219                 logger.log(Level.WARNING, 
"Failed to close input stream"); 
 
  227     final class MBoxEmailIterator 
implements Iterator<EmailMessage> {
 
  // Source of raw message buffers; consumed one per call to next().
  229         private final Iterator<CharBufferWrapper> mboxIterator;
 
  // Encoder whose charset is used to turn each message buffer into an input
  // stream for parsing.
  230         private final CharsetEncoder encoder;
 
  // File id passed to extractEmail.
  231         private final long fileID;
 
  // Flag supplied at construction.
  // NOTE(review): presumably selects between extractEmail and
  // extractPartialEmail in next(); the selecting condition is not visible
  // in this listing.
  232         private final boolean wholeMsg;
 
  /**
   * Creates an iterator that turns raw mbox message buffers into
   * EmailMessage objects.
   *
   * @param mboxIter Iterator over the raw message buffers.
   * @param encoder  Encoder whose charset is used when reading a buffer.
   * @param fileID   File id passed on when a message is extracted.
   * @param wholeMsg Extraction mode flag stored for use by next().
   */
  MBoxEmailIterator(Iterator<CharBufferWrapper> mboxIter, CharsetEncoder encoder, long fileID, boolean wholeMsg) {
      this.mboxIterator = mboxIter;
      this.encoder = encoder;
      this.fileID = fileID;
      this.wholeMsg = wholeMsg;
  }
 
  242         public boolean hasNext() {
 
  243             return (mboxIterator != null && encoder != null) && mboxIterator.hasNext();
 
  247         public EmailMessage next() {
 
  248             CharBufferWrapper messageBuffer = mboxIterator.next();
 
  251                 Message msg = getMessageBuilder().parseMessage(messageBuffer.asInputStream(encoder.charset()));
 
  253                     return extractEmail(msg, getLocalPath(), fileID);
 
  255                     return extractPartialEmail(msg);
 
  257             } 
catch (RuntimeException | IOException ex) {
 
  258                 logger.log(Level.WARNING, 
"Failed to get message from mbox: {0}", ex.getMessage());