19 package org.sleuthkit.autopsy.thunderbirdparser;
 
   21 import java.io.BufferedInputStream;
 
   22 import java.io.BufferedReader;
 
   23 import java.io.CharConversionException;
 
   25 import java.io.FileInputStream;
 
   26 import java.io.FileNotFoundException;
 
   27 import java.io.FileOutputStream;
 
   28 import java.io.IOException;
 
   29 import java.io.InputStream;
 
   30 import java.nio.charset.Charset;
 
   31 import java.nio.charset.CharsetEncoder;
 
   32 import java.nio.charset.IllegalCharsetNameException;
 
   33 import java.nio.charset.StandardCharsets;
 
   34 import java.nio.charset.UnsupportedCharsetException;
 
   35 import java.util.ArrayList;
 
   36 import java.util.List;
 
   37 import java.util.UUID;
 
   38 import java.util.logging.Level;
 
   40 import org.apache.james.mime4j.dom.BinaryBody;
 
   41 import org.apache.james.mime4j.dom.Body;
 
   42 import org.apache.james.mime4j.dom.Entity;
 
   43 import org.apache.james.mime4j.dom.Message;
 
   44 import org.apache.james.mime4j.dom.Multipart;
 
   45 import org.apache.james.mime4j.dom.TextBody;
 
   46 import org.apache.james.mime4j.dom.address.AddressList;
 
   47 import org.apache.james.mime4j.dom.address.Mailbox;
 
   48 import org.apache.james.mime4j.dom.address.MailboxList;
 
   49 import org.apache.james.mime4j.dom.field.ContentDispositionField;
 
   50 import org.apache.james.mime4j.dom.field.ContentTypeField;
 
   51 import org.apache.james.mime4j.mboxiterator.CharBufferWrapper;
 
   52 import org.apache.james.mime4j.mboxiterator.MboxIterator;
 
   53 import org.apache.james.mime4j.message.DefaultMessageBuilder;
 
   54 import org.apache.james.mime4j.stream.Field;
 
   55 import org.apache.james.mime4j.stream.MimeConfig;
 
   56 import org.apache.tika.parser.txt.CharsetDetector;
 
   57 import org.apache.tika.parser.txt.CharsetMatch;
 
   58 import org.openide.util.NbBundle;
 
   72     private static final Logger logger = Logger.getLogger(MboxParser.class.getName());
 
   73     private DefaultMessageBuilder messageBuilder;
 
   74     private IngestServices services;
 
   75     private StringBuilder errors;
 
   80     private static final String HTML_TYPE = 
"text/html"; 
 
   85     private String localPath;
 
   /**
    * Creates a parser with an empty error log and a message builder whose
    * maximum line length is set to -1.
    *
    * @param services  ingest services handle kept by this parser
    * @param localPath path stored on every email this parser extracts
    */
   MboxParser(IngestServices services, String localPath) {
       this.localPath = localPath;
       this.services = services;
       this.errors = new StringBuilder();
       this.messageBuilder = new DefaultMessageBuilder();
       // -1 is passed so that long lines in a message are not rejected by the
       // builder's line length limit.
       this.messageBuilder.setMimeEntityConfig(MimeConfig.custom().setMaxLineLen(-1).build());
   }
 
   97     static boolean isValidMimeTypeMbox(byte[] buffer) {
 
   98         return (
new String(buffer)).startsWith(
"From "); 
 
  108     List<EmailMessage> parse(File mboxFile, 
long fileID) {
 
  110         List<CharsetEncoder> encoders = getPossibleEncoders(mboxFile);
 
  112         CharsetEncoder theEncoder = null;
 
  113         Iterable<CharBufferWrapper> mboxIterator = null;
 
  116         for (CharsetEncoder encoder : encoders) {
 
  118                 mboxIterator = MboxIterator.fromFile(mboxFile).charset(encoder.charset()).build();
 
  119                 theEncoder = encoder;
 
  121             } 
catch (CharConversionException | UnsupportedCharsetException ex) {
 
  123             } 
catch (IllegalArgumentException ex) {
 
  125             } 
catch (IOException ex) {
 
  126                 logger.log(Level.WARNING, 
"couldn't find mbox file.", ex); 
 
  127                 addErrorMessage(NbBundle.getMessage(
this.getClass(), 
"MboxParser.parse.errMsg.failedToReadFile"));
 
  128                 return new ArrayList<>();
 
  133         if (mboxIterator == null || theEncoder == null) {
 
  134             addErrorMessage(NbBundle.getMessage(
this.getClass(), 
"MboxParser.parse.errMsg.couldntFindCharset"));
 
  135             return new ArrayList<>();
 
  138         List<EmailMessage> emails = 
new ArrayList<>();
 
  142         for (CharBufferWrapper message : mboxIterator) {
 
  144                 Message msg = messageBuilder.parseMessage(message.asInputStream(theEncoder.charset()));
 
  145                 emails.add(extractEmail(msg, fileID));
 
  146             } 
catch (RuntimeException | IOException ex) {
 
  147                 logger.log(Level.WARNING, 
"Failed to get message from mbox: {0}", ex.getMessage()); 
 
  154                     NbBundle.getMessage(
this.getClass(), 
"MboxParser.parse.errMsg.failedToParseNMsgs", failCount));
 
  160         return errors.toString();
 
  171     private EmailMessage extractEmail(Message msg, 
long fileID) {
 
  172         EmailMessage email = 
new EmailMessage();
 
  174         email.setSender(getAddresses(msg.getFrom()));
 
  175         email.setRecipients(getAddresses(msg.getTo()));
 
  176         email.setBcc(getAddresses(msg.getBcc()));
 
  177         email.setCc(getAddresses(msg.getCc()));
 
  178         email.setSubject(msg.getSubject());
 
  179         email.setSentDate(msg.getDate());
 
  180         email.setLocalPath(localPath);
 
  183         if (msg.isMultipart()) {
 
  184             handleMultipart(email, (Multipart) msg.getBody(), fileID);
 
  186             handleTextBody(email, (TextBody) msg.getBody(), msg.getMimeType(), msg.getHeader().getFields());
 
  200     private void handleMultipart(EmailMessage email, Multipart multi, 
long fileID) {
 
  201         List<Entity> entities = multi.getBodyParts();
 
  202         for (
int index = 0; index < entities.size(); index++) {
 
  203             Entity e = entities.get(index);
 
  204             if (e.isMultipart()) {
 
  205                 handleMultipart(email, (Multipart) e.getBody(), fileID);
 
  206             } 
else if (e.getDispositionType() != null
 
  207                     && e.getDispositionType().equals(ContentDispositionField.DISPOSITION_TYPE_ATTACHMENT)) {
 
  208                 handleAttachment(email, e, fileID, index);
 
  209             } 
else if (e.getMimeType().equals(HTML_TYPE)
 
  210                     || e.getMimeType().equals(ContentTypeField.TYPE_TEXT_PLAIN)) {
 
  211                 handleTextBody(email, (TextBody) e.getBody(), e.getMimeType(), e.getHeader().getFields());
 
  228     private void handleTextBody(EmailMessage email, TextBody tb, String type, List<Field> fields) {
 
  231             r = 
new BufferedReader(tb.getReader());
 
  232             StringBuilder bodyString = 
new StringBuilder();
 
  233             StringBuilder headersString = 
new StringBuilder();
 
  235             while ((line = r.readLine()) != null) {
 
  236                 bodyString.append(line).append(
"\n");
 
  239             headersString.append(
"\n-----HEADERS-----\n");
 
  240             for(Field field: fields) {
 
  241                 String nextLine = field.getName() + 
": " + field.getBody();
 
  242                 headersString.append(
"\n").append(nextLine);
 
  244             headersString.append(
"\n\n---END HEADERS--\n\n");
 
  246             email.setHeaders(headersString.toString());
 
  249                 case ContentTypeField.TYPE_TEXT_PLAIN:
 
  250                     email.setTextBody(bodyString.toString());
 
  253                     email.setHtmlBody(bodyString.toString());
 
  259         } 
catch (IOException ex) {
 
  260             logger.log(Level.WARNING, 
"Error getting text body of mbox message", ex); 
 
  271     @NbBundle.Messages ({
"MboxParser.handleAttch.noOpenCase.errMsg=Exception while getting open case."})
 
  272     private void handleAttachment(EmailMessage email, Entity e, 
long fileID, 
int index) {
 
  273         String outputDirPath;
 
  274         String relModuleOutputPath;
 
  276             outputDirPath = ThunderbirdMboxFileIngestModule.getModuleOutputPath() + File.separator;
 
  277             relModuleOutputPath = ThunderbirdMboxFileIngestModule.getRelModuleOutputPath() + File.separator;
 
  278         } 
catch (NoCurrentCaseException ex) {
 
  279             addErrorMessage(Bundle.MboxParser_handleAttch_noOpenCase_errMsg());
 
  280             logger.log(Level.SEVERE, Bundle.MboxParser_handleAttch_noOpenCase_errMsg(), ex); 
 
  283         String filename = e.getFilename();
 
  287         filename = filename.replaceAll(
"\\?", 
"_");
 
  288         filename = filename.replaceAll(
"<", 
"_");
 
  289         filename = filename.replaceAll(
">", 
"_");
 
  290         filename = filename.replaceAll(
":", 
"_");
 
  291         filename = filename.replaceAll(
"\"", 
"_");
 
  292         filename = filename.replaceAll(
"/", 
"_");
 
  293         filename = filename.replaceAll(
"\\\\", 
"_");
 
  294         filename = filename.replaceAll(
"|", 
"_");
 
  295         filename = filename.replaceAll(
"\\*", 
"_");
 
  299         if (filename.length() > 64) {
 
  300             filename = UUID.randomUUID().toString();
 
  303         String uniqueFilename = fileID + 
"-" + index + 
"-" + email.getSentDate() + 
"-" + filename;
 
  304         String outPath = outputDirPath + uniqueFilename;
 
  305         EncodedFileOutputStream fos;
 
  308             fos = 
new EncodedFileOutputStream(
new FileOutputStream(outPath), TskData.EncodingType.XOR1);
 
  309         } 
catch (IOException ex) {
 
  311                     NbBundle.getMessage(
this.getClass(),
 
  312                             "MboxParser.handleAttch.errMsg.failedToCreateOnDisk", outPath));
 
  313             logger.log(Level.INFO, 
"Failed to create file output stream for: " + outPath, ex); 
 
  318             Body b = e.getBody();
 
  319             if (b instanceof BinaryBody) {
 
  325         } 
catch (IOException ex) {
 
  326             logger.log(Level.INFO, 
"Failed to write mbox email attachment to disk.", ex); 
 
  327             addErrorMessage(NbBundle.getMessage(
this.getClass(), 
"MboxParser.handleAttch.failedWriteToDisk", filename));
 
  332             } 
catch (IOException ex) {
 
  333                 logger.log(Level.INFO, 
"Failed to close file output stream", ex); 
 
  337         EmailMessage.Attachment attach = 
new EmailMessage.Attachment();
 
  338         attach.setName(filename);
 
  339         attach.setLocalPath(relModuleOutputPath + uniqueFilename);
 
  340         attach.setSize(
new File(outPath).length());
 
  341         attach.setEncodingType(TskData.EncodingType.XOR1);
 
  342         email.addAttachment(attach);
 
  353     private String getAddresses(MailboxList mailboxList) {
 
  354         if (mailboxList == null) {
 
  357         StringBuilder addresses = 
new StringBuilder();
 
  358         for (Mailbox m : mailboxList) {
 
  359             addresses.append(m.toString()).append(
"; ");
 
  361         return addresses.toString();
 
  372     private String getAddresses(AddressList addressList) {
 
  373         return (addressList == null) ? 
"" : getAddresses(addressList.flatten());
 
  384     private List<CharsetEncoder> getPossibleEncoders(File mboxFile) {
 
  386         List<CharsetEncoder> possibleEncoders = 
new ArrayList<>();
 
  388         possibleEncoders.add(StandardCharsets.ISO_8859_1.newEncoder());
 
  389         possibleEncoders.add(StandardCharsets.US_ASCII.newEncoder());
 
  390         possibleEncoders.add(StandardCharsets.UTF_16.newEncoder());
 
  391         possibleEncoders.add(StandardCharsets.UTF_16BE.newEncoder());
 
  392         possibleEncoders.add(StandardCharsets.UTF_16LE.newEncoder());
 
  393         possibleEncoders.add(StandardCharsets.UTF_8.newEncoder());
 
  396             is = 
new BufferedInputStream(
new FileInputStream(mboxFile));
 
  397         } 
catch (FileNotFoundException ex) {
 
  398             logger.log(Level.WARNING, 
"Failed to find mbox file while detecting charset"); 
 
  399             return possibleEncoders;
 
  403             CharsetDetector detector = 
new CharsetDetector();
 
  404             detector.setText(is);
 
  405             CharsetMatch[] matches = detector.detectAll();
 
  406             for (CharsetMatch match : matches) {
 
  408                     possibleEncoders.add(Charset.forName(match.getName()).newEncoder());
 
  409                 } 
catch (UnsupportedCharsetException | IllegalCharsetNameException ex) {
 
  413             return possibleEncoders;
 
  414         } 
catch (IOException | IllegalArgumentException ex) {
 
  415             logger.log(Level.WARNING, 
"Failed to detect charset of mbox file.", ex); 
 
  416             return possibleEncoders;
 
  420             } 
catch (IOException ex) {
 
  421                 logger.log(Level.INFO, 
"Failed to close input stream"); 
 
  426     private void addErrorMessage(String msg) {
 
  427         errors.append(
"<li>").append(msg).append(
"</li>");