19 package org.sleuthkit.autopsy.thunderbirdparser;
 
   21 import java.io.BufferedInputStream;
 
   22 import java.io.BufferedReader;
 
   23 import java.io.CharConversionException;
 
   25 import java.io.FileInputStream;
 
   26 import java.io.FileNotFoundException;
 
   27 import java.io.FileOutputStream;
 
   28 import java.io.IOException;
 
   29 import java.io.InputStream;
 
   30 import java.nio.charset.Charset;
 
   31 import java.nio.charset.CharsetEncoder;
 
   32 import java.nio.charset.IllegalCharsetNameException;
 
   33 import java.nio.charset.StandardCharsets;
 
   34 import java.nio.charset.UnsupportedCharsetException;
 
   35 import java.util.ArrayList;
 
   36 import java.util.List;
 
   37 import java.util.UUID;
 
   38 import java.util.logging.Level;
 
   40 import org.apache.james.mime4j.dom.BinaryBody;
 
   41 import org.apache.james.mime4j.dom.Body;
 
   42 import org.apache.james.mime4j.dom.Entity;
 
   43 import org.apache.james.mime4j.dom.Message;
 
   44 import org.apache.james.mime4j.dom.Multipart;
 
   45 import org.apache.james.mime4j.dom.TextBody;
 
   46 import org.apache.james.mime4j.dom.address.AddressList;
 
   47 import org.apache.james.mime4j.dom.address.Mailbox;
 
   48 import org.apache.james.mime4j.dom.address.MailboxList;
 
   49 import org.apache.james.mime4j.dom.field.ContentDispositionField;
 
   50 import org.apache.james.mime4j.dom.field.ContentTypeField;
 
   51 import org.apache.james.mime4j.mboxiterator.CharBufferWrapper;
 
   52 import org.apache.james.mime4j.mboxiterator.MboxIterator;
 
   53 import org.apache.james.mime4j.message.DefaultMessageBuilder;
 
   54 import org.apache.james.mime4j.stream.Field;
 
   55 import org.apache.james.mime4j.stream.MimeConfig;
 
   56 import org.apache.tika.parser.txt.CharsetDetector;
 
   57 import org.apache.tika.parser.txt.CharsetMatch;
 
   58 import org.openide.util.NbBundle;
 
   71     private static final Logger logger = Logger.getLogger(MboxParser.class.getName());
 
   72     private DefaultMessageBuilder messageBuilder;
 
   73     private IngestServices services;
 
   74     private StringBuilder errors;
 
   79     private static final String HTML_TYPE = 
"text/html"; 
 
   84     private String localPath;
 
   86     MboxParser(IngestServices services, String localPath) {
 
   87         this.services = services;
 
   88         this.localPath = localPath;
 
   89         messageBuilder = 
new DefaultMessageBuilder();
 
   90         MimeConfig config = MimeConfig.custom().setMaxLineLen(-1).build();
 
   92         messageBuilder.setMimeEntityConfig(config);
 
   93         errors = 
new StringBuilder();
 
   96     static boolean isValidMimeTypeMbox(byte[] buffer) {
 
   97         return (
new String(buffer)).startsWith(
"From "); 
 
  107     List<EmailMessage> parse(File mboxFile, 
long fileID) {
 
  109         List<CharsetEncoder> encoders = getPossibleEncoders(mboxFile);
 
  111         CharsetEncoder theEncoder = null;
 
  112         Iterable<CharBufferWrapper> mboxIterator = null;
 
  115         for (CharsetEncoder encoder : encoders) {
 
  117                 mboxIterator = MboxIterator.fromFile(mboxFile).charset(encoder.charset()).build();
 
  118                 theEncoder = encoder;
 
  120             } 
catch (CharConversionException | UnsupportedCharsetException ex) {
 
  122             } 
catch (IllegalArgumentException ex) {
 
  124             } 
catch (IOException ex) {
 
  125                 logger.log(Level.WARNING, 
"couldn't find mbox file.", ex); 
 
  126                 addErrorMessage(NbBundle.getMessage(
this.getClass(), 
"MboxParser.parse.errMsg.failedToReadFile"));
 
  127                 return new ArrayList<>();
 
  132         if (mboxIterator == null || theEncoder == null) {
 
  133             addErrorMessage(NbBundle.getMessage(
this.getClass(), 
"MboxParser.parse.errMsg.couldntFindCharset"));
 
  134             return new ArrayList<>();
 
  137         List<EmailMessage> emails = 
new ArrayList<>();
 
  141         for (CharBufferWrapper message : mboxIterator) {
 
  143                 Message msg = messageBuilder.parseMessage(message.asInputStream(theEncoder.charset()));
 
  144                 emails.add(extractEmail(msg, fileID));
 
  145             } 
catch (RuntimeException | IOException ex) {
 
  146                 logger.log(Level.WARNING, 
"Failed to get message from mbox: {0}", ex.getMessage()); 
 
  153                     NbBundle.getMessage(
this.getClass(), 
"MboxParser.parse.errMsg.failedToParseNMsgs", failCount));
 
  159         return errors.toString();
 
  170     private EmailMessage extractEmail(Message msg, 
long fileID) {
 
  171         EmailMessage email = 
new EmailMessage();
 
  173         email.setSender(getAddresses(msg.getFrom()));
 
  174         email.setRecipients(getAddresses(msg.getTo()));
 
  175         email.setBcc(getAddresses(msg.getBcc()));
 
  176         email.setCc(getAddresses(msg.getCc()));
 
  177         email.setSubject(msg.getSubject());
 
  178         email.setSentDate(msg.getDate());
 
  179         email.setLocalPath(localPath);
 
  182         if (msg.isMultipart()) {
 
  183             handleMultipart(email, (Multipart) msg.getBody(), fileID);
 
  185             handleTextBody(email, (TextBody) msg.getBody(), msg.getMimeType(), msg.getHeader().getFields());
 
  199     private void handleMultipart(EmailMessage email, Multipart multi, 
long fileID) {
 
  200         List<Entity> entities = multi.getBodyParts();
 
  201         for (
int index = 0; index < entities.size(); index++) {
 
  202             Entity e = entities.get(index);
 
  203             if (e.isMultipart()) {
 
  204                 handleMultipart(email, (Multipart) e.getBody(), fileID);
 
  205             } 
else if (e.getDispositionType() != null
 
  206                     && e.getDispositionType().equals(ContentDispositionField.DISPOSITION_TYPE_ATTACHMENT)) {
 
  207                 handleAttachment(email, e, fileID, index);
 
  208             } 
else if (e.getMimeType().equals(HTML_TYPE)
 
  209                     || e.getMimeType().equals(ContentTypeField.TYPE_TEXT_PLAIN)) {
 
  210                 handleTextBody(email, (TextBody) e.getBody(), e.getMimeType(), e.getHeader().getFields());
 
  227     private void handleTextBody(EmailMessage email, TextBody tb, String type, List<Field> fields) {
 
  230             r = 
new BufferedReader(tb.getReader());
 
  231             StringBuilder bodyString = 
new StringBuilder();
 
  233             while ((line = r.readLine()) != null) {
 
  234                 bodyString.append(line).append(
"\n");
 
  236             bodyString.append(
"\n-----HEADERS-----\n");
 
  237             for(Field field: fields) {
 
  238                 String nextLine = field.getName() + 
": " + field.getBody();
 
  239                 bodyString.append(
"\n").append(nextLine);
 
  241             bodyString.append(
"\n\n---END HEADERS--\n\n");
 
  244                 case ContentTypeField.TYPE_TEXT_PLAIN:
 
  245                     email.setTextBody(bodyString.toString());
 
  248                     email.setHtmlBody(bodyString.toString());
 
  254         } 
catch (IOException ex) {
 
  255             logger.log(Level.WARNING, 
"Error getting text body of mbox message", ex); 
 
  266     private void handleAttachment(EmailMessage email, Entity e, 
long fileID, 
int index) {
 
  267         String outputDirPath = ThunderbirdMboxFileIngestModule.getModuleOutputPath() + File.separator;
 
  268         String filename = e.getFilename();
 
  272         filename = filename.replaceAll(
"\\?", 
"_");
 
  273         filename = filename.replaceAll(
"<", 
"_");
 
  274         filename = filename.replaceAll(
">", 
"_");
 
  275         filename = filename.replaceAll(
":", 
"_");
 
  276         filename = filename.replaceAll(
"\"", 
"_");
 
  277         filename = filename.replaceAll(
"/", 
"_");
 
  278         filename = filename.replaceAll(
"\\\\", 
"_");
 
  279         filename = filename.replaceAll(
"|", 
"_");
 
  280         filename = filename.replaceAll(
"\\*", 
"_");
 
  284         if (filename.length() > 64) {
 
  285             filename = UUID.randomUUID().toString();
 
  288         String uniqueFilename = fileID + 
"-" + index + 
"-" + email.getSentDate() + 
"-" + filename;
 
  289         String outPath = outputDirPath + uniqueFilename;
 
  290         EncodedFileOutputStream fos;
 
  293             fos = 
new EncodedFileOutputStream(
new FileOutputStream(outPath), TskData.EncodingType.XOR1);
 
  294         } 
catch (IOException ex) {
 
  296                     NbBundle.getMessage(
this.getClass(),
 
  297                             "MboxParser.handleAttch.errMsg.failedToCreateOnDisk", outPath));
 
  298             logger.log(Level.INFO, 
"Failed to create file output stream for: " + outPath, ex); 
 
  303             Body b = e.getBody();
 
  304             if (b instanceof BinaryBody) {
 
  310         } 
catch (IOException ex) {
 
  311             logger.log(Level.INFO, 
"Failed to write mbox email attachment to disk.", ex); 
 
  312             addErrorMessage(NbBundle.getMessage(
this.getClass(), 
"MboxParser.handleAttch.failedWriteToDisk", filename));
 
  317             } 
catch (IOException ex) {
 
  318                 logger.log(Level.INFO, 
"Failed to close file output stream", ex); 
 
  322         EmailMessage.Attachment attach = 
new EmailMessage.Attachment();
 
  323         attach.setName(filename);
 
  324         attach.setLocalPath(ThunderbirdMboxFileIngestModule.getRelModuleOutputPath()
 
  325                 + File.separator + uniqueFilename);
 
  326         attach.setSize(
new File(outPath).length());
 
  327         attach.setEncodingType(TskData.EncodingType.XOR1);
 
  328         email.addAttachment(attach);
 
  339     private String getAddresses(MailboxList mailboxList) {
 
  340         if (mailboxList == null) {
 
  343         StringBuilder addresses = 
new StringBuilder();
 
  344         for (Mailbox m : mailboxList) {
 
  345             addresses.append(m.toString()).append(
"; ");
 
  347         return addresses.toString();
 
  358     private String getAddresses(AddressList addressList) {
 
  359         return (addressList == null) ? 
"" : getAddresses(addressList.flatten());
 
  370     private List<CharsetEncoder> getPossibleEncoders(File mboxFile) {
 
  372         List<CharsetEncoder> possibleEncoders = 
new ArrayList<>();
 
  374         possibleEncoders.add(StandardCharsets.ISO_8859_1.newEncoder());
 
  375         possibleEncoders.add(StandardCharsets.US_ASCII.newEncoder());
 
  376         possibleEncoders.add(StandardCharsets.UTF_16.newEncoder());
 
  377         possibleEncoders.add(StandardCharsets.UTF_16BE.newEncoder());
 
  378         possibleEncoders.add(StandardCharsets.UTF_16LE.newEncoder());
 
  379         possibleEncoders.add(StandardCharsets.UTF_8.newEncoder());
 
  382             is = 
new BufferedInputStream(
new FileInputStream(mboxFile));
 
  383         } 
catch (FileNotFoundException ex) {
 
  384             logger.log(Level.WARNING, 
"Failed to find mbox file while detecting charset"); 
 
  385             return possibleEncoders;
 
  389             CharsetDetector detector = 
new CharsetDetector();
 
  390             detector.setText(is);
 
  391             CharsetMatch[] matches = detector.detectAll();
 
  392             for (CharsetMatch match : matches) {
 
  394                     possibleEncoders.add(Charset.forName(match.getName()).newEncoder());
 
  395                 } 
catch (UnsupportedCharsetException | IllegalCharsetNameException ex) {
 
  399             return possibleEncoders;
 
  400         } 
catch (IOException | IllegalArgumentException ex) {
 
  401             logger.log(Level.WARNING, 
"Failed to detect charset of mbox file.", ex); 
 
  402             return possibleEncoders;
 
  406             } 
catch (IOException ex) {
 
  407                 logger.log(Level.INFO, 
"Failed to close input stream"); 
 
  412     private void addErrorMessage(String msg) {
 
  413         errors.append(
"<li>").append(msg).append(
"</li>");