19 package org.sleuthkit.autopsy.thunderbirdparser;
21 import java.io.BufferedInputStream;
22 import java.io.BufferedReader;
23 import java.io.CharConversionException;
25 import java.io.FileInputStream;
26 import java.io.FileNotFoundException;
27 import java.io.FileOutputStream;
28 import java.io.IOException;
29 import java.io.InputStream;
30 import java.nio.charset.Charset;
31 import java.nio.charset.CharsetEncoder;
32 import java.nio.charset.IllegalCharsetNameException;
33 import java.nio.charset.StandardCharsets;
34 import java.nio.charset.UnsupportedCharsetException;
35 import java.util.ArrayList;
36 import java.util.List;
37 import java.util.UUID;
38 import java.util.logging.Level;
40 import org.apache.james.mime4j.dom.BinaryBody;
41 import org.apache.james.mime4j.dom.Body;
42 import org.apache.james.mime4j.dom.Entity;
43 import org.apache.james.mime4j.dom.Message;
44 import org.apache.james.mime4j.dom.Multipart;
45 import org.apache.james.mime4j.dom.TextBody;
46 import org.apache.james.mime4j.dom.address.AddressList;
47 import org.apache.james.mime4j.dom.address.Mailbox;
48 import org.apache.james.mime4j.dom.address.MailboxList;
49 import org.apache.james.mime4j.dom.field.ContentDispositionField;
50 import org.apache.james.mime4j.dom.field.ContentTypeField;
51 import org.apache.james.mime4j.mboxiterator.CharBufferWrapper;
52 import org.apache.james.mime4j.mboxiterator.MboxIterator;
53 import org.apache.james.mime4j.message.DefaultMessageBuilder;
54 import org.apache.james.mime4j.stream.Field;
55 import org.apache.james.mime4j.stream.MimeConfig;
56 import org.apache.tika.parser.txt.CharsetDetector;
57 import org.apache.tika.parser.txt.CharsetMatch;
58 import org.openide.util.NbBundle;
71 private static final Logger logger = Logger.getLogger(MboxParser.class.getName());
72 private DefaultMessageBuilder messageBuilder;
73 private IngestServices services;
74 private StringBuilder errors;
79 private static final String HTML_TYPE =
"text/html";
84 private String localPath;
86 MboxParser(IngestServices services, String localPath) {
87 this.services = services;
88 this.localPath = localPath;
89 messageBuilder =
new DefaultMessageBuilder();
90 MimeConfig config = MimeConfig.custom().setMaxLineLen(-1).build();
92 messageBuilder.setMimeEntityConfig(config);
93 errors =
new StringBuilder();
96 static boolean isValidMimeTypeMbox(byte[] buffer) {
97 return (
new String(buffer)).startsWith(
"From ");
107 List<EmailMessage> parse(File mboxFile,
long fileID) {
109 List<CharsetEncoder> encoders = getPossibleEncoders(mboxFile);
111 CharsetEncoder theEncoder = null;
112 Iterable<CharBufferWrapper> mboxIterator = null;
115 for (CharsetEncoder encoder : encoders) {
117 mboxIterator = MboxIterator.fromFile(mboxFile).charset(encoder.charset()).build();
118 theEncoder = encoder;
120 }
catch (CharConversionException | UnsupportedCharsetException ex) {
122 }
catch (IllegalArgumentException ex) {
124 }
catch (IOException ex) {
125 logger.log(Level.WARNING,
"couldn't find mbox file.", ex);
126 addErrorMessage(NbBundle.getMessage(
this.getClass(),
"MboxParser.parse.errMsg.failedToReadFile"));
127 return new ArrayList<>();
132 if (mboxIterator == null || theEncoder == null) {
133 addErrorMessage(NbBundle.getMessage(
this.getClass(),
"MboxParser.parse.errMsg.couldntFindCharset"));
134 return new ArrayList<>();
137 List<EmailMessage> emails =
new ArrayList<>();
141 for (CharBufferWrapper message : mboxIterator) {
143 Message msg = messageBuilder.parseMessage(message.asInputStream(theEncoder.charset()));
144 emails.add(extractEmail(msg, fileID));
145 }
catch (RuntimeException | IOException ex) {
146 logger.log(Level.WARNING,
"Failed to get message from mbox: {0}", ex.getMessage());
153 NbBundle.getMessage(
this.getClass(),
"MboxParser.parse.errMsg.failedToParseNMsgs", failCount));
159 return errors.toString();
170 private EmailMessage extractEmail(Message msg,
long fileID) {
171 EmailMessage email =
new EmailMessage();
173 email.setSender(getAddresses(msg.getFrom()));
174 email.setRecipients(getAddresses(msg.getTo()));
175 email.setBcc(getAddresses(msg.getBcc()));
176 email.setCc(getAddresses(msg.getCc()));
177 email.setSubject(msg.getSubject());
178 email.setSentDate(msg.getDate());
179 email.setLocalPath(localPath);
182 if (msg.isMultipart()) {
183 handleMultipart(email, (Multipart) msg.getBody(), fileID);
185 handleTextBody(email, (TextBody) msg.getBody(), msg.getMimeType(), msg.getHeader().getFields());
199 private void handleMultipart(EmailMessage email, Multipart multi,
long fileID) {
200 List<Entity> entities = multi.getBodyParts();
201 for (
int index = 0; index < entities.size(); index++) {
202 Entity e = entities.get(index);
203 if (e.isMultipart()) {
204 handleMultipart(email, (Multipart) e.getBody(), fileID);
205 }
else if (e.getDispositionType() != null
206 && e.getDispositionType().equals(ContentDispositionField.DISPOSITION_TYPE_ATTACHMENT)) {
207 handleAttachment(email, e, fileID, index);
208 }
else if (e.getMimeType().equals(HTML_TYPE)
209 || e.getMimeType().equals(ContentTypeField.TYPE_TEXT_PLAIN)) {
210 handleTextBody(email, (TextBody) e.getBody(), e.getMimeType(), e.getHeader().getFields());
227 private void handleTextBody(EmailMessage email, TextBody tb, String type, List<Field> fields) {
230 r =
new BufferedReader(tb.getReader());
231 StringBuilder bodyString =
new StringBuilder();
232 StringBuilder headersString =
new StringBuilder();
234 while ((line = r.readLine()) != null) {
235 bodyString.append(line).append(
"\n");
238 headersString.append(
"\n-----HEADERS-----\n");
239 for(Field field: fields) {
240 String nextLine = field.getName() +
": " + field.getBody();
241 headersString.append(
"\n").append(nextLine);
243 headersString.append(
"\n\n---END HEADERS--\n\n");
245 email.setHeaders(headersString.toString());
248 case ContentTypeField.TYPE_TEXT_PLAIN:
249 email.setTextBody(bodyString.toString());
252 email.setHtmlBody(bodyString.toString());
258 }
catch (IOException ex) {
259 logger.log(Level.WARNING,
"Error getting text body of mbox message", ex);
270 private void handleAttachment(EmailMessage email, Entity e,
long fileID,
int index) {
271 String outputDirPath = ThunderbirdMboxFileIngestModule.getModuleOutputPath() + File.separator;
272 String filename = e.getFilename();
276 filename = filename.replaceAll(
"\\?",
"_");
277 filename = filename.replaceAll(
"<",
"_");
278 filename = filename.replaceAll(
">",
"_");
279 filename = filename.replaceAll(
":",
"_");
280 filename = filename.replaceAll(
"\"",
"_");
281 filename = filename.replaceAll(
"/",
"_");
282 filename = filename.replaceAll(
"\\\\",
"_");
283 filename = filename.replaceAll(
"|",
"_");
284 filename = filename.replaceAll(
"\\*",
"_");
288 if (filename.length() > 64) {
289 filename = UUID.randomUUID().toString();
292 String uniqueFilename = fileID +
"-" + index +
"-" + email.getSentDate() +
"-" + filename;
293 String outPath = outputDirPath + uniqueFilename;
294 EncodedFileOutputStream fos;
297 fos =
new EncodedFileOutputStream(
new FileOutputStream(outPath), TskData.EncodingType.XOR1);
298 }
catch (IOException ex) {
300 NbBundle.getMessage(
this.getClass(),
301 "MboxParser.handleAttch.errMsg.failedToCreateOnDisk", outPath));
302 logger.log(Level.INFO,
"Failed to create file output stream for: " + outPath, ex);
307 Body b = e.getBody();
308 if (b instanceof BinaryBody) {
314 }
catch (IOException ex) {
315 logger.log(Level.INFO,
"Failed to write mbox email attachment to disk.", ex);
316 addErrorMessage(NbBundle.getMessage(
this.getClass(),
"MboxParser.handleAttch.failedWriteToDisk", filename));
321 }
catch (IOException ex) {
322 logger.log(Level.INFO,
"Failed to close file output stream", ex);
326 EmailMessage.Attachment attach =
new EmailMessage.Attachment();
327 attach.setName(filename);
328 attach.setLocalPath(ThunderbirdMboxFileIngestModule.getRelModuleOutputPath()
329 + File.separator + uniqueFilename);
330 attach.setSize(
new File(outPath).length());
331 attach.setEncodingType(TskData.EncodingType.XOR1);
332 email.addAttachment(attach);
343 private String getAddresses(MailboxList mailboxList) {
344 if (mailboxList == null) {
347 StringBuilder addresses =
new StringBuilder();
348 for (Mailbox m : mailboxList) {
349 addresses.append(m.toString()).append(
"; ");
351 return addresses.toString();
362 private String getAddresses(AddressList addressList) {
363 return (addressList == null) ?
"" : getAddresses(addressList.flatten());
374 private List<CharsetEncoder> getPossibleEncoders(File mboxFile) {
376 List<CharsetEncoder> possibleEncoders =
new ArrayList<>();
378 possibleEncoders.add(StandardCharsets.ISO_8859_1.newEncoder());
379 possibleEncoders.add(StandardCharsets.US_ASCII.newEncoder());
380 possibleEncoders.add(StandardCharsets.UTF_16.newEncoder());
381 possibleEncoders.add(StandardCharsets.UTF_16BE.newEncoder());
382 possibleEncoders.add(StandardCharsets.UTF_16LE.newEncoder());
383 possibleEncoders.add(StandardCharsets.UTF_8.newEncoder());
386 is =
new BufferedInputStream(
new FileInputStream(mboxFile));
387 }
catch (FileNotFoundException ex) {
388 logger.log(Level.WARNING,
"Failed to find mbox file while detecting charset");
389 return possibleEncoders;
393 CharsetDetector detector =
new CharsetDetector();
394 detector.setText(is);
395 CharsetMatch[] matches = detector.detectAll();
396 for (CharsetMatch match : matches) {
398 possibleEncoders.add(Charset.forName(match.getName()).newEncoder());
399 }
catch (UnsupportedCharsetException | IllegalCharsetNameException ex) {
403 return possibleEncoders;
404 }
catch (IOException | IllegalArgumentException ex) {
405 logger.log(Level.WARNING,
"Failed to detect charset of mbox file.", ex);
406 return possibleEncoders;
410 }
catch (IOException ex) {
411 logger.log(Level.INFO,
"Failed to close input stream");
416 private void addErrorMessage(String msg) {
417 errors.append(
"<li>").append(msg).append(
"</li>");