19 package org.sleuthkit.autopsy.thunderbirdparser;
21 import java.io.BufferedInputStream;
22 import java.io.BufferedReader;
23 import java.io.CharConversionException;
25 import java.io.FileInputStream;
26 import java.io.FileNotFoundException;
27 import java.io.FileOutputStream;
28 import java.io.IOException;
29 import java.io.InputStream;
30 import java.nio.charset.Charset;
31 import java.nio.charset.CharsetEncoder;
32 import java.nio.charset.IllegalCharsetNameException;
33 import java.nio.charset.StandardCharsets;
34 import java.nio.charset.UnsupportedCharsetException;
35 import java.util.ArrayList;
36 import java.util.List;
37 import java.util.UUID;
38 import java.util.logging.Level;
40 import org.apache.james.mime4j.dom.BinaryBody;
41 import org.apache.james.mime4j.dom.Body;
42 import org.apache.james.mime4j.dom.Entity;
43 import org.apache.james.mime4j.dom.Message;
44 import org.apache.james.mime4j.dom.Multipart;
45 import org.apache.james.mime4j.dom.TextBody;
46 import org.apache.james.mime4j.dom.address.AddressList;
47 import org.apache.james.mime4j.dom.address.Mailbox;
48 import org.apache.james.mime4j.dom.address.MailboxList;
49 import org.apache.james.mime4j.dom.field.ContentDispositionField;
50 import org.apache.james.mime4j.dom.field.ContentTypeField;
51 import org.apache.james.mime4j.mboxiterator.CharBufferWrapper;
52 import org.apache.james.mime4j.mboxiterator.MboxIterator;
53 import org.apache.james.mime4j.message.DefaultMessageBuilder;
54 import org.apache.james.mime4j.stream.Field;
55 import org.apache.james.mime4j.stream.MimeConfig;
56 import org.apache.tika.parser.txt.CharsetDetector;
57 import org.apache.tika.parser.txt.CharsetMatch;
58 import org.openide.util.NbBundle;
72 private static final Logger logger = Logger.getLogger(MboxParser.class.getName());
73 private DefaultMessageBuilder messageBuilder;
74 private IngestServices services;
75 private StringBuilder errors;
80 private static final String HTML_TYPE =
"text/html";
85 private String localPath;
87 MboxParser(IngestServices services, String localPath) {
88 this.services = services;
89 this.localPath = localPath;
90 messageBuilder =
new DefaultMessageBuilder();
91 MimeConfig config = MimeConfig.custom().setMaxLineLen(-1).build();
93 messageBuilder.setMimeEntityConfig(config);
94 errors =
new StringBuilder();
97 static boolean isValidMimeTypeMbox(byte[] buffer) {
98 return (
new String(buffer)).startsWith(
"From ");
108 List<EmailMessage> parse(File mboxFile,
long fileID) {
110 List<CharsetEncoder> encoders = getPossibleEncoders(mboxFile);
112 CharsetEncoder theEncoder = null;
113 Iterable<CharBufferWrapper> mboxIterator = null;
116 for (CharsetEncoder encoder : encoders) {
118 mboxIterator = MboxIterator.fromFile(mboxFile).charset(encoder.charset()).build();
119 theEncoder = encoder;
121 }
catch (CharConversionException | UnsupportedCharsetException ex) {
123 }
catch (IllegalArgumentException ex) {
125 }
catch (IOException ex) {
126 logger.log(Level.WARNING,
"couldn't find mbox file.", ex);
127 addErrorMessage(NbBundle.getMessage(
this.getClass(),
"MboxParser.parse.errMsg.failedToReadFile"));
128 return new ArrayList<>();
133 if (mboxIterator == null || theEncoder == null) {
134 addErrorMessage(NbBundle.getMessage(
this.getClass(),
"MboxParser.parse.errMsg.couldntFindCharset"));
135 return new ArrayList<>();
138 List<EmailMessage> emails =
new ArrayList<>();
142 for (CharBufferWrapper message : mboxIterator) {
144 Message msg = messageBuilder.parseMessage(message.asInputStream(theEncoder.charset()));
145 emails.add(extractEmail(msg, fileID));
146 }
catch (RuntimeException | IOException ex) {
147 logger.log(Level.WARNING,
"Failed to get message from mbox: {0}", ex.getMessage());
154 NbBundle.getMessage(
this.getClass(),
"MboxParser.parse.errMsg.failedToParseNMsgs", failCount));
160 return errors.toString();
171 private EmailMessage extractEmail(Message msg,
long fileID) {
172 EmailMessage email =
new EmailMessage();
174 email.setSender(getAddresses(msg.getFrom()));
175 email.setRecipients(getAddresses(msg.getTo()));
176 email.setBcc(getAddresses(msg.getBcc()));
177 email.setCc(getAddresses(msg.getCc()));
178 email.setSubject(msg.getSubject());
179 email.setSentDate(msg.getDate());
180 email.setLocalPath(localPath);
181 email.setMessageID(msg.getMessageId());
183 Field field = msg.getHeader().getField(
"in-reply-to");
184 String inReplyTo = null;
187 inReplyTo = field.getBody();
188 email.setInReplyToID(inReplyTo);
191 field = msg.getHeader().getField(
"references");
193 List<String> references =
new ArrayList<>();
194 for (String
id : field.getBody().split(
">")) {
195 references.add(
id.trim() +
">");
198 if (!references.contains(inReplyTo)) {
199 references.add(inReplyTo);
202 email.setReferences(references);
206 if (msg.isMultipart()) {
207 handleMultipart(email, (Multipart) msg.getBody(), fileID);
209 handleTextBody(email, (TextBody) msg.getBody(), msg.getMimeType(), msg.getHeader().getFields());
223 private void handleMultipart(EmailMessage email, Multipart multi,
long fileID) {
224 List<Entity> entities = multi.getBodyParts();
225 for (
int index = 0; index < entities.size(); index++) {
226 Entity e = entities.get(index);
227 if (e.isMultipart()) {
228 handleMultipart(email, (Multipart) e.getBody(), fileID);
229 }
else if (e.getDispositionType() != null
230 && e.getDispositionType().equals(ContentDispositionField.DISPOSITION_TYPE_ATTACHMENT)) {
231 handleAttachment(email, e, fileID, index);
232 }
else if (e.getMimeType().equals(HTML_TYPE)
233 || e.getMimeType().equals(ContentTypeField.TYPE_TEXT_PLAIN)) {
234 handleTextBody(email, (TextBody) e.getBody(), e.getMimeType(), e.getHeader().getFields());
251 private void handleTextBody(EmailMessage email, TextBody tb, String type, List<Field> fields) {
254 r =
new BufferedReader(tb.getReader());
255 StringBuilder bodyString =
new StringBuilder();
256 StringBuilder headersString =
new StringBuilder();
258 while ((line = r.readLine()) != null) {
259 bodyString.append(line).append(
"\n");
262 headersString.append(
"\n-----HEADERS-----\n");
263 for(Field field: fields) {
264 String nextLine = field.getName() +
": " + field.getBody();
265 headersString.append(
"\n").append(nextLine);
267 headersString.append(
"\n\n---END HEADERS--\n\n");
269 email.setHeaders(headersString.toString());
272 case ContentTypeField.TYPE_TEXT_PLAIN:
273 email.setTextBody(bodyString.toString());
276 email.setHtmlBody(bodyString.toString());
282 }
catch (IOException ex) {
283 logger.log(Level.WARNING,
"Error getting text body of mbox message", ex);
294 @NbBundle.Messages ({
"MboxParser.handleAttch.noOpenCase.errMsg=Exception while getting open case."})
295 private void handleAttachment(EmailMessage email, Entity e,
long fileID,
int index) {
296 String outputDirPath;
297 String relModuleOutputPath;
299 outputDirPath = ThunderbirdMboxFileIngestModule.getModuleOutputPath() + File.separator;
300 relModuleOutputPath = ThunderbirdMboxFileIngestModule.getRelModuleOutputPath() + File.separator;
301 }
catch (NoCurrentCaseException ex) {
302 addErrorMessage(Bundle.MboxParser_handleAttch_noOpenCase_errMsg());
303 logger.log(Level.SEVERE, Bundle.MboxParser_handleAttch_noOpenCase_errMsg(), ex);
306 String filename = e.getFilename();
310 filename = filename.replaceAll(
"\\?",
"_");
311 filename = filename.replaceAll(
"<",
"_");
312 filename = filename.replaceAll(
">",
"_");
313 filename = filename.replaceAll(
":",
"_");
314 filename = filename.replaceAll(
"\"",
"_");
315 filename = filename.replaceAll(
"/",
"_");
316 filename = filename.replaceAll(
"\\\\",
"_");
317 filename = filename.replaceAll(
"|",
"_");
318 filename = filename.replaceAll(
"\\*",
"_");
322 if (filename.length() > 64) {
323 filename = UUID.randomUUID().toString();
326 String uniqueFilename = fileID +
"-" + index +
"-" + email.getSentDate() +
"-" + filename;
327 String outPath = outputDirPath + uniqueFilename;
328 EncodedFileOutputStream fos;
331 fos =
new EncodedFileOutputStream(
new FileOutputStream(outPath), TskData.EncodingType.XOR1);
332 }
catch (IOException ex) {
334 NbBundle.getMessage(
this.getClass(),
335 "MboxParser.handleAttch.errMsg.failedToCreateOnDisk", outPath));
336 logger.log(Level.INFO,
"Failed to create file output stream for: " + outPath, ex);
341 Body b = e.getBody();
342 if (b instanceof BinaryBody) {
348 }
catch (IOException ex) {
349 logger.log(Level.INFO,
"Failed to write mbox email attachment to disk.", ex);
350 addErrorMessage(NbBundle.getMessage(
this.getClass(),
"MboxParser.handleAttch.failedWriteToDisk", filename));
355 }
catch (IOException ex) {
356 logger.log(Level.INFO,
"Failed to close file output stream", ex);
360 EmailMessage.Attachment attach =
new EmailMessage.Attachment();
361 attach.setName(filename);
362 attach.setLocalPath(relModuleOutputPath + uniqueFilename);
363 attach.setSize(
new File(outPath).length());
364 attach.setEncodingType(TskData.EncodingType.XOR1);
365 email.addAttachment(attach);
376 private String getAddresses(MailboxList mailboxList) {
377 if (mailboxList == null) {
380 StringBuilder addresses =
new StringBuilder();
381 for (Mailbox m : mailboxList) {
382 addresses.append(m.toString()).append(
"; ");
384 return addresses.toString();
395 private String getAddresses(AddressList addressList) {
396 return (addressList == null) ?
"" : getAddresses(addressList.flatten());
407 private List<CharsetEncoder> getPossibleEncoders(File mboxFile) {
409 List<CharsetEncoder> possibleEncoders =
new ArrayList<>();
411 possibleEncoders.add(StandardCharsets.ISO_8859_1.newEncoder());
412 possibleEncoders.add(StandardCharsets.US_ASCII.newEncoder());
413 possibleEncoders.add(StandardCharsets.UTF_16.newEncoder());
414 possibleEncoders.add(StandardCharsets.UTF_16BE.newEncoder());
415 possibleEncoders.add(StandardCharsets.UTF_16LE.newEncoder());
416 possibleEncoders.add(StandardCharsets.UTF_8.newEncoder());
419 is =
new BufferedInputStream(
new FileInputStream(mboxFile));
420 }
catch (FileNotFoundException ex) {
421 logger.log(Level.WARNING,
"Failed to find mbox file while detecting charset");
422 return possibleEncoders;
426 CharsetDetector detector =
new CharsetDetector();
427 detector.setText(is);
428 CharsetMatch[] matches = detector.detectAll();
429 for (CharsetMatch match : matches) {
431 possibleEncoders.add(Charset.forName(match.getName()).newEncoder());
432 }
catch (UnsupportedCharsetException | IllegalCharsetNameException ex) {
436 return possibleEncoders;
437 }
catch (IOException | IllegalArgumentException ex) {
438 logger.log(Level.WARNING,
"Failed to detect charset of mbox file.", ex);
439 return possibleEncoders;
443 }
catch (IOException ex) {
444 logger.log(Level.INFO,
"Failed to close input stream");
449 private void addErrorMessage(String msg) {
450 errors.append(
"<li>").append(msg).append(
"</li>");