Autopsy  4.0
Graphical digital forensics platform for The Sleuth Kit and other tools.
MboxParser.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2014 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.thunderbirdparser;
20 
21 import java.io.BufferedInputStream;
22 import java.io.BufferedReader;
23 import java.io.CharConversionException;
24 import java.io.File;
25 import java.io.FileInputStream;
26 import java.io.FileNotFoundException;
27 import java.io.FileOutputStream;
28 import java.io.IOException;
29 import java.io.InputStream;
30 import java.nio.charset.Charset;
31 import java.nio.charset.CharsetEncoder;
32 import java.nio.charset.IllegalCharsetNameException;
33 import java.nio.charset.StandardCharsets;
34 import java.nio.charset.UnsupportedCharsetException;
35 import java.util.ArrayList;
36 import java.util.List;
37 import java.util.UUID;
38 import java.util.logging.Level;
40 import org.apache.james.mime4j.dom.BinaryBody;
41 import org.apache.james.mime4j.dom.Body;
42 import org.apache.james.mime4j.dom.Entity;
43 import org.apache.james.mime4j.dom.Message;
44 import org.apache.james.mime4j.dom.Multipart;
45 import org.apache.james.mime4j.dom.TextBody;
46 import org.apache.james.mime4j.dom.address.AddressList;
47 import org.apache.james.mime4j.dom.address.Mailbox;
48 import org.apache.james.mime4j.dom.address.MailboxList;
49 import org.apache.james.mime4j.dom.field.ContentDispositionField;
50 import org.apache.james.mime4j.dom.field.ContentTypeField;
51 import org.apache.james.mime4j.mboxiterator.CharBufferWrapper;
52 import org.apache.james.mime4j.mboxiterator.MboxIterator;
53 import org.apache.james.mime4j.message.DefaultMessageBuilder;
54 import org.apache.james.mime4j.stream.Field;
55 import org.apache.james.mime4j.stream.MimeConfig;
56 import org.apache.tika.parser.txt.CharsetDetector;
57 import org.apache.tika.parser.txt.CharsetMatch;
58 import org.openide.util.NbBundle;
60 
67 class MboxParser {
68 
69  private static final Logger logger = Logger.getLogger(MboxParser.class.getName());
70  private DefaultMessageBuilder messageBuilder;
71  private IngestServices services;
72  private StringBuilder errors;
73 
77  private static final String HTML_TYPE = "text/html"; //NON-NLS
78 
82  private String localPath;
83 
84  MboxParser(IngestServices services, String localPath) {
85  this.services = services;
86  this.localPath = localPath;
87  messageBuilder = new DefaultMessageBuilder();
88  MimeConfig config = MimeConfig.custom().setMaxLineLen(-1).build();
89  // disable line length checks.
90  messageBuilder.setMimeEntityConfig(config);
91  errors = new StringBuilder();
92  }
93 
94  static boolean isValidMimeTypeMbox(byte[] buffer) {
95  return (new String(buffer)).startsWith("From "); //NON-NLS
96  }
97 
105  List<EmailMessage> parse(File mboxFile, long fileID) {
106  // Detect possible charsets
107  List<CharsetEncoder> encoders = getPossibleEncoders(mboxFile);
108 
109  CharsetEncoder theEncoder = null;
110  Iterable<CharBufferWrapper> mboxIterator = null;
111  // Loop through the possible encoders and find the first one that works.
112  // That will usually be one of the first ones.
113  for (CharsetEncoder encoder : encoders) {
114  try {
115  mboxIterator = MboxIterator.fromFile(mboxFile).charset(encoder.charset()).build();
116  theEncoder = encoder;
117  break;
118  } catch (CharConversionException | UnsupportedCharsetException ex) {
119  // Not the right encoder
120  } catch (IllegalArgumentException ex) {
121  // Not the right encoder
122  } catch (IOException ex) {
123  logger.log(Level.WARNING, "couldn't find mbox file.", ex); //NON-NLS
124  addErrorMessage(NbBundle.getMessage(this.getClass(), "MboxParser.parse.errMsg.failedToReadFile"));
125  return new ArrayList<>();
126  }
127  }
128 
129  // If no encoders work, post an error message and return.
130  if (mboxIterator == null || theEncoder == null) {
131  addErrorMessage(NbBundle.getMessage(this.getClass(), "MboxParser.parse.errMsg.couldntFindCharset"));
132  return new ArrayList<>();
133  }
134 
135  List<EmailMessage> emails = new ArrayList<>();
136  long failCount = 0;
137 
138  // Parse each message and extract an EmailMessage structure
139  for (CharBufferWrapper message : mboxIterator) {
140  try {
141  Message msg = messageBuilder.parseMessage(message.asInputStream(theEncoder.charset()));
142  emails.add(extractEmail(msg, fileID));
143  } catch (RuntimeException | IOException ex) {
144  logger.log(Level.WARNING, "Failed to get message from mbox: {0}", ex.getMessage()); //NON-NLS
145  failCount++;
146  }
147  }
148 
149  if (failCount > 0) {
150  addErrorMessage(
151  NbBundle.getMessage(this.getClass(), "MboxParser.parse.errMsg.failedToParseNMsgs", failCount));
152  }
153  return emails;
154  }
155 
156  String getErrors() {
157  return errors.toString();
158  }
159 
168  private EmailMessage extractEmail(Message msg, long fileID) {
169  EmailMessage email = new EmailMessage();
170  // Basic Info
171  email.setSender(getAddresses(msg.getFrom()));
172  email.setRecipients(getAddresses(msg.getTo()));
173  email.setBcc(getAddresses(msg.getBcc()));
174  email.setCc(getAddresses(msg.getCc()));
175  email.setSubject(msg.getSubject());
176  email.setSentDate(msg.getDate());
177  email.setLocalPath(localPath);
178 
179  // Body
180  if (msg.isMultipart()) {
181  handleMultipart(email, (Multipart) msg.getBody(), fileID);
182  } else {
183  handleTextBody(email, (TextBody) msg.getBody(), msg.getMimeType(), msg.getHeader().getFields());
184  }
185 
186  return email;
187  }
188 
197  private void handleMultipart(EmailMessage email, Multipart multi, long fileID) {
198  List<Entity> entities = multi.getBodyParts();
199  for (int index = 0; index < entities.size(); index++) {
200  Entity e = entities.get(index);
201  if (e.isMultipart()) {
202  handleMultipart(email, (Multipart) e.getBody(), fileID);
203  } else if (e.getDispositionType() != null
204  && e.getDispositionType().equals(ContentDispositionField.DISPOSITION_TYPE_ATTACHMENT)) {
205  handleAttachment(email, e, fileID, index);
206  } else if (e.getMimeType().equals(HTML_TYPE)
207  || e.getMimeType().equals(ContentTypeField.TYPE_TEXT_PLAIN)) {
208  handleTextBody(email, (TextBody) e.getBody(), e.getMimeType(), e.getHeader().getFields());
209  } else {
210  // Ignore other types.
211  }
212  }
213  }
214 
225  private void handleTextBody(EmailMessage email, TextBody tb, String type, List<Field> fields) {
226  BufferedReader r;
227  try {
228  r = new BufferedReader(tb.getReader());
229  StringBuilder bodyString = new StringBuilder();
230  String line;
231  while ((line = r.readLine()) != null) {
232  bodyString.append(line).append("\n");
233  }
234  bodyString.append("\n-----HEADERS-----\n");
235  for(Field field: fields) {
236  String nextLine = field.getName() + ": " + field.getBody();
237  bodyString.append("\n").append(nextLine);
238  }
239  bodyString.append("\n\n---END HEADERS--\n\n");
240 
241  switch (type) {
242  case ContentTypeField.TYPE_TEXT_PLAIN:
243  email.setTextBody(bodyString.toString());
244  break;
245  case HTML_TYPE:
246  email.setHtmlBody(bodyString.toString());
247  break;
248  default:
249  // Not interested in other text types.
250  break;
251  }
252  } catch (IOException ex) {
253  logger.log(Level.WARNING, "Error getting text body of mbox message", ex); //NON-NLS
254  }
255  }
256 
264  private void handleAttachment(EmailMessage email, Entity e, long fileID, int index) {
265  String outputDirPath = ThunderbirdMboxFileIngestModule.getModuleOutputPath() + File.separator;
266  String filename = e.getFilename();
267 
268  // sanitize name. Had an attachment with a Japanese encoded path that
269  // invalid characters and attachment could not be saved.
270  filename = filename.replaceAll("\\?", "_");
271  filename = filename.replaceAll("<", "_");
272  filename = filename.replaceAll(">", "_");
273  filename = filename.replaceAll(":", "_");
274  filename = filename.replaceAll("\"", "_");
275  filename = filename.replaceAll("/", "_");
276  filename = filename.replaceAll("\\\\", "_");
277  filename = filename.replaceAll("|", "_");
278  filename = filename.replaceAll("\\*", "_");
279 
280  // also had some crazy long names, so make random one if we get those.
281  // also from Japanese image that had encoded name
282  if (filename.length() > 64) {
283  filename = UUID.randomUUID().toString();
284  }
285 
286  String uniqueFilename = fileID + "-" + index + "-" + email.getSentDate() + "-" + filename;
287  String outPath = outputDirPath + uniqueFilename;
288  FileOutputStream fos;
289  BinaryBody bb;
290  try {
291  fos = new FileOutputStream(outPath);
292  } catch (FileNotFoundException ex) {
293  addErrorMessage(
294  NbBundle.getMessage(this.getClass(),
295  "MboxParser.handleAttch.errMsg.failedToCreateOnDisk", outPath));
296  logger.log(Level.INFO, "Failed to create file output stream for: " + outPath, ex); //NON-NLS
297  return;
298  }
299 
300  try {
301  Body b = e.getBody();
302  if (b instanceof BinaryBody) {
303  bb = (BinaryBody) b;
304  bb.writeTo(fos);
305  } else {
306  // This could potentially be other types. Only seen this once.
307  }
308  } catch (IOException ex) {
309  logger.log(Level.INFO, "Failed to write mbox email attachment to disk.", ex); //NON-NLS
310  addErrorMessage(NbBundle.getMessage(this.getClass(), "MboxParser.handleAttch.failedWriteToDisk", filename));
311  return;
312  } finally {
313  try {
314  fos.close();
315  } catch (IOException ex) {
316  logger.log(Level.INFO, "Failed to close file output stream", ex); //NON-NLS
317  }
318  }
319 
320  EmailMessage.Attachment attach = new EmailMessage.Attachment();
321  attach.setName(filename);
322  attach.setLocalPath(ThunderbirdMboxFileIngestModule.getRelModuleOutputPath()
323  + File.separator + uniqueFilename);
324  attach.setSize(new File(outPath).length());
325  email.addAttachment(attach);
326  }
327 
336  private String getAddresses(MailboxList mailboxList) {
337  if (mailboxList == null) {
338  return "";
339  }
340  StringBuilder addresses = new StringBuilder();
341  for (Mailbox m : mailboxList) {
342  addresses.append(m.toString()).append("; ");
343  }
344  return addresses.toString();
345  }
346 
355  private String getAddresses(AddressList addressList) {
356  return (addressList == null) ? "" : getAddresses(addressList.flatten());
357  }
358 
367  private List<CharsetEncoder> getPossibleEncoders(File mboxFile) {
368  InputStream is;
369  List<CharsetEncoder> possibleEncoders = new ArrayList<>();
370 
371  possibleEncoders.add(StandardCharsets.ISO_8859_1.newEncoder());
372  possibleEncoders.add(StandardCharsets.US_ASCII.newEncoder());
373  possibleEncoders.add(StandardCharsets.UTF_16.newEncoder());
374  possibleEncoders.add(StandardCharsets.UTF_16BE.newEncoder());
375  possibleEncoders.add(StandardCharsets.UTF_16LE.newEncoder());
376  possibleEncoders.add(StandardCharsets.UTF_8.newEncoder());
377 
378  try {
379  is = new BufferedInputStream(new FileInputStream(mboxFile));
380  } catch (FileNotFoundException ex) {
381  logger.log(Level.WARNING, "Failed to find mbox file while detecting charset"); //NON-NLS
382  return possibleEncoders;
383  }
384 
385  try {
386  CharsetDetector detector = new CharsetDetector();
387  detector.setText(is);
388  CharsetMatch[] matches = detector.detectAll();
389  for (CharsetMatch match : matches) {
390  try {
391  possibleEncoders.add(Charset.forName(match.getName()).newEncoder());
392  } catch (UnsupportedCharsetException | IllegalCharsetNameException ex) {
393  // Don't add unsupported charsets to the list
394  }
395  }
396  return possibleEncoders;
397  } catch (IOException | IllegalArgumentException ex) {
398  logger.log(Level.WARNING, "Failed to detect charset of mbox file.", ex); //NON-NLS
399  return possibleEncoders;
400  } finally {
401  try {
402  is.close();
403  } catch (IOException ex) {
404  logger.log(Level.INFO, "Failed to close input stream"); //NON-NLS
405  }
406  }
407  }
408 
409  private void addErrorMessage(String msg) {
410  errors.append("<li>").append(msg).append("</li>"); //NON-NLS
411  }
412 }
Logger getLogger(String moduleDisplayName)

Copyright © 2012-2015 Basis Technology. Generated on: Wed Apr 6 2016
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.