Autopsy  4.4
Graphical digital forensics platform for The Sleuth Kit and other tools.
MboxParser.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2014 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.thunderbirdparser;
20 
21 import java.io.BufferedInputStream;
22 import java.io.BufferedReader;
23 import java.io.CharConversionException;
24 import java.io.File;
25 import java.io.FileInputStream;
26 import java.io.FileNotFoundException;
27 import java.io.FileOutputStream;
28 import java.io.IOException;
29 import java.io.InputStream;
30 import java.nio.charset.Charset;
31 import java.nio.charset.CharsetEncoder;
32 import java.nio.charset.IllegalCharsetNameException;
33 import java.nio.charset.StandardCharsets;
34 import java.nio.charset.UnsupportedCharsetException;
35 import java.util.ArrayList;
36 import java.util.List;
37 import java.util.UUID;
38 import java.util.logging.Level;
40 import org.apache.james.mime4j.dom.BinaryBody;
41 import org.apache.james.mime4j.dom.Body;
42 import org.apache.james.mime4j.dom.Entity;
43 import org.apache.james.mime4j.dom.Message;
44 import org.apache.james.mime4j.dom.Multipart;
45 import org.apache.james.mime4j.dom.TextBody;
46 import org.apache.james.mime4j.dom.address.AddressList;
47 import org.apache.james.mime4j.dom.address.Mailbox;
48 import org.apache.james.mime4j.dom.address.MailboxList;
49 import org.apache.james.mime4j.dom.field.ContentDispositionField;
50 import org.apache.james.mime4j.dom.field.ContentTypeField;
51 import org.apache.james.mime4j.mboxiterator.CharBufferWrapper;
52 import org.apache.james.mime4j.mboxiterator.MboxIterator;
53 import org.apache.james.mime4j.message.DefaultMessageBuilder;
54 import org.apache.james.mime4j.stream.Field;
55 import org.apache.james.mime4j.stream.MimeConfig;
56 import org.apache.tika.parser.txt.CharsetDetector;
57 import org.apache.tika.parser.txt.CharsetMatch;
58 import org.openide.util.NbBundle;
60 import org.sleuthkit.datamodel.TskData;
61 import org.sleuthkit.datamodel.EncodedFileOutputStream;
62 
69 class MboxParser {
70 
71  private static final Logger logger = Logger.getLogger(MboxParser.class.getName());
72  private DefaultMessageBuilder messageBuilder;
73  private IngestServices services;
74  private StringBuilder errors;
75 
79  private static final String HTML_TYPE = "text/html"; //NON-NLS
80 
84  private String localPath;
85 
86  MboxParser(IngestServices services, String localPath) {
87  this.services = services;
88  this.localPath = localPath;
89  messageBuilder = new DefaultMessageBuilder();
90  MimeConfig config = MimeConfig.custom().setMaxLineLen(-1).build();
91  // disable line length checks.
92  messageBuilder.setMimeEntityConfig(config);
93  errors = new StringBuilder();
94  }
95 
96  static boolean isValidMimeTypeMbox(byte[] buffer) {
97  return (new String(buffer)).startsWith("From "); //NON-NLS
98  }
99 
107  List<EmailMessage> parse(File mboxFile, long fileID) {
108  // Detect possible charsets
109  List<CharsetEncoder> encoders = getPossibleEncoders(mboxFile);
110 
111  CharsetEncoder theEncoder = null;
112  Iterable<CharBufferWrapper> mboxIterator = null;
113  // Loop through the possible encoders and find the first one that works.
114  // That will usually be one of the first ones.
115  for (CharsetEncoder encoder : encoders) {
116  try {
117  mboxIterator = MboxIterator.fromFile(mboxFile).charset(encoder.charset()).build();
118  theEncoder = encoder;
119  break;
120  } catch (CharConversionException | UnsupportedCharsetException ex) {
121  // Not the right encoder
122  } catch (IllegalArgumentException ex) {
123  // Not the right encoder
124  } catch (IOException ex) {
125  logger.log(Level.WARNING, "couldn't find mbox file.", ex); //NON-NLS
126  addErrorMessage(NbBundle.getMessage(this.getClass(), "MboxParser.parse.errMsg.failedToReadFile"));
127  return new ArrayList<>();
128  }
129  }
130 
131  // If no encoders work, post an error message and return.
132  if (mboxIterator == null || theEncoder == null) {
133  addErrorMessage(NbBundle.getMessage(this.getClass(), "MboxParser.parse.errMsg.couldntFindCharset"));
134  return new ArrayList<>();
135  }
136 
137  List<EmailMessage> emails = new ArrayList<>();
138  long failCount = 0;
139 
140  // Parse each message and extract an EmailMessage structure
141  for (CharBufferWrapper message : mboxIterator) {
142  try {
143  Message msg = messageBuilder.parseMessage(message.asInputStream(theEncoder.charset()));
144  emails.add(extractEmail(msg, fileID));
145  } catch (RuntimeException | IOException ex) {
146  logger.log(Level.WARNING, "Failed to get message from mbox: {0}", ex.getMessage()); //NON-NLS
147  failCount++;
148  }
149  }
150 
151  if (failCount > 0) {
152  addErrorMessage(
153  NbBundle.getMessage(this.getClass(), "MboxParser.parse.errMsg.failedToParseNMsgs", failCount));
154  }
155  return emails;
156  }
157 
158  String getErrors() {
159  return errors.toString();
160  }
161 
170  private EmailMessage extractEmail(Message msg, long fileID) {
171  EmailMessage email = new EmailMessage();
172  // Basic Info
173  email.setSender(getAddresses(msg.getFrom()));
174  email.setRecipients(getAddresses(msg.getTo()));
175  email.setBcc(getAddresses(msg.getBcc()));
176  email.setCc(getAddresses(msg.getCc()));
177  email.setSubject(msg.getSubject());
178  email.setSentDate(msg.getDate());
179  email.setLocalPath(localPath);
180 
181  // Body
182  if (msg.isMultipart()) {
183  handleMultipart(email, (Multipart) msg.getBody(), fileID);
184  } else {
185  handleTextBody(email, (TextBody) msg.getBody(), msg.getMimeType(), msg.getHeader().getFields());
186  }
187 
188  return email;
189  }
190 
199  private void handleMultipart(EmailMessage email, Multipart multi, long fileID) {
200  List<Entity> entities = multi.getBodyParts();
201  for (int index = 0; index < entities.size(); index++) {
202  Entity e = entities.get(index);
203  if (e.isMultipart()) {
204  handleMultipart(email, (Multipart) e.getBody(), fileID);
205  } else if (e.getDispositionType() != null
206  && e.getDispositionType().equals(ContentDispositionField.DISPOSITION_TYPE_ATTACHMENT)) {
207  handleAttachment(email, e, fileID, index);
208  } else if (e.getMimeType().equals(HTML_TYPE)
209  || e.getMimeType().equals(ContentTypeField.TYPE_TEXT_PLAIN)) {
210  handleTextBody(email, (TextBody) e.getBody(), e.getMimeType(), e.getHeader().getFields());
211  } else {
212  // Ignore other types.
213  }
214  }
215  }
216 
227  private void handleTextBody(EmailMessage email, TextBody tb, String type, List<Field> fields) {
228  BufferedReader r;
229  try {
230  r = new BufferedReader(tb.getReader());
231  StringBuilder bodyString = new StringBuilder();
232  StringBuilder headersString = new StringBuilder();
233  String line;
234  while ((line = r.readLine()) != null) {
235  bodyString.append(line).append("\n");
236  }
237 
238  headersString.append("\n-----HEADERS-----\n");
239  for(Field field: fields) {
240  String nextLine = field.getName() + ": " + field.getBody();
241  headersString.append("\n").append(nextLine);
242  }
243  headersString.append("\n\n---END HEADERS--\n\n");
244 
245  email.setHeaders(headersString.toString());
246 
247  switch (type) {
248  case ContentTypeField.TYPE_TEXT_PLAIN:
249  email.setTextBody(bodyString.toString());
250  break;
251  case HTML_TYPE:
252  email.setHtmlBody(bodyString.toString());
253  break;
254  default:
255  // Not interested in other text types.
256  break;
257  }
258  } catch (IOException ex) {
259  logger.log(Level.WARNING, "Error getting text body of mbox message", ex); //NON-NLS
260  }
261  }
262 
270  private void handleAttachment(EmailMessage email, Entity e, long fileID, int index) {
271  String outputDirPath = ThunderbirdMboxFileIngestModule.getModuleOutputPath() + File.separator;
272  String filename = e.getFilename();
273 
274  // sanitize name. Had an attachment with a Japanese encoded path that
275  // invalid characters and attachment could not be saved.
276  filename = filename.replaceAll("\\?", "_");
277  filename = filename.replaceAll("<", "_");
278  filename = filename.replaceAll(">", "_");
279  filename = filename.replaceAll(":", "_");
280  filename = filename.replaceAll("\"", "_");
281  filename = filename.replaceAll("/", "_");
282  filename = filename.replaceAll("\\\\", "_");
283  filename = filename.replaceAll("|", "_");
284  filename = filename.replaceAll("\\*", "_");
285 
286  // also had some crazy long names, so make random one if we get those.
287  // also from Japanese image that had encoded name
288  if (filename.length() > 64) {
289  filename = UUID.randomUUID().toString();
290  }
291 
292  String uniqueFilename = fileID + "-" + index + "-" + email.getSentDate() + "-" + filename;
293  String outPath = outputDirPath + uniqueFilename;
294  EncodedFileOutputStream fos;
295  BinaryBody bb;
296  try {
297  fos = new EncodedFileOutputStream(new FileOutputStream(outPath), TskData.EncodingType.XOR1);
298  } catch (IOException ex) {
299  addErrorMessage(
300  NbBundle.getMessage(this.getClass(),
301  "MboxParser.handleAttch.errMsg.failedToCreateOnDisk", outPath));
302  logger.log(Level.INFO, "Failed to create file output stream for: " + outPath, ex); //NON-NLS
303  return;
304  }
305 
306  try {
307  Body b = e.getBody();
308  if (b instanceof BinaryBody) {
309  bb = (BinaryBody) b;
310  bb.writeTo(fos);
311  } else {
312  // This could potentially be other types. Only seen this once.
313  }
314  } catch (IOException ex) {
315  logger.log(Level.INFO, "Failed to write mbox email attachment to disk.", ex); //NON-NLS
316  addErrorMessage(NbBundle.getMessage(this.getClass(), "MboxParser.handleAttch.failedWriteToDisk", filename));
317  return;
318  } finally {
319  try {
320  fos.close();
321  } catch (IOException ex) {
322  logger.log(Level.INFO, "Failed to close file output stream", ex); //NON-NLS
323  }
324  }
325 
326  EmailMessage.Attachment attach = new EmailMessage.Attachment();
327  attach.setName(filename);
328  attach.setLocalPath(ThunderbirdMboxFileIngestModule.getRelModuleOutputPath()
329  + File.separator + uniqueFilename);
330  attach.setSize(new File(outPath).length());
331  attach.setEncodingType(TskData.EncodingType.XOR1);
332  email.addAttachment(attach);
333  }
334 
343  private String getAddresses(MailboxList mailboxList) {
344  if (mailboxList == null) {
345  return "";
346  }
347  StringBuilder addresses = new StringBuilder();
348  for (Mailbox m : mailboxList) {
349  addresses.append(m.toString()).append("; ");
350  }
351  return addresses.toString();
352  }
353 
362  private String getAddresses(AddressList addressList) {
363  return (addressList == null) ? "" : getAddresses(addressList.flatten());
364  }
365 
374  private List<CharsetEncoder> getPossibleEncoders(File mboxFile) {
375  InputStream is;
376  List<CharsetEncoder> possibleEncoders = new ArrayList<>();
377 
378  possibleEncoders.add(StandardCharsets.ISO_8859_1.newEncoder());
379  possibleEncoders.add(StandardCharsets.US_ASCII.newEncoder());
380  possibleEncoders.add(StandardCharsets.UTF_16.newEncoder());
381  possibleEncoders.add(StandardCharsets.UTF_16BE.newEncoder());
382  possibleEncoders.add(StandardCharsets.UTF_16LE.newEncoder());
383  possibleEncoders.add(StandardCharsets.UTF_8.newEncoder());
384 
385  try {
386  is = new BufferedInputStream(new FileInputStream(mboxFile));
387  } catch (FileNotFoundException ex) {
388  logger.log(Level.WARNING, "Failed to find mbox file while detecting charset"); //NON-NLS
389  return possibleEncoders;
390  }
391 
392  try {
393  CharsetDetector detector = new CharsetDetector();
394  detector.setText(is);
395  CharsetMatch[] matches = detector.detectAll();
396  for (CharsetMatch match : matches) {
397  try {
398  possibleEncoders.add(Charset.forName(match.getName()).newEncoder());
399  } catch (UnsupportedCharsetException | IllegalCharsetNameException ex) {
400  // Don't add unsupported charsets to the list
401  }
402  }
403  return possibleEncoders;
404  } catch (IOException | IllegalArgumentException ex) {
405  logger.log(Level.WARNING, "Failed to detect charset of mbox file.", ex); //NON-NLS
406  return possibleEncoders;
407  } finally {
408  try {
409  is.close();
410  } catch (IOException ex) {
411  logger.log(Level.INFO, "Failed to close input stream"); //NON-NLS
412  }
413  }
414  }
415 
416  private void addErrorMessage(String msg) {
417  errors.append("<li>").append(msg).append("</li>"); //NON-NLS
418  }
419 }

Copyright © 2012-2016 Basis Technology. Generated on: Tue Jun 13 2017
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.