Autopsy  4.9.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
MboxParser.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2014 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.thunderbirdparser;
20 
21 import java.io.BufferedInputStream;
22 import java.io.BufferedReader;
23 import java.io.CharConversionException;
24 import java.io.File;
25 import java.io.FileInputStream;
26 import java.io.FileNotFoundException;
27 import java.io.FileOutputStream;
28 import java.io.IOException;
29 import java.io.InputStream;
30 import java.nio.charset.Charset;
31 import java.nio.charset.CharsetEncoder;
32 import java.nio.charset.IllegalCharsetNameException;
33 import java.nio.charset.StandardCharsets;
34 import java.nio.charset.UnsupportedCharsetException;
35 import java.util.ArrayList;
36 import java.util.List;
37 import java.util.UUID;
38 import java.util.logging.Level;
40 import org.apache.james.mime4j.dom.BinaryBody;
41 import org.apache.james.mime4j.dom.Body;
42 import org.apache.james.mime4j.dom.Entity;
43 import org.apache.james.mime4j.dom.Message;
44 import org.apache.james.mime4j.dom.Multipart;
45 import org.apache.james.mime4j.dom.TextBody;
46 import org.apache.james.mime4j.dom.address.AddressList;
47 import org.apache.james.mime4j.dom.address.Mailbox;
48 import org.apache.james.mime4j.dom.address.MailboxList;
49 import org.apache.james.mime4j.dom.field.ContentDispositionField;
50 import org.apache.james.mime4j.dom.field.ContentTypeField;
51 import org.apache.james.mime4j.mboxiterator.CharBufferWrapper;
52 import org.apache.james.mime4j.mboxiterator.MboxIterator;
53 import org.apache.james.mime4j.message.DefaultMessageBuilder;
54 import org.apache.james.mime4j.stream.Field;
55 import org.apache.james.mime4j.stream.MimeConfig;
56 import org.apache.tika.parser.txt.CharsetDetector;
57 import org.apache.tika.parser.txt.CharsetMatch;
58 import org.openide.util.NbBundle;
61 import org.sleuthkit.datamodel.TskData;
62 import org.sleuthkit.datamodel.EncodedFileOutputStream;
63 
70 class MboxParser {
71 
72  private static final Logger logger = Logger.getLogger(MboxParser.class.getName());
73  private DefaultMessageBuilder messageBuilder;
74  private IngestServices services;
75  private StringBuilder errors;
76 
80  private static final String HTML_TYPE = "text/html"; //NON-NLS
81 
85  private String localPath;
86 
87  MboxParser(IngestServices services, String localPath) {
88  this.services = services;
89  this.localPath = localPath;
90  messageBuilder = new DefaultMessageBuilder();
91  MimeConfig config = MimeConfig.custom().setMaxLineLen(-1).build();
92  // disable line length checks.
93  messageBuilder.setMimeEntityConfig(config);
94  errors = new StringBuilder();
95  }
96 
97  static boolean isValidMimeTypeMbox(byte[] buffer) {
98  return (new String(buffer)).startsWith("From "); //NON-NLS
99  }
100 
108  List<EmailMessage> parse(File mboxFile, long fileID) {
109  // Detect possible charsets
110  List<CharsetEncoder> encoders = getPossibleEncoders(mboxFile);
111 
112  CharsetEncoder theEncoder = null;
113  Iterable<CharBufferWrapper> mboxIterator = null;
114  // Loop through the possible encoders and find the first one that works.
115  // That will usually be one of the first ones.
116  for (CharsetEncoder encoder : encoders) {
117  try {
118  mboxIterator = MboxIterator.fromFile(mboxFile).charset(encoder.charset()).build();
119  theEncoder = encoder;
120  break;
121  } catch (CharConversionException | UnsupportedCharsetException ex) {
122  // Not the right encoder
123  } catch (IllegalArgumentException ex) {
124  // Not the right encoder
125  } catch (IOException ex) {
126  logger.log(Level.WARNING, "couldn't find mbox file.", ex); //NON-NLS
127  addErrorMessage(NbBundle.getMessage(this.getClass(), "MboxParser.parse.errMsg.failedToReadFile"));
128  return new ArrayList<>();
129  }
130  }
131 
132  // If no encoders work, post an error message and return.
133  if (mboxIterator == null || theEncoder == null) {
134  addErrorMessage(NbBundle.getMessage(this.getClass(), "MboxParser.parse.errMsg.couldntFindCharset"));
135  return new ArrayList<>();
136  }
137 
138  List<EmailMessage> emails = new ArrayList<>();
139  long failCount = 0;
140 
141  // Parse each message and extract an EmailMessage structure
142  for (CharBufferWrapper message : mboxIterator) {
143  try {
144  Message msg = messageBuilder.parseMessage(message.asInputStream(theEncoder.charset()));
145  emails.add(extractEmail(msg, fileID));
146  } catch (RuntimeException | IOException ex) {
147  logger.log(Level.WARNING, "Failed to get message from mbox: {0}", ex.getMessage()); //NON-NLS
148  failCount++;
149  }
150  }
151 
152  if (failCount > 0) {
153  addErrorMessage(
154  NbBundle.getMessage(this.getClass(), "MboxParser.parse.errMsg.failedToParseNMsgs", failCount));
155  }
156  return emails;
157  }
158 
159  String getErrors() {
160  return errors.toString();
161  }
162 
171  private EmailMessage extractEmail(Message msg, long fileID) {
172  EmailMessage email = new EmailMessage();
173  // Basic Info
174  email.setSender(getAddresses(msg.getFrom()));
175  email.setRecipients(getAddresses(msg.getTo()));
176  email.setBcc(getAddresses(msg.getBcc()));
177  email.setCc(getAddresses(msg.getCc()));
178  email.setSubject(msg.getSubject());
179  email.setSentDate(msg.getDate());
180  email.setLocalPath(localPath);
181 
182  // Body
183  if (msg.isMultipart()) {
184  handleMultipart(email, (Multipart) msg.getBody(), fileID);
185  } else {
186  handleTextBody(email, (TextBody) msg.getBody(), msg.getMimeType(), msg.getHeader().getFields());
187  }
188 
189  return email;
190  }
191 
200  private void handleMultipart(EmailMessage email, Multipart multi, long fileID) {
201  List<Entity> entities = multi.getBodyParts();
202  for (int index = 0; index < entities.size(); index++) {
203  Entity e = entities.get(index);
204  if (e.isMultipart()) {
205  handleMultipart(email, (Multipart) e.getBody(), fileID);
206  } else if (e.getDispositionType() != null
207  && e.getDispositionType().equals(ContentDispositionField.DISPOSITION_TYPE_ATTACHMENT)) {
208  handleAttachment(email, e, fileID, index);
209  } else if (e.getMimeType().equals(HTML_TYPE)
210  || e.getMimeType().equals(ContentTypeField.TYPE_TEXT_PLAIN)) {
211  handleTextBody(email, (TextBody) e.getBody(), e.getMimeType(), e.getHeader().getFields());
212  } else {
213  // Ignore other types.
214  }
215  }
216  }
217 
228  private void handleTextBody(EmailMessage email, TextBody tb, String type, List<Field> fields) {
229  BufferedReader r;
230  try {
231  r = new BufferedReader(tb.getReader());
232  StringBuilder bodyString = new StringBuilder();
233  StringBuilder headersString = new StringBuilder();
234  String line;
235  while ((line = r.readLine()) != null) {
236  bodyString.append(line).append("\n");
237  }
238 
239  headersString.append("\n-----HEADERS-----\n");
240  for(Field field: fields) {
241  String nextLine = field.getName() + ": " + field.getBody();
242  headersString.append("\n").append(nextLine);
243  }
244  headersString.append("\n\n---END HEADERS--\n\n");
245 
246  email.setHeaders(headersString.toString());
247 
248  switch (type) {
249  case ContentTypeField.TYPE_TEXT_PLAIN:
250  email.setTextBody(bodyString.toString());
251  break;
252  case HTML_TYPE:
253  email.setHtmlBody(bodyString.toString());
254  break;
255  default:
256  // Not interested in other text types.
257  break;
258  }
259  } catch (IOException ex) {
260  logger.log(Level.WARNING, "Error getting text body of mbox message", ex); //NON-NLS
261  }
262  }
263 
271  @NbBundle.Messages ({"MboxParser.handleAttch.noOpenCase.errMsg=Exception while getting open case."})
272  private void handleAttachment(EmailMessage email, Entity e, long fileID, int index) {
273  String outputDirPath;
274  String relModuleOutputPath;
275  try {
276  outputDirPath = ThunderbirdMboxFileIngestModule.getModuleOutputPath() + File.separator;
277  relModuleOutputPath = ThunderbirdMboxFileIngestModule.getRelModuleOutputPath() + File.separator;
278  } catch (NoCurrentCaseException ex) {
279  addErrorMessage(Bundle.MboxParser_handleAttch_noOpenCase_errMsg());
280  logger.log(Level.SEVERE, Bundle.MboxParser_handleAttch_noOpenCase_errMsg(), ex); //NON-NLS
281  return;
282  }
283  String filename = e.getFilename();
284 
285  // sanitize name. Had an attachment with a Japanese encoded path that
286  // invalid characters and attachment could not be saved.
287  filename = filename.replaceAll("\\?", "_");
288  filename = filename.replaceAll("<", "_");
289  filename = filename.replaceAll(">", "_");
290  filename = filename.replaceAll(":", "_");
291  filename = filename.replaceAll("\"", "_");
292  filename = filename.replaceAll("/", "_");
293  filename = filename.replaceAll("\\\\", "_");
294  filename = filename.replaceAll("|", "_");
295  filename = filename.replaceAll("\\*", "_");
296 
297  // also had some crazy long names, so make random one if we get those.
298  // also from Japanese image that had encoded name
299  if (filename.length() > 64) {
300  filename = UUID.randomUUID().toString();
301  }
302 
303  String uniqueFilename = fileID + "-" + index + "-" + email.getSentDate() + "-" + filename;
304  String outPath = outputDirPath + uniqueFilename;
305  EncodedFileOutputStream fos;
306  BinaryBody bb;
307  try {
308  fos = new EncodedFileOutputStream(new FileOutputStream(outPath), TskData.EncodingType.XOR1);
309  } catch (IOException ex) {
310  addErrorMessage(
311  NbBundle.getMessage(this.getClass(),
312  "MboxParser.handleAttch.errMsg.failedToCreateOnDisk", outPath));
313  logger.log(Level.INFO, "Failed to create file output stream for: " + outPath, ex); //NON-NLS
314  return;
315  }
316 
317  try {
318  Body b = e.getBody();
319  if (b instanceof BinaryBody) {
320  bb = (BinaryBody) b;
321  bb.writeTo(fos);
322  } else {
323  // This could potentially be other types. Only seen this once.
324  }
325  } catch (IOException ex) {
326  logger.log(Level.INFO, "Failed to write mbox email attachment to disk.", ex); //NON-NLS
327  addErrorMessage(NbBundle.getMessage(this.getClass(), "MboxParser.handleAttch.failedWriteToDisk", filename));
328  return;
329  } finally {
330  try {
331  fos.close();
332  } catch (IOException ex) {
333  logger.log(Level.INFO, "Failed to close file output stream", ex); //NON-NLS
334  }
335  }
336 
337  EmailMessage.Attachment attach = new EmailMessage.Attachment();
338  attach.setName(filename);
339  attach.setLocalPath(relModuleOutputPath + uniqueFilename);
340  attach.setSize(new File(outPath).length());
341  attach.setEncodingType(TskData.EncodingType.XOR1);
342  email.addAttachment(attach);
343  }
344 
353  private String getAddresses(MailboxList mailboxList) {
354  if (mailboxList == null) {
355  return "";
356  }
357  StringBuilder addresses = new StringBuilder();
358  for (Mailbox m : mailboxList) {
359  addresses.append(m.toString()).append("; ");
360  }
361  return addresses.toString();
362  }
363 
372  private String getAddresses(AddressList addressList) {
373  return (addressList == null) ? "" : getAddresses(addressList.flatten());
374  }
375 
384  private List<CharsetEncoder> getPossibleEncoders(File mboxFile) {
385  InputStream is;
386  List<CharsetEncoder> possibleEncoders = new ArrayList<>();
387 
388  possibleEncoders.add(StandardCharsets.ISO_8859_1.newEncoder());
389  possibleEncoders.add(StandardCharsets.US_ASCII.newEncoder());
390  possibleEncoders.add(StandardCharsets.UTF_16.newEncoder());
391  possibleEncoders.add(StandardCharsets.UTF_16BE.newEncoder());
392  possibleEncoders.add(StandardCharsets.UTF_16LE.newEncoder());
393  possibleEncoders.add(StandardCharsets.UTF_8.newEncoder());
394 
395  try {
396  is = new BufferedInputStream(new FileInputStream(mboxFile));
397  } catch (FileNotFoundException ex) {
398  logger.log(Level.WARNING, "Failed to find mbox file while detecting charset"); //NON-NLS
399  return possibleEncoders;
400  }
401 
402  try {
403  CharsetDetector detector = new CharsetDetector();
404  detector.setText(is);
405  CharsetMatch[] matches = detector.detectAll();
406  for (CharsetMatch match : matches) {
407  try {
408  possibleEncoders.add(Charset.forName(match.getName()).newEncoder());
409  } catch (UnsupportedCharsetException | IllegalCharsetNameException ex) {
410  // Don't add unsupported charsets to the list
411  }
412  }
413  return possibleEncoders;
414  } catch (IOException | IllegalArgumentException ex) {
415  logger.log(Level.WARNING, "Failed to detect charset of mbox file.", ex); //NON-NLS
416  return possibleEncoders;
417  } finally {
418  try {
419  is.close();
420  } catch (IOException ex) {
421  logger.log(Level.INFO, "Failed to close input stream"); //NON-NLS
422  }
423  }
424  }
425 
426  private void addErrorMessage(String msg) {
427  errors.append("<li>").append(msg).append("</li>"); //NON-NLS
428  }
429 }

Copyright © 2012-2018 Basis Technology. Generated on: Tue Dec 18 2018
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.