Autopsy  4.11.0
Graphical digital forensics platform for The Sleuth Kit and other tools.
MboxParser.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2014 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.thunderbirdparser;
20 
21 import java.io.BufferedInputStream;
22 import java.io.BufferedReader;
23 import java.io.CharConversionException;
24 import java.io.File;
25 import java.io.FileInputStream;
26 import java.io.FileNotFoundException;
27 import java.io.FileOutputStream;
28 import java.io.IOException;
29 import java.io.InputStream;
30 import java.nio.charset.Charset;
31 import java.nio.charset.CharsetEncoder;
32 import java.nio.charset.IllegalCharsetNameException;
33 import java.nio.charset.StandardCharsets;
34 import java.nio.charset.UnsupportedCharsetException;
35 import java.util.ArrayList;
36 import java.util.List;
37 import java.util.UUID;
38 import java.util.logging.Level;
40 import org.apache.james.mime4j.dom.BinaryBody;
41 import org.apache.james.mime4j.dom.Body;
42 import org.apache.james.mime4j.dom.Entity;
43 import org.apache.james.mime4j.dom.Message;
44 import org.apache.james.mime4j.dom.Multipart;
45 import org.apache.james.mime4j.dom.TextBody;
46 import org.apache.james.mime4j.dom.address.AddressList;
47 import org.apache.james.mime4j.dom.address.Mailbox;
48 import org.apache.james.mime4j.dom.address.MailboxList;
49 import org.apache.james.mime4j.dom.field.ContentDispositionField;
50 import org.apache.james.mime4j.dom.field.ContentTypeField;
51 import org.apache.james.mime4j.mboxiterator.CharBufferWrapper;
52 import org.apache.james.mime4j.mboxiterator.MboxIterator;
53 import org.apache.james.mime4j.message.DefaultMessageBuilder;
54 import org.apache.james.mime4j.stream.Field;
55 import org.apache.james.mime4j.stream.MimeConfig;
56 import org.apache.tika.parser.txt.CharsetDetector;
57 import org.apache.tika.parser.txt.CharsetMatch;
58 import org.openide.util.NbBundle;
61 import org.sleuthkit.datamodel.TskData;
62 import org.sleuthkit.datamodel.EncodedFileOutputStream;
63 
70 class MboxParser {
71 
72  private static final Logger logger = Logger.getLogger(MboxParser.class.getName());
73  private DefaultMessageBuilder messageBuilder;
74  private IngestServices services;
75  private StringBuilder errors;
76 
80  private static final String HTML_TYPE = "text/html"; //NON-NLS
81 
85  private String localPath;
86 
87  MboxParser(IngestServices services, String localPath) {
88  this.services = services;
89  this.localPath = localPath;
90  messageBuilder = new DefaultMessageBuilder();
91  MimeConfig config = MimeConfig.custom().setMaxLineLen(-1).build();
92  // disable line length checks.
93  messageBuilder.setMimeEntityConfig(config);
94  errors = new StringBuilder();
95  }
96 
97  static boolean isValidMimeTypeMbox(byte[] buffer) {
98  return (new String(buffer)).startsWith("From "); //NON-NLS
99  }
100 
108  List<EmailMessage> parse(File mboxFile, long fileID) {
109  // Detect possible charsets
110  List<CharsetEncoder> encoders = getPossibleEncoders(mboxFile);
111 
112  CharsetEncoder theEncoder = null;
113  Iterable<CharBufferWrapper> mboxIterator = null;
114  // Loop through the possible encoders and find the first one that works.
115  // That will usually be one of the first ones.
116  for (CharsetEncoder encoder : encoders) {
117  try {
118  mboxIterator = MboxIterator.fromFile(mboxFile).charset(encoder.charset()).build();
119  theEncoder = encoder;
120  break;
121  } catch (CharConversionException | UnsupportedCharsetException ex) {
122  // Not the right encoder
123  } catch (IllegalArgumentException ex) {
124  // Not the right encoder
125  } catch (IOException ex) {
126  logger.log(Level.WARNING, "couldn't find mbox file.", ex); //NON-NLS
127  addErrorMessage(NbBundle.getMessage(this.getClass(), "MboxParser.parse.errMsg.failedToReadFile"));
128  return new ArrayList<>();
129  }
130  }
131 
132  // If no encoders work, post an error message and return.
133  if (mboxIterator == null || theEncoder == null) {
134  addErrorMessage(NbBundle.getMessage(this.getClass(), "MboxParser.parse.errMsg.couldntFindCharset"));
135  return new ArrayList<>();
136  }
137 
138  List<EmailMessage> emails = new ArrayList<>();
139  long failCount = 0;
140 
141  // Parse each message and extract an EmailMessage structure
142  for (CharBufferWrapper message : mboxIterator) {
143  try {
144  Message msg = messageBuilder.parseMessage(message.asInputStream(theEncoder.charset()));
145  emails.add(extractEmail(msg, fileID));
146  } catch (RuntimeException | IOException ex) {
147  logger.log(Level.WARNING, "Failed to get message from mbox: {0}", ex.getMessage()); //NON-NLS
148  failCount++;
149  }
150  }
151 
152  if (failCount > 0) {
153  addErrorMessage(
154  NbBundle.getMessage(this.getClass(), "MboxParser.parse.errMsg.failedToParseNMsgs", failCount));
155  }
156  return emails;
157  }
158 
159  String getErrors() {
160  return errors.toString();
161  }
162 
171  private EmailMessage extractEmail(Message msg, long fileID) {
172  EmailMessage email = new EmailMessage();
173  // Basic Info
174  email.setSender(getAddresses(msg.getFrom()));
175  email.setRecipients(getAddresses(msg.getTo()));
176  email.setBcc(getAddresses(msg.getBcc()));
177  email.setCc(getAddresses(msg.getCc()));
178  email.setSubject(msg.getSubject());
179  email.setSentDate(msg.getDate());
180  email.setLocalPath(localPath);
181  email.setMessageID(msg.getMessageId());
182 
183  Field field = msg.getHeader().getField("in-reply-to"); //NON-NLS
184  String inReplyTo = null;
185 
186  if (field != null) {
187  inReplyTo = field.getBody();
188  email.setInReplyToID(inReplyTo);
189  }
190 
191  field = msg.getHeader().getField("references");
192  if (field != null) {
193  List<String> references = new ArrayList<>();
194  for (String id : field.getBody().split(">")) {
195  references.add(id.trim() + ">");
196  }
197 
198  if (!references.contains(inReplyTo)) {
199  references.add(inReplyTo);
200  }
201 
202  email.setReferences(references);
203  }
204 
205  // Body
206  if (msg.isMultipart()) {
207  handleMultipart(email, (Multipart) msg.getBody(), fileID);
208  } else {
209  handleTextBody(email, (TextBody) msg.getBody(), msg.getMimeType(), msg.getHeader().getFields());
210  }
211 
212  return email;
213  }
214 
223  private void handleMultipart(EmailMessage email, Multipart multi, long fileID) {
224  List<Entity> entities = multi.getBodyParts();
225  for (int index = 0; index < entities.size(); index++) {
226  Entity e = entities.get(index);
227  if (e.isMultipart()) {
228  handleMultipart(email, (Multipart) e.getBody(), fileID);
229  } else if (e.getDispositionType() != null
230  && e.getDispositionType().equals(ContentDispositionField.DISPOSITION_TYPE_ATTACHMENT)) {
231  handleAttachment(email, e, fileID, index);
232  } else if (e.getMimeType().equals(HTML_TYPE)
233  || e.getMimeType().equals(ContentTypeField.TYPE_TEXT_PLAIN)) {
234  handleTextBody(email, (TextBody) e.getBody(), e.getMimeType(), e.getHeader().getFields());
235  } else {
236  // Ignore other types.
237  }
238  }
239  }
240 
251  private void handleTextBody(EmailMessage email, TextBody tb, String type, List<Field> fields) {
252  BufferedReader r;
253  try {
254  r = new BufferedReader(tb.getReader());
255  StringBuilder bodyString = new StringBuilder();
256  StringBuilder headersString = new StringBuilder();
257  String line;
258  while ((line = r.readLine()) != null) {
259  bodyString.append(line).append("\n");
260  }
261 
262  headersString.append("\n-----HEADERS-----\n");
263  for(Field field: fields) {
264  String nextLine = field.getName() + ": " + field.getBody();
265  headersString.append("\n").append(nextLine);
266  }
267  headersString.append("\n\n---END HEADERS--\n\n");
268 
269  email.setHeaders(headersString.toString());
270 
271  switch (type) {
272  case ContentTypeField.TYPE_TEXT_PLAIN:
273  email.setTextBody(bodyString.toString());
274  break;
275  case HTML_TYPE:
276  email.setHtmlBody(bodyString.toString());
277  break;
278  default:
279  // Not interested in other text types.
280  break;
281  }
282  } catch (IOException ex) {
283  logger.log(Level.WARNING, "Error getting text body of mbox message", ex); //NON-NLS
284  }
285  }
286 
294  @NbBundle.Messages ({"MboxParser.handleAttch.noOpenCase.errMsg=Exception while getting open case."})
295  private void handleAttachment(EmailMessage email, Entity e, long fileID, int index) {
296  String outputDirPath;
297  String relModuleOutputPath;
298  try {
299  outputDirPath = ThunderbirdMboxFileIngestModule.getModuleOutputPath() + File.separator;
300  relModuleOutputPath = ThunderbirdMboxFileIngestModule.getRelModuleOutputPath() + File.separator;
301  } catch (NoCurrentCaseException ex) {
302  addErrorMessage(Bundle.MboxParser_handleAttch_noOpenCase_errMsg());
303  logger.log(Level.SEVERE, Bundle.MboxParser_handleAttch_noOpenCase_errMsg(), ex); //NON-NLS
304  return;
305  }
306  String filename = e.getFilename();
307 
308  // sanitize name. Had an attachment with a Japanese encoded path that
309  // invalid characters and attachment could not be saved.
310  filename = filename.replaceAll("\\?", "_");
311  filename = filename.replaceAll("<", "_");
312  filename = filename.replaceAll(">", "_");
313  filename = filename.replaceAll(":", "_");
314  filename = filename.replaceAll("\"", "_");
315  filename = filename.replaceAll("/", "_");
316  filename = filename.replaceAll("\\\\", "_");
317  filename = filename.replaceAll("|", "_");
318  filename = filename.replaceAll("\\*", "_");
319 
320  // also had some crazy long names, so make random one if we get those.
321  // also from Japanese image that had encoded name
322  if (filename.length() > 64) {
323  filename = UUID.randomUUID().toString();
324  }
325 
326  String uniqueFilename = fileID + "-" + index + "-" + email.getSentDate() + "-" + filename;
327  String outPath = outputDirPath + uniqueFilename;
328  EncodedFileOutputStream fos;
329  BinaryBody bb;
330  try {
331  fos = new EncodedFileOutputStream(new FileOutputStream(outPath), TskData.EncodingType.XOR1);
332  } catch (IOException ex) {
333  addErrorMessage(
334  NbBundle.getMessage(this.getClass(),
335  "MboxParser.handleAttch.errMsg.failedToCreateOnDisk", outPath));
336  logger.log(Level.INFO, "Failed to create file output stream for: " + outPath, ex); //NON-NLS
337  return;
338  }
339 
340  try {
341  Body b = e.getBody();
342  if (b instanceof BinaryBody) {
343  bb = (BinaryBody) b;
344  bb.writeTo(fos);
345  } else {
346  // This could potentially be other types. Only seen this once.
347  }
348  } catch (IOException ex) {
349  logger.log(Level.INFO, "Failed to write mbox email attachment to disk.", ex); //NON-NLS
350  addErrorMessage(NbBundle.getMessage(this.getClass(), "MboxParser.handleAttch.failedWriteToDisk", filename));
351  return;
352  } finally {
353  try {
354  fos.close();
355  } catch (IOException ex) {
356  logger.log(Level.INFO, "Failed to close file output stream", ex); //NON-NLS
357  }
358  }
359 
360  EmailMessage.Attachment attach = new EmailMessage.Attachment();
361  attach.setName(filename);
362  attach.setLocalPath(relModuleOutputPath + uniqueFilename);
363  attach.setSize(new File(outPath).length());
364  attach.setEncodingType(TskData.EncodingType.XOR1);
365  email.addAttachment(attach);
366  }
367 
376  private String getAddresses(MailboxList mailboxList) {
377  if (mailboxList == null) {
378  return "";
379  }
380  StringBuilder addresses = new StringBuilder();
381  for (Mailbox m : mailboxList) {
382  addresses.append(m.toString()).append("; ");
383  }
384  return addresses.toString();
385  }
386 
395  private String getAddresses(AddressList addressList) {
396  return (addressList == null) ? "" : getAddresses(addressList.flatten());
397  }
398 
407  private List<CharsetEncoder> getPossibleEncoders(File mboxFile) {
408  InputStream is;
409  List<CharsetEncoder> possibleEncoders = new ArrayList<>();
410 
411  possibleEncoders.add(StandardCharsets.ISO_8859_1.newEncoder());
412  possibleEncoders.add(StandardCharsets.US_ASCII.newEncoder());
413  possibleEncoders.add(StandardCharsets.UTF_16.newEncoder());
414  possibleEncoders.add(StandardCharsets.UTF_16BE.newEncoder());
415  possibleEncoders.add(StandardCharsets.UTF_16LE.newEncoder());
416  possibleEncoders.add(StandardCharsets.UTF_8.newEncoder());
417 
418  try {
419  is = new BufferedInputStream(new FileInputStream(mboxFile));
420  } catch (FileNotFoundException ex) {
421  logger.log(Level.WARNING, "Failed to find mbox file while detecting charset"); //NON-NLS
422  return possibleEncoders;
423  }
424 
425  try {
426  CharsetDetector detector = new CharsetDetector();
427  detector.setText(is);
428  CharsetMatch[] matches = detector.detectAll();
429  for (CharsetMatch match : matches) {
430  try {
431  possibleEncoders.add(Charset.forName(match.getName()).newEncoder());
432  } catch (UnsupportedCharsetException | IllegalCharsetNameException ex) {
433  // Don't add unsupported charsets to the list
434  }
435  }
436  return possibleEncoders;
437  } catch (IOException | IllegalArgumentException ex) {
438  logger.log(Level.WARNING, "Failed to detect charset of mbox file.", ex); //NON-NLS
439  return possibleEncoders;
440  } finally {
441  try {
442  is.close();
443  } catch (IOException ex) {
444  logger.log(Level.INFO, "Failed to close input stream"); //NON-NLS
445  }
446  }
447  }
448 
449  private void addErrorMessage(String msg) {
450  errors.append("<li>").append(msg).append("</li>"); //NON-NLS
451  }
452 }

Copyright © 2012-2018 Basis Technology. Generated on: Fri Jun 21 2019
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.