Autopsy  4.15.0
Graphical digital forensics platform for The Sleuth Kit and other tools.
MboxParser.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2019 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.thunderbirdparser;
20 
21 import java.io.BufferedInputStream;
22 import java.io.CharConversionException;
23 import java.io.File;
24 import java.io.FileInputStream;
25 import java.io.FileNotFoundException;
26 import java.io.IOException;
27 import java.io.InputStream;
28 import java.nio.charset.Charset;
29 import java.nio.charset.CharsetEncoder;
30 import java.nio.charset.IllegalCharsetNameException;
31 import java.nio.charset.StandardCharsets;
32 import java.nio.charset.UnsupportedCharsetException;
33 import java.util.ArrayList;
34 import java.util.Iterator;
35 import java.util.List;
36 import java.util.logging.Level;
38 import org.apache.james.mime4j.dom.Message;
39 import org.apache.james.mime4j.mboxiterator.CharBufferWrapper;
40 import org.apache.james.mime4j.mboxiterator.MboxIterator;
41 import org.apache.tika.parser.txt.CharsetDetector;
42 import org.apache.tika.parser.txt.CharsetMatch;
43 import org.openide.util.NbBundle;
44 
48 class MboxParser extends MimeJ4MessageParser implements Iterator<EmailMessage> {
49 
50  private static final Logger logger = Logger.getLogger(MboxParser.class.getName());
51 
52  private Iterator<EmailMessage> emailIterator = null;
53 
54  private MboxParser(String localPath) {
55  setLocalPath(localPath);
56  }
57 
58  static boolean isValidMimeTypeMbox(byte[] buffer) {
59  return (new String(buffer)).startsWith("From "); //NON-NLS
60  }
61 
72  static MboxParser getThreadInfoIterator(String localPath, File mboxFile) {
73  MboxParser parser = new MboxParser(localPath);
74  parser.createIterator(mboxFile, 0, false);
75  return parser;
76  }
77 
88  static MboxParser getEmailIterator(String localPath, File mboxFile, long fileID) {
89  MboxParser parser = new MboxParser(localPath);
90  parser.createIterator(mboxFile, fileID, true);
91 
92  return parser;
93  }
94 
103  private void createIterator(File mboxFile, long fileID, boolean wholeMsg) {
104  // Detect possible charsets
105  List<CharsetEncoder> encoders = getPossibleEncoders(mboxFile);
106 
107  // Loop through the possible encoders and find the first one that works.
108  // That will usually be one of the first ones.
109  for (CharsetEncoder encoder : encoders) {
110  try {
111  Iterable<CharBufferWrapper> mboxIterable = MboxIterator.fromFile(mboxFile).charset(encoder.charset()).build();
112  if (mboxIterable != null) {
113  emailIterator = new MBoxEmailIterator(mboxIterable.iterator(), encoder, fileID, wholeMsg);
114  }
115  break;
116  } catch (CharConversionException | UnsupportedCharsetException ex) {
117  // Not the right encoder
118  } catch (IllegalArgumentException ex) {
119  // Not the right encoder
120  } catch (IOException ex) {
121  logger.log(Level.WARNING, String.format("Failed to open mbox file: %s %d", mboxFile.getName(), fileID), ex); //NON-NLS
122  addErrorMessage(NbBundle.getMessage(this.getClass(), "MboxParser.parse.errMsg.failedToReadFile"));
123  }
124  }
125  }
126 
127  @Override
128  public boolean hasNext() {
129  return emailIterator != null && emailIterator.hasNext();
130  }
131 
132  @Override
133  public EmailMessage next() {
134  return emailIterator != null ? emailIterator.next() : null;
135  }
136 
145  private List<CharsetEncoder> getPossibleEncoders(File mboxFile) {
146  InputStream is;
147  List<CharsetEncoder> possibleEncoders = new ArrayList<>();
148 
149  possibleEncoders.add(StandardCharsets.ISO_8859_1.newEncoder());
150  possibleEncoders.add(StandardCharsets.US_ASCII.newEncoder());
151  possibleEncoders.add(StandardCharsets.UTF_16.newEncoder());
152  possibleEncoders.add(StandardCharsets.UTF_16BE.newEncoder());
153  possibleEncoders.add(StandardCharsets.UTF_16LE.newEncoder());
154  possibleEncoders.add(StandardCharsets.UTF_8.newEncoder());
155 
156  try {
157  is = new BufferedInputStream(new FileInputStream(mboxFile));
158  } catch (FileNotFoundException ex) {
159  logger.log(Level.WARNING, "Failed to find mbox file while detecting charset"); //NON-NLS
160  return possibleEncoders;
161  }
162 
163  try {
164  CharsetDetector detector = new CharsetDetector();
165  detector.setText(is);
166  CharsetMatch[] matches = detector.detectAll();
167  for (CharsetMatch match : matches) {
168  try {
169  possibleEncoders.add(Charset.forName(match.getName()).newEncoder());
170  } catch (UnsupportedCharsetException | IllegalCharsetNameException ex) {
171  // Don't add unsupported charsets to the list
172  }
173  }
174  return possibleEncoders;
175  } catch (IOException | IllegalArgumentException ex) {
176  logger.log(Level.WARNING, "Failed to detect charset of mbox file.", ex); //NON-NLS
177  return possibleEncoders;
178  } finally {
179  try {
180  is.close();
181  } catch (IOException ex) {
182  logger.log(Level.WARNING, "Failed to close input stream"); //NON-NLS
183  }
184  }
185  }
186 
190  final class MBoxEmailIterator implements Iterator<EmailMessage> {
191 
192  private final Iterator<CharBufferWrapper> mboxIterator;
193  private final CharsetEncoder encoder;
194  private final long fileID;
195  private final boolean wholeMsg;
196 
197  MBoxEmailIterator(Iterator<CharBufferWrapper> mboxIter, CharsetEncoder encoder, long fileID, boolean wholeMsg) {
198  mboxIterator = mboxIter;
199  this.encoder = encoder;
200  this.fileID = fileID;
201  this.wholeMsg = wholeMsg;
202  }
203 
204  @Override
205  public boolean hasNext() {
206  return (mboxIterator != null && encoder != null) && mboxIterator.hasNext();
207  }
208 
209  @Override
210  public EmailMessage next() {
211  CharBufferWrapper messageBuffer = mboxIterator.next();
212 
213  try {
214  Message msg = getMessageBuilder().parseMessage(messageBuffer.asInputStream(encoder.charset()));
215  if (wholeMsg) {
216  return extractEmail(msg, getLocalPath(), fileID);
217  } else {
218  return extractPartialEmail(msg);
219  }
220  } catch (RuntimeException | IOException ex) {
221  logger.log(Level.WARNING, "Failed to get message from mbox: {0}", ex.getMessage()); //NON-NLS
222  }
223  return null;
224  }
225 
226  }
227 }

Copyright © 2012-2020 Basis Technology. Generated on: Mon Jul 6 2020
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.