Autopsy 4.22.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
MboxParser.java
Go to the documentation of this file.
1/*
2 * Autopsy Forensic Browser
3 *
4 * Copyright 2011-2019 Basis Technology Corp.
5 * Contact: carrier <at> sleuthkit <dot> org
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19package org.sleuthkit.autopsy.thunderbirdparser;
20
21import java.io.BufferedInputStream;
22import java.io.CharConversionException;
23import java.io.File;
24import java.io.FileInputStream;
25import java.io.FileNotFoundException;
26import java.io.IOException;
27import java.io.InputStream;
28import java.nio.charset.Charset;
29import java.nio.charset.CharsetEncoder;
30import java.nio.charset.IllegalCharsetNameException;
31import java.nio.charset.StandardCharsets;
32import java.nio.charset.UnsupportedCharsetException;
33import java.util.ArrayList;
34import java.util.Iterator;
35import java.util.List;
36import java.util.logging.Level;
37import org.sleuthkit.autopsy.coreutils.Logger;
38import org.sleuthkit.autopsy.modules.filetypeid.FileTypeDetector;
39import org.apache.james.mime4j.dom.Message;
40import org.apache.james.mime4j.mboxiterator.CharBufferWrapper;
41import org.apache.james.mime4j.mboxiterator.MboxIterator;
42import org.apache.tika.parser.txt.CharsetDetector;
43import org.apache.tika.parser.txt.CharsetMatch;
44import org.apache.commons.validator.routines.EmailValidator;
45import org.apache.james.mime4j.mboxiterator.MboxIterator.Builder;
46import org.openide.util.NbBundle;
47import org.sleuthkit.datamodel.AbstractFile;
48
52class MboxParser extends MimeJ4MessageParser implements Iterator<EmailMessage> {
53
54 private static final Logger logger = Logger.getLogger(MboxParser.class.getName());
55
56 private Iterator<EmailMessage> emailIterator = null;
57
58 private MboxIterator mboxIterable;
59
/**
 * Creates a parser and records the local path it is associated with.
 * Instances are obtained through the static factory methods of this
 * class; the constructor itself does not open the mbox file.
 *
 * @param localPath Value handed to setLocalPath().
 */
private MboxParser(String localPath) {
    super();
    setLocalPath(localPath);
}
63
64 static boolean isValidMimeTypeMbox(byte[] buffer, AbstractFile abstractFile) {
65 String mboxHeaderLine = new String(buffer);
66 if (mboxHeaderLine.startsWith("From ")) {
67 String mimeType = abstractFile.getMIMEType();
68
69 // if it is not present, attempt to use the FileTypeDetector to determine
70 if (mimeType == null || mimeType.isEmpty()) {
71 FileTypeDetector fileTypeDetector = null;
72 try {
73 fileTypeDetector = new FileTypeDetector();
74 } catch (FileTypeDetector.FileTypeDetectorInitException ex) {
75 logger.log(Level.WARNING, String.format("Unable to create file type detector for determining MIME type for file %s with id of %d", abstractFile.getName(), abstractFile.getId()));
76 return false;
77 }
78 mimeType = fileTypeDetector.getMIMEType(abstractFile);
79 }
80 if (mimeType.equalsIgnoreCase("application/mbox")) {
81 return true;
82 }
83 }
84 return false; //NON-NLS
85 }
86
97 static MboxParser getThreadInfoIterator(String localPath, File mboxFile) {
98 MboxParser parser = new MboxParser(localPath);
99 parser.createIterator(mboxFile, 0, false);
100 return parser;
101 }
102
113 static MboxParser getEmailIterator(String localPath, File mboxFile, long fileID) {
114 MboxParser parser = new MboxParser(localPath);
115 parser.createIterator(mboxFile, fileID, true);
116
117 return parser;
118 }
119
128 private void createIterator(File mboxFile, long fileID, boolean wholeMsg) {
129 // Detect possible charsets
130 List<CharsetEncoder> encoders = getPossibleEncoders(mboxFile);
131
132 // Loop through the possible encoders and find the first one that works.
133 // That will usually be one of the first ones.
134 for (CharsetEncoder encoder : encoders) {
135 try {
136 mboxIterable = MboxIterator
137 .fromFile(mboxFile)
138 // use more permissive from line from mbox iterator 0.8.0, but handling CRLF/LF
139 .fromLine("^From .*\r?\n")
140 .charset(encoder.charset())
141 .build();
142 if (mboxIterable != null) {
143 emailIterator = new MBoxEmailIterator(mboxIterable.iterator(), encoder, fileID, wholeMsg);
144 }
145 break;
146 } catch (CharConversionException | UnsupportedCharsetException ex) {
147 // Not the right encoder
148 } catch (IllegalArgumentException ex) {
149 // Not the right encoder
150 } catch (IOException ex) {
151 logger.log(Level.WARNING, String.format("Failed to open mbox file: %s %d", mboxFile.getName(), fileID), ex); //NON-NLS
152 addErrorMessage(NbBundle.getMessage(this.getClass(), "MboxParser.parse.errMsg.failedToReadFile"));
153 }
154 }
155 }
156
157 @Override
158 public boolean hasNext() {
159 return emailIterator != null && emailIterator.hasNext();
160 }
161
162 @Override
163 public EmailMessage next() {
164 return emailIterator != null ? emailIterator.next() : null;
165 }
166
167 @Override
168 public void close() throws IOException{
169 if(mboxIterable != null) {
170 mboxIterable.close();
171 }
172 }
173
182 private List<CharsetEncoder> getPossibleEncoders(File mboxFile) {
183 InputStream is;
184 List<CharsetEncoder> possibleEncoders = new ArrayList<>();
185
186 possibleEncoders.add(StandardCharsets.ISO_8859_1.newEncoder());
187 possibleEncoders.add(StandardCharsets.US_ASCII.newEncoder());
188 possibleEncoders.add(StandardCharsets.UTF_16.newEncoder());
189 possibleEncoders.add(StandardCharsets.UTF_16BE.newEncoder());
190 possibleEncoders.add(StandardCharsets.UTF_16LE.newEncoder());
191 possibleEncoders.add(StandardCharsets.UTF_8.newEncoder());
192
193 try {
194 is = new BufferedInputStream(new FileInputStream(mboxFile));
195 } catch (FileNotFoundException ex) {
196 logger.log(Level.WARNING, "Failed to find mbox file while detecting charset"); //NON-NLS
197 return possibleEncoders;
198 }
199
200 try {
201 CharsetDetector detector = new CharsetDetector();
202 detector.setText(is);
203 CharsetMatch[] matches = detector.detectAll();
204 for (CharsetMatch match : matches) {
205 try {
206 possibleEncoders.add(Charset.forName(match.getName()).newEncoder());
207 } catch (UnsupportedCharsetException | IllegalCharsetNameException ex) {
208 // Don't add unsupported charsets to the list
209 }
210 }
211 return possibleEncoders;
212 } catch (IOException | IllegalArgumentException ex) {
213 logger.log(Level.WARNING, "Failed to detect charset of mbox file.", ex); //NON-NLS
214 return possibleEncoders;
215 } finally {
216 try {
217 is.close();
218 } catch (IOException ex) {
219 logger.log(Level.WARNING, "Failed to close input stream"); //NON-NLS
220 }
221 }
222 }
223
227 final class MBoxEmailIterator implements Iterator<EmailMessage> {
228
229 private final Iterator<CharBufferWrapper> mboxIterator;
230 private final CharsetEncoder encoder;
231 private final long fileID;
232 private final boolean wholeMsg;
233
234 MBoxEmailIterator(Iterator<CharBufferWrapper> mboxIter, CharsetEncoder encoder, long fileID, boolean wholeMsg) {
235 mboxIterator = mboxIter;
236 this.encoder = encoder;
237 this.fileID = fileID;
238 this.wholeMsg = wholeMsg;
239 }
240
241 @Override
242 public boolean hasNext() {
243 return (mboxIterator != null && encoder != null) && mboxIterator.hasNext();
244 }
245
246 @Override
247 public EmailMessage next() {
248 CharBufferWrapper messageBuffer = mboxIterator.next();
249
250 try {
251 Message msg = getMessageBuilder().parseMessage(messageBuffer.asInputStream(encoder.charset()));
252 if (wholeMsg) {
253 return extractEmail(msg, getLocalPath(), fileID);
254 } else {
255 return extractPartialEmail(msg);
256 }
257 } catch (RuntimeException | IOException ex) {
258 logger.log(Level.WARNING, "Failed to get message from mbox: {0}", ex.getMessage()); //NON-NLS
259 }
260 return null;
261 }
262
263 }
264}

Copyright © 2012-2024 Sleuth Kit Labs. Generated on:
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.