Autopsy  4.11.0
Graphical digital forensics platform for The Sleuth Kit and other tools.
EmailMessageThreader.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2019 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.thunderbirdparser;
20 
21 import java.util.ArrayList;
22 import java.util.HashMap;
23 import java.util.HashSet;
24 import java.util.Iterator;
25 import java.util.List;
26 import java.util.Map;
27 import java.util.Set;
28 
38 final class EmailMessageThreader {
39 
40  private int bogus_id_count = 0;
41 
42  private EmailMessageThreader(){}
43 
44  public static void threadMessages(List<EmailMessage> emailMessages, String threadIDPrefix) {
45  EmailMessageThreader instance = new EmailMessageThreader();
46 
47  Map<String, EmailContainer> id_table = instance.createIDTable(emailMessages);
48  Set<EmailContainer> rootSet = instance.getRootSet(id_table);
49 
50  instance.pruneEmptyContainers(rootSet);
51 
52  Set<EmailContainer> finalRootSet = instance.groupBySubject(rootSet);
53 
54  instance.assignThreadIDs(finalRootSet, threadIDPrefix);
55  }
56 
66  private Map<String, EmailContainer> createIDTable(List<EmailMessage> emailMessages) {
67  HashMap<String, EmailContainer> id_table = new HashMap<>();
68 
69  for (EmailMessage message : emailMessages) {
70  String messageID = message.getMessageID();
71 
72  // Check the id_table for an existing Container for message-id
73  EmailContainer container = id_table.get(messageID);
74 
75  // An existing container for message-id was found
76  if (container != null) {
77  // If the existing Container has a message already assocated with it
78  // emailMessage maybe a duplicate, so we don't lose the existance of
79  // the duplicate message assign it a bogus message-id
80  if (container.hasMessage()) {
81  messageID = String.format("<Bogus-id: %d >", bogus_id_count++);
82  container = null;
83  } else {
84  container.setMessage(message);
85  }
86  }
87 
88  if (container == null) {
89  container = new EmailContainer(message);
90  id_table.put(messageID, container);
91  }
92 
93  processMessageReferences(message, container, id_table);
94  }
95 
96  return id_table;
97  }
98 
107  void processMessageReferences(EmailMessage message, EmailContainer container, Map<String, EmailContainer> id_table) {
108  List<String> referenceList = message.getReferences();
109 
110  // Make sure the inReplyToID is in the list of references
111  String inReplyToID = message.getInReplyToID();
112  if (inReplyToID != null && !inReplyToID.isEmpty()) {
113  if (referenceList == null) {
114  referenceList = new ArrayList<>();
115  }
116 
117  referenceList.add(inReplyToID);
118  }
119 
120  // No references, nothing to do
121  if (referenceList == null) {
122  return;
123  }
124 
125  EmailContainer parent_ref = null;
126  EmailContainer ref;
127 
128  for (String refID : referenceList) {
129  // Check id_table to see if there is already a container for this
130  // reference id, if not create a new Container and add to table
131  ref = id_table.get(refID);
132 
133  if (ref == null) {
134  ref = new EmailContainer();
135  id_table.put(refID, ref);
136  }
137 
138  // Set the parent\child relationship between parent_ref and ref
139  if (parent_ref != null
140  && !ref.hasParent()
141  && !parent_ref.equals(ref)
142  && !parent_ref.isChild(ref)) {
143  ref.setParent(parent_ref);
144  parent_ref.addChild(ref);
145  }
146 
147  parent_ref = ref;
148  }
149 
150  // If the parent_ref and container are already linked, don't change
151  // anything
152  if (parent_ref != null
153  && (parent_ref.equals(container)
154  || container.isChild(parent_ref))) {
155  parent_ref = null;
156  }
157 
158  // If container already has a parent, the parent was assumed based on
159  // the list of references from another message. parent_ref will be
160  // the real parent of container so throw away the old parent and set a
161  // new one.
162  if (container.hasParent()) {
163  container.getParent().removeChild(container);
164  container.setParent(null);
165  }
166 
167  if (parent_ref != null) {
168  container.setParent(container);
169  parent_ref.addChild(container);
170  }
171  }
172 
181  Set<EmailContainer> getRootSet(Map<?, EmailContainer> id_table) {
182  HashSet<EmailContainer> rootSet = new HashSet<>();
183 
184  id_table.values().stream().filter((container)
185  -> (!container.hasParent())).forEachOrdered((container) -> {
186  rootSet.add(container);
187  });
188 
189  return rootSet;
190  }
191 
198  void pruneEmptyContainers(Set<EmailContainer> containerSet) {
199  Set<EmailContainer> containersToRemove = new HashSet<>();
200  containerSet.forEach((container) -> {
201  if (!container.hasMessage() && !container.hasChildren()) {
202  containersToRemove.add(container);
203  } else {
204  pruneChildren(container);
205  }
206  });
207 
208  containerSet.removeAll(containersToRemove);
209  }
210 
219  boolean pruneChildren(EmailContainer parent) {
220  if (parent == null) {
221  return false;
222  }
223 
224  Set<EmailContainer> children = parent.getChildren();
225 
226  if (children == null) {
227  return false;
228  }
229 
230  EmailContainer grandParent = parent.getParent();
231  Set<EmailContainer> remove = new HashSet<>();
232  Set<EmailContainer> add = new HashSet<>();
233  for (EmailContainer child : parent.getChildren()) {
234  if (pruneChildren(child)) {
235  remove.add(child);
236  add.addAll(child.getChildren());
237  child.setParent(null);
238  child.clearChildren();
239 
240  }
241  }
242 
243  parent.addChildren(add);
244  parent.removeChildren(remove);
245 
246  if (!parent.hasMessage() && grandParent != null) {
247  children.forEach((child) -> {
248  child.setParent(grandParent);
249  });
250  return true;
251  }
252 
253  return false;
254  }
255 
274  Set<EmailContainer> groupBySubject(Set<EmailContainer> rootSet) {
275  Map<String, EmailContainer> subject_table = createSubjectTable(rootSet);
276 
277  Set<EmailContainer> finalSet = new HashSet<>();
278 
279  for (EmailContainer rootSetContainer : rootSet) {
280  String rootSubject = rootSetContainer.getSimplifiedSubject();
281 
282  EmailContainer tableContainer = subject_table.get(rootSubject);
283  if (tableContainer == null || tableContainer.equals(rootSetContainer)) {
284  finalSet.add(rootSetContainer);
285  continue;
286  }
287 
288  // If both containers are dummy/empty append the children of one to the other
289  if (tableContainer.getMessage() == null && rootSetContainer.getMessage() == null) {
290  tableContainer.addChildren(rootSetContainer.getChildren());
291  rootSetContainer.clearChildren();
292  continue;
293  }
294 
295  // one container is empty, but the other is not, make the non-empty one be a
296  // child of the empty
297  if ((tableContainer.getMessage() == null && rootSetContainer.getMessage() != null)
298  || (tableContainer.getMessage() != null && rootSetContainer.getMessage() == null)) {
299 
300  if (tableContainer.getMessage() == null) {
301  tableContainer.addChild(rootSetContainer);
302 
303  } else {
304  rootSetContainer.addChild(tableContainer);
305  subject_table.remove(rootSubject, tableContainer);
306  subject_table.put(rootSubject, rootSetContainer);
307 
308  finalSet.add(rootSetContainer);
309  }
310 
311  continue;
312  }
313 
314  // tableContainer is non-empty and it's message's subject does not begin
315  // with 'RE:' but rootSetContainer's message does begin with 'RE:', then
316  // make rootSetContainer a child of tableContainer
317  if (tableContainer.getMessage() != null
318  && !tableContainer.isReplySubject()
319  && rootSetContainer.isReplySubject()) {
320  tableContainer.addChild(rootSetContainer);
321  continue;
322  }
323 
324  // If table container is non-empy, and table container's subject does
325  // begin with 'RE:', but rootSetContainer does not start with 'RE:'
326  // make tableContainer a child of rootSetContainer
327  if (tableContainer.getMessage() != null
328  && tableContainer.isReplySubject()
329  && !rootSetContainer.isReplySubject()) {
330  rootSetContainer.addChild(tableContainer);
331  subject_table.put(rootSubject, rootSetContainer);
332  finalSet.add(rootSetContainer);
333  continue;
334  }
335 
336  // rootSetContainer and tableContainer either both have 'RE' or
337  // don't. Create a new dummy container with both containers as
338  // children.
339  EmailContainer newParent = new EmailContainer();
340  newParent.addChild(tableContainer);
341  newParent.addChild(rootSetContainer);
342  subject_table.remove(rootSubject, tableContainer);
343  subject_table.put(rootSubject, newParent);
344 
345  finalSet.add(newParent);
346  }
347  return finalSet;
348  }
349 
358  Map<String, EmailContainer> createSubjectTable(Set<EmailContainer> rootSet) {
359  HashMap<String, EmailContainer> subject_table = new HashMap<>();
360 
361  for (EmailContainer rootSetContainer : rootSet) {
362  String subject = "";
363  boolean reSubject = false;
364 
365  if (rootSetContainer.hasMessage()) {
366  subject = rootSetContainer.getMessage().getSimplifiedSubject();
367  reSubject = rootSetContainer.getMessage().isReplySubject();
368  } else if (rootSetContainer.hasChildren()) {
369  Iterator<EmailContainer> childrenIterator = rootSetContainer.getChildren().iterator();
370  while (childrenIterator.hasNext()) {
371  EmailMessage childMessage = childrenIterator.next().getMessage();
372  if (childMessage != null) {
373  subject = childMessage.getSimplifiedSubject();
374  if (!subject.isEmpty()) {
375  reSubject = childMessage.isReplySubject();
376  break;
377  }
378  }
379  }
380  }
381 
382  if (subject == null || subject.isEmpty()) {
383  continue; // Give up on this container
384  }
385 
386  EmailContainer tableContainer = subject_table.get(subject);
387 
388 // A container will be added to the table, if a container for its "simplified" subject
389 // does not currently exist in the table. Or if there is more than one container with the same
390 // subject, but one is an "empty container" the empty one will be added
391 // the table or in the one in the table has "RE" in the subject it will be replaced
392 // by the one that does not have "RE" in the subject (if it exists)
393 //
394  if (tableContainer == null ||
395  (tableContainer.getMessage() != null && rootSetContainer.getMessage() == null) ||
396  (!reSubject && (tableContainer.getMessage() != null && tableContainer.getMessage().isReplySubject()))) {
397  subject_table.put(subject, rootSetContainer);
398  }
399 
400  }
401 
402  return subject_table;
403  }
404 
416  private void assignThreadIDs(Set<EmailContainer> containerSet, String IDPrefix) {
417  int threadCounter = 0;
418 
419  for(EmailContainer container: containerSet) {
420  // Generate a threadID
421  String threadID = String.format("%s-%d", IDPrefix, threadCounter++);
422  // Add the IDs to this thread
423  addThreadID(container, threadID);
424  }
425  }
426 
435  private void addThreadID(EmailContainer container, String threadID) {
436  if(container == null) {
437  return;
438  }
439 
440  EmailMessage message = container.getMessage();
441  if(message != null) {
442  message.setMessageThreadID(threadID);
443  }
444 
445  if(container.hasChildren()) {
446  for(EmailContainer child: container.getChildren()) {
447  addThreadID(child, threadID);
448  }
449  }
450  }
451 
456  final class EmailContainer {
457 
458  private EmailMessage message;
459  private EmailContainer parent;
460  private Set<EmailContainer> children;
461 
465  EmailContainer() {
466  // This constructor is intentially empty to allow for the creation of
467  // an EmailContainer without a message
468  }
469 
475  EmailContainer(EmailMessage message) {
476  this.message = message;
477  }
478 
484  EmailMessage getMessage() {
485  return message;
486  }
487 
493  void setMessage(EmailMessage message) {
494  this.message = message;
495  }
496 
502  boolean hasMessage() {
503  return message != null;
504  }
505 
513  String getSimplifiedSubject() {
514  String subject = "";
515  if (message != null) {
516  subject = message.getSimplifiedSubject();
517  } else if (children != null) {
518  for (EmailContainer child : children) {
519  if (child.hasMessage()) {
520  subject = child.getSimplifiedSubject();
521  }
522 
523  if (subject != null && !subject.isEmpty()) {
524  break;
525  }
526  }
527  }
528  return subject;
529  }
530 
538  boolean isReplySubject() {
539  if (message != null) {
540  return message.isReplySubject();
541  } else if (children != null) {
542  for (EmailContainer child : children) {
543  if (child.hasMessage()) {
544  boolean isReply = child.isReplySubject();
545 
546  if (isReply) {
547  return isReply;
548  }
549  }
550  }
551  }
552 
553  return false;
554  }
555 
561  EmailContainer getParent() {
562  return parent;
563  }
564 
570  void setParent(EmailContainer container) {
571  parent = container;
572  }
573 
579  boolean hasParent() {
580  return parent != null;
581  }
582 
590  boolean addChild(EmailContainer child) {
591  if (children == null) {
592  children = new HashSet<>();
593  }
594 
595  return children.add(child);
596  }
597 
608  boolean addChildren(Set<EmailContainer> children) {
609  if (children == null || children.isEmpty()) {
610  return false;
611  }
612 
613  if (this.children == null) {
614  this.children = new HashSet<>();
615  }
616 
617  return this.children.addAll(children);
618  }
619 
629  boolean removeChildren(Set<EmailContainer> children) {
630  if (children != null) {
631  return this.children.removeAll(children);
632  }
633 
634  return false;
635  }
636 
641  void clearChildren() {
642  if( children != null ) {
643  children.clear();
644  }
645  }
646 
655  boolean removeChild(EmailContainer child) {
656  if(children != null) {
657  return children.remove(child);
658  } else {
659  return false;
660  }
661  }
662 
668  boolean hasChildren() {
669  return children != null && !children.isEmpty();
670  }
671 
677  Set<EmailContainer> getChildren() {
678  return children;
679  }
680 
690  boolean isChild(EmailContainer container) {
691  if (children == null || children.isEmpty()) {
692  return false;
693  } else if (children.contains(container)) {
694  return true;
695  } else {
696  return children.stream().anyMatch((child) -> (child.isChild(container)));
697  }
698  }
699  }
700 }

Copyright © 2012-2018 Basis Technology. Generated on: Fri Jun 21 2019
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.