Autopsy  4.19.0
Graphical digital forensics platform for The Sleuth Kit and other tools.
EmailMessageThreader.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2019 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.thunderbirdparser;
20 
21 import java.util.ArrayList;
22 import java.util.HashMap;
23 import java.util.HashSet;
24 import java.util.Iterator;
25 import java.util.List;
26 import java.util.Map;
27 import java.util.Set;
28 import java.util.UUID;
29 
39 final class EmailMessageThreader {
40 
41  private int bogus_id_count = 0;
42 
43  private EmailMessageThreader(){}
44 
45  public static void threadMessages(List<EmailMessage> emailMessages) {
46  EmailMessageThreader instance = new EmailMessageThreader();
47 
48  Map<String, EmailContainer> id_table = instance.createIDTable(emailMessages);
49  Set<EmailContainer> rootSet = instance.getRootSet(id_table);
50 
51  instance.pruneEmptyContainers(rootSet);
52 
53  Set<EmailContainer> finalRootSet = instance.groupBySubject(rootSet);
54 
55  instance.assignThreadIDs(finalRootSet);
56  }
57 
67  private Map<String, EmailContainer> createIDTable(List<EmailMessage> emailMessages) {
68  HashMap<String, EmailContainer> id_table = new HashMap<>();
69 
70  for (EmailMessage message : emailMessages) {
71  String messageID = message.getMessageID();
72 
73  // Check the id_table for an existing Container for message-id
74  EmailContainer container = id_table.get(messageID);
75 
76  // An existing container for message-id was found
77  if (container != null) {
78  // If the existing Container has a message already assocated with it
79  // emailMessage maybe a duplicate, so we don't lose the existance of
80  // the duplicate message assign it a bogus message-id
81  if (container.hasMessage()) {
82  messageID = String.format("<Bogus-id: %d >", bogus_id_count++);
83  container = null;
84  } else {
85  container.setMessage(message);
86  }
87  }
88 
89  if (container == null) {
90  container = new EmailContainer(message);
91  id_table.put(messageID, container);
92  }
93 
94  processMessageReferences(message, container, id_table);
95  }
96 
97  return id_table;
98  }
99 
108  void processMessageReferences(EmailMessage message, EmailContainer container, Map<String, EmailContainer> id_table) {
109  List<String> referenceList = message.getReferences();
110 
111  // Make sure the inReplyToID is in the list of references
112  String inReplyToID = message.getInReplyToID();
113  if (inReplyToID != null && !inReplyToID.isEmpty()) {
114  if (referenceList == null) {
115  referenceList = new ArrayList<>();
116  }
117 
118  referenceList.add(inReplyToID);
119  }
120 
121  // No references, nothing to do
122  if (referenceList == null) {
123  return;
124  }
125 
126  EmailContainer parent_ref = null;
127  EmailContainer ref;
128 
129  for (String refID : referenceList) {
130  // Check id_table to see if there is already a container for this
131  // reference id, if not create a new Container and add to table
132  ref = id_table.get(refID);
133 
134  if (ref == null) {
135  ref = new EmailContainer();
136  id_table.put(refID, ref);
137  }
138 
139  // Set the parent\child relationship between parent_ref and ref
140  if (parent_ref != null
141  && !ref.hasParent()
142  && !parent_ref.equals(ref)
143  && !parent_ref.isChild(ref)) {
144  ref.setParent(parent_ref);
145  parent_ref.addChild(ref);
146  }
147 
148  parent_ref = ref;
149  }
150 
151  // If the parent_ref and container are already linked, don't change
152  // anything
153  if (parent_ref != null
154  && (parent_ref.equals(container)
155  || container.isChild(parent_ref))) {
156  parent_ref = null;
157  }
158 
159  // If container already has a parent, the parent was assumed based on
160  // the list of references from another message. parent_ref will be
161  // the real parent of container so throw away the old parent and set a
162  // new one.
163  if (container.hasParent()) {
164  container.getParent().removeChild(container);
165  container.setParent(null);
166  }
167 
168  if (parent_ref != null) {
169  container.setParent(container);
170  parent_ref.addChild(container);
171  }
172  }
173 
182  Set<EmailContainer> getRootSet(Map<?, EmailContainer> id_table) {
183  HashSet<EmailContainer> rootSet = new HashSet<>();
184 
185  id_table.values().stream().filter((container)
186  -> (!container.hasParent())).forEachOrdered((container) -> {
187  rootSet.add(container);
188  });
189 
190  return rootSet;
191  }
192 
199  void pruneEmptyContainers(Set<EmailContainer> containerSet) {
200  Set<EmailContainer> containersToRemove = new HashSet<>();
201  containerSet.forEach((container) -> {
202  if (!container.hasMessage() && !container.hasChildren()) {
203  containersToRemove.add(container);
204  } else {
205  pruneChildren(container);
206  }
207  });
208 
209  containerSet.removeAll(containersToRemove);
210  }
211 
220  boolean pruneChildren(EmailContainer parent) {
221  if (parent == null) {
222  return false;
223  }
224 
225  Set<EmailContainer> children = parent.getChildren();
226 
227  if (children == null) {
228  return false;
229  }
230 
231  EmailContainer grandParent = parent.getParent();
232  Set<EmailContainer> remove = new HashSet<>();
233  Set<EmailContainer> add = new HashSet<>();
234  for (EmailContainer child : parent.getChildren()) {
235  if (pruneChildren(child)) {
236  remove.add(child);
237  add.addAll(child.getChildren());
238  child.setParent(null);
239  child.clearChildren();
240 
241  }
242  }
243 
244  parent.addChildren(add);
245  parent.removeChildren(remove);
246 
247  if (!parent.hasMessage() && grandParent != null) {
248  children.forEach((child) -> {
249  child.setParent(grandParent);
250  });
251  return true;
252  }
253 
254  return false;
255  }
256 
275  Set<EmailContainer> groupBySubject(Set<EmailContainer> rootSet) {
276  Map<String, EmailContainer> subject_table = createSubjectTable(rootSet);
277 
278  Set<EmailContainer> finalSet = new HashSet<>();
279 
280  for (EmailContainer rootSetContainer : rootSet) {
281  String rootSubject = rootSetContainer.getSimplifiedSubject();
282 
283  EmailContainer tableContainer = subject_table.get(rootSubject);
284  if (tableContainer == null || tableContainer.equals(rootSetContainer)) {
285  finalSet.add(rootSetContainer);
286  continue;
287  }
288 
289  // If both containers are dummy/empty append the children of one to the other
290  if (tableContainer.getMessage() == null && rootSetContainer.getMessage() == null) {
291  tableContainer.addChildren(rootSetContainer.getChildren());
292  rootSetContainer.clearChildren();
293  continue;
294  }
295 
296  // one container is empty, but the other is not, make the non-empty one be a
297  // child of the empty
298  if ((tableContainer.getMessage() == null && rootSetContainer.getMessage() != null)
299  || (tableContainer.getMessage() != null && rootSetContainer.getMessage() == null)) {
300 
301  if (tableContainer.getMessage() == null) {
302  tableContainer.addChild(rootSetContainer);
303 
304  } else {
305  rootSetContainer.addChild(tableContainer);
306  subject_table.remove(rootSubject, tableContainer);
307  subject_table.put(rootSubject, rootSetContainer);
308 
309  finalSet.add(rootSetContainer);
310  }
311 
312  continue;
313  }
314 
315  // tableContainer is non-empty and it's message's subject does not begin
316  // with 'RE:' but rootSetContainer's message does begin with 'RE:', then
317  // make rootSetContainer a child of tableContainer
318  if (tableContainer.getMessage() != null
319  && !tableContainer.isReplySubject()
320  && rootSetContainer.isReplySubject()) {
321  tableContainer.addChild(rootSetContainer);
322  continue;
323  }
324 
325  // If table container is non-empy, and table container's subject does
326  // begin with 'RE:', but rootSetContainer does not start with 'RE:'
327  // make tableContainer a child of rootSetContainer
328  if (tableContainer.getMessage() != null
329  && tableContainer.isReplySubject()
330  && !rootSetContainer.isReplySubject()) {
331  rootSetContainer.addChild(tableContainer);
332  subject_table.put(rootSubject, rootSetContainer);
333  finalSet.add(rootSetContainer);
334  continue;
335  }
336 
337  // rootSetContainer and tableContainer either both have 'RE' or
338  // don't. Create a new dummy container with both containers as
339  // children.
340  EmailContainer newParent = new EmailContainer();
341  newParent.addChild(tableContainer);
342  newParent.addChild(rootSetContainer);
343  subject_table.remove(rootSubject, tableContainer);
344  subject_table.put(rootSubject, newParent);
345 
346  finalSet.add(newParent);
347  }
348  return finalSet;
349  }
350 
359  Map<String, EmailContainer> createSubjectTable(Set<EmailContainer> rootSet) {
360  HashMap<String, EmailContainer> subject_table = new HashMap<>();
361 
362  for (EmailContainer rootSetContainer : rootSet) {
363  String subject = "";
364  boolean reSubject = false;
365 
366  if (rootSetContainer.hasMessage()) {
367  subject = rootSetContainer.getMessage().getSimplifiedSubject();
368  reSubject = rootSetContainer.getMessage().isReplySubject();
369  } else if (rootSetContainer.hasChildren()) {
370  Iterator<EmailContainer> childrenIterator = rootSetContainer.getChildren().iterator();
371  while (childrenIterator.hasNext()) {
372  EmailMessage childMessage = childrenIterator.next().getMessage();
373  if (childMessage != null) {
374  subject = childMessage.getSimplifiedSubject();
375  if (!subject.isEmpty()) {
376  reSubject = childMessage.isReplySubject();
377  break;
378  }
379  }
380  }
381  }
382 
383  if (subject == null || subject.isEmpty()) {
384  continue; // Give up on this container
385  }
386 
387  EmailContainer tableContainer = subject_table.get(subject);
388 
389 // A container will be added to the table, if a container for its "simplified" subject
390 // does not currently exist in the table. Or if there is more than one container with the same
391 // subject, but one is an "empty container" the empty one will be added
392 // the table or in the one in the table has "RE" in the subject it will be replaced
393 // by the one that does not have "RE" in the subject (if it exists)
394 //
395  if (tableContainer == null ||
396  (tableContainer.getMessage() != null && rootSetContainer.getMessage() == null) ||
397  (!reSubject && (tableContainer.getMessage() != null && tableContainer.getMessage().isReplySubject()))) {
398  subject_table.put(subject, rootSetContainer);
399  }
400 
401  }
402 
403  return subject_table;
404  }
405 
417  private void assignThreadIDs(Set<EmailContainer> containerSet) {
418  for(EmailContainer container: containerSet) {
419  // Generate a threadID
420  String threadID = UUID.randomUUID().toString();
421  // Add the IDs to this thread
422  addThreadID(container, threadID);
423  }
424  }
425 
434  private void addThreadID(EmailContainer container, String threadID) {
435  if(container == null) {
436  return;
437  }
438 
439  EmailMessage message = container.getMessage();
440  if(message != null) {
441  message.setMessageThreadID(threadID);
442  }
443 
444  if(container.hasChildren()) {
445  for(EmailContainer child: container.getChildren()) {
446  addThreadID(child, threadID);
447  }
448  }
449  }
450 
455  final class EmailContainer {
456 
457  private EmailMessage message;
458  private EmailContainer parent;
459  private Set<EmailContainer> children;
460 
464  EmailContainer() {
465  // This constructor is intentially empty to allow for the creation of
466  // an EmailContainer without a message
467  }
468 
474  EmailContainer(EmailMessage message) {
475  this.message = message;
476  }
477 
483  EmailMessage getMessage() {
484  return message;
485  }
486 
492  void setMessage(EmailMessage message) {
493  this.message = message;
494  }
495 
501  boolean hasMessage() {
502  return message != null;
503  }
504 
512  String getSimplifiedSubject() {
513  String subject = "";
514  if (message != null) {
515  subject = message.getSimplifiedSubject();
516  } else if (children != null) {
517  for (EmailContainer child : children) {
518  if (child.hasMessage()) {
519  subject = child.getSimplifiedSubject();
520  }
521 
522  if (subject != null && !subject.isEmpty()) {
523  break;
524  }
525  }
526  }
527  return subject;
528  }
529 
537  boolean isReplySubject() {
538  if (message != null) {
539  return message.isReplySubject();
540  } else if (children != null) {
541  for (EmailContainer child : children) {
542  if (child.hasMessage()) {
543  boolean isReply = child.isReplySubject();
544 
545  if (isReply) {
546  return isReply;
547  }
548  }
549  }
550  }
551 
552  return false;
553  }
554 
560  EmailContainer getParent() {
561  return parent;
562  }
563 
569  void setParent(EmailContainer container) {
570  parent = container;
571  }
572 
578  boolean hasParent() {
579  return parent != null;
580  }
581 
589  boolean addChild(EmailContainer child) {
590  if (children == null) {
591  children = new HashSet<>();
592  }
593 
594  return children.add(child);
595  }
596 
607  boolean addChildren(Set<EmailContainer> children) {
608  if (children == null || children.isEmpty()) {
609  return false;
610  }
611 
612  if (this.children == null) {
613  this.children = new HashSet<>();
614  }
615 
616  return this.children.addAll(children);
617  }
618 
628  boolean removeChildren(Set<EmailContainer> children) {
629  if (children != null) {
630  return this.children.removeAll(children);
631  }
632 
633  return false;
634  }
635 
640  void clearChildren() {
641  if( children != null ) {
642  children.clear();
643  }
644  }
645 
654  boolean removeChild(EmailContainer child) {
655  if(children != null) {
656  return children.remove(child);
657  } else {
658  return false;
659  }
660  }
661 
667  boolean hasChildren() {
668  return children != null && !children.isEmpty();
669  }
670 
676  Set<EmailContainer> getChildren() {
677  return children;
678  }
679 
689  boolean isChild(EmailContainer container) {
690  return isChild(container, new HashSet<>());
691  }
692 
703  private boolean isChild(EmailContainer container, Set<EmailContainer> processedContainers) {
704  processedContainers.add(this);
705  if (children == null || children.isEmpty()) {
706  return false;
707  } else if (children.contains(container)) {
708  return true;
709  } else {
710  for (EmailContainer child : children) {
711  // Prevent an infinite recursion by making sure we haven't already
712  // run isChild() on this child
713  if ((!processedContainers.contains(child)) && child.isChild(container, processedContainers)) {
714  return true;
715  }
716  }
717  return false;
718  }
719  }
720  }
721 }

Copyright © 2012-2021 Basis Technology. Generated on: Fri Aug 6 2021
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.