19 package org.sleuthkit.autopsy.thunderbirdparser;
21 import java.util.ArrayList;
22 import java.util.HashMap;
23 import java.util.HashSet;
24 import java.util.Iterator;
25 import java.util.List;
38 final class EmailMessageThreader {
40 private int bogus_id_count = 0;
42 private EmailMessageThreader(){}
44 public static void threadMessages(List<EmailMessage> emailMessages, String threadIDPrefix) {
45 EmailMessageThreader instance =
new EmailMessageThreader();
47 Map<String, EmailContainer> id_table = instance.createIDTable(emailMessages);
48 Set<EmailContainer> rootSet = instance.getRootSet(id_table);
50 instance.pruneEmptyContainers(rootSet);
52 Set<EmailContainer> finalRootSet = instance.groupBySubject(rootSet);
54 instance.assignThreadIDs(finalRootSet, threadIDPrefix);
66 private Map<String, EmailContainer> createIDTable(List<EmailMessage> emailMessages) {
67 HashMap<String, EmailContainer> id_table =
new HashMap<>();
69 for (EmailMessage message : emailMessages) {
70 String messageID = message.getMessageID();
73 EmailContainer container = id_table.get(messageID);
76 if (container != null) {
80 if (container.hasMessage()) {
81 messageID = String.format(
"<Bogus-id: %d >", bogus_id_count++);
84 container.setMessage(message);
88 if (container == null) {
89 container =
new EmailContainer(message);
90 id_table.put(messageID, container);
93 processMessageReferences(message, container, id_table);
107 void processMessageReferences(EmailMessage message, EmailContainer container, Map<String, EmailContainer> id_table) {
108 List<String> referenceList = message.getReferences();
111 String inReplyToID = message.getInReplyToID();
112 if (inReplyToID != null && !inReplyToID.isEmpty()) {
113 if (referenceList == null) {
114 referenceList =
new ArrayList<>();
117 referenceList.add(inReplyToID);
121 if (referenceList == null) {
125 EmailContainer parent_ref = null;
128 for (String refID : referenceList) {
131 ref = id_table.get(refID);
134 ref =
new EmailContainer();
135 id_table.put(refID, ref);
139 if (parent_ref != null
141 && !parent_ref.equals(ref)
142 && !parent_ref.isChild(ref)) {
143 ref.setParent(parent_ref);
144 parent_ref.addChild(ref);
152 if (parent_ref != null
153 && (parent_ref.equals(container)
154 || container.isChild(parent_ref))) {
162 if (container.hasParent()) {
163 container.getParent().removeChild(container);
164 container.setParent(null);
167 if (parent_ref != null) {
168 container.setParent(container);
169 parent_ref.addChild(container);
181 Set<EmailContainer> getRootSet(Map<?, EmailContainer> id_table) {
182 HashSet<EmailContainer> rootSet =
new HashSet<>();
184 id_table.values().stream().filter((container)
185 -> (!container.hasParent())).forEachOrdered((container) -> {
186 rootSet.add(container);
198 void pruneEmptyContainers(Set<EmailContainer> containerSet) {
199 Set<EmailContainer> containersToRemove =
new HashSet<>();
200 containerSet.forEach((container) -> {
201 if (!container.hasMessage() && !container.hasChildren()) {
202 containersToRemove.add(container);
204 pruneChildren(container);
208 containerSet.removeAll(containersToRemove);
219 boolean pruneChildren(EmailContainer parent) {
220 if (parent == null) {
224 Set<EmailContainer> children = parent.getChildren();
226 if (children == null) {
230 EmailContainer grandParent = parent.getParent();
231 Set<EmailContainer>
remove =
new HashSet<>();
232 Set<EmailContainer> add =
new HashSet<>();
233 for (EmailContainer child : parent.getChildren()) {
234 if (pruneChildren(child)) {
236 add.addAll(child.getChildren());
237 child.setParent(null);
238 child.clearChildren();
243 parent.addChildren(add);
244 parent.removeChildren(
remove);
246 if (!parent.hasMessage() && grandParent != null) {
247 children.forEach((child) -> {
248 child.setParent(grandParent);
274 Set<EmailContainer> groupBySubject(Set<EmailContainer> rootSet) {
275 Map<String, EmailContainer> subject_table = createSubjectTable(rootSet);
277 Set<EmailContainer> finalSet =
new HashSet<>();
279 for (EmailContainer rootSetContainer : rootSet) {
280 String rootSubject = rootSetContainer.getSimplifiedSubject();
282 EmailContainer tableContainer = subject_table.get(rootSubject);
283 if (tableContainer == null || tableContainer.equals(rootSetContainer)) {
284 finalSet.add(rootSetContainer);
289 if (tableContainer.getMessage() == null && rootSetContainer.getMessage() == null) {
290 tableContainer.addChildren(rootSetContainer.getChildren());
291 rootSetContainer.clearChildren();
297 if ((tableContainer.getMessage() == null && rootSetContainer.getMessage() != null)
298 || (tableContainer.getMessage() != null && rootSetContainer.getMessage() == null)) {
300 if (tableContainer.getMessage() == null) {
301 tableContainer.addChild(rootSetContainer);
304 rootSetContainer.addChild(tableContainer);
305 subject_table.remove(rootSubject, tableContainer);
306 subject_table.put(rootSubject, rootSetContainer);
308 finalSet.add(rootSetContainer);
317 if (tableContainer.getMessage() != null
318 && !tableContainer.isReplySubject()
319 && rootSetContainer.isReplySubject()) {
320 tableContainer.addChild(rootSetContainer);
327 if (tableContainer.getMessage() != null
328 && tableContainer.isReplySubject()
329 && !rootSetContainer.isReplySubject()) {
330 rootSetContainer.addChild(tableContainer);
331 subject_table.put(rootSubject, rootSetContainer);
332 finalSet.add(rootSetContainer);
339 EmailContainer newParent =
new EmailContainer();
340 newParent.addChild(tableContainer);
341 newParent.addChild(rootSetContainer);
342 subject_table.remove(rootSubject, tableContainer);
343 subject_table.put(rootSubject, newParent);
345 finalSet.add(newParent);
358 Map<String, EmailContainer> createSubjectTable(Set<EmailContainer> rootSet) {
359 HashMap<String, EmailContainer> subject_table =
new HashMap<>();
361 for (EmailContainer rootSetContainer : rootSet) {
363 boolean reSubject =
false;
365 if (rootSetContainer.hasMessage()) {
366 subject = rootSetContainer.getMessage().getSimplifiedSubject();
367 reSubject = rootSetContainer.getMessage().isReplySubject();
368 }
else if (rootSetContainer.hasChildren()) {
369 Iterator<EmailContainer> childrenIterator = rootSetContainer.getChildren().iterator();
370 while (childrenIterator.hasNext()) {
371 EmailMessage childMessage = childrenIterator.next().getMessage();
372 if (childMessage != null) {
373 subject = childMessage.getSimplifiedSubject();
374 if (!subject.isEmpty()) {
375 reSubject = childMessage.isReplySubject();
382 if (subject == null || subject.isEmpty()) {
386 EmailContainer tableContainer = subject_table.get(subject);
394 if (tableContainer == null ||
395 (tableContainer.getMessage() != null && rootSetContainer.getMessage() == null) ||
396 (!reSubject && (tableContainer.getMessage() != null && tableContainer.getMessage().isReplySubject()))) {
397 subject_table.put(subject, rootSetContainer);
402 return subject_table;
416 private void assignThreadIDs(Set<EmailContainer> containerSet, String IDPrefix) {
417 int threadCounter = 0;
419 for(EmailContainer container: containerSet) {
421 String threadID = String.format(
"%s-%d", IDPrefix, threadCounter++);
423 addThreadID(container, threadID);
435 private void addThreadID(EmailContainer container, String threadID) {
436 if(container == null) {
440 EmailMessage message = container.getMessage();
441 if(message != null) {
442 message.setMessageThreadID(threadID);
445 if(container.hasChildren()) {
446 for(EmailContainer child: container.getChildren()) {
447 addThreadID(child, threadID);
456 final class EmailContainer {
458 private EmailMessage message;
459 private EmailContainer parent;
460 private Set<EmailContainer> children;
475 EmailContainer(EmailMessage message) {
476 this.message = message;
484 EmailMessage getMessage() {
493 void setMessage(EmailMessage message) {
494 this.message = message;
502 boolean hasMessage() {
503 return message != null;
513 String getSimplifiedSubject() {
515 if (message != null) {
516 subject = message.getSimplifiedSubject();
517 }
else if (children != null) {
518 for (EmailContainer child : children) {
519 if (child.hasMessage()) {
520 subject = child.getSimplifiedSubject();
523 if (subject != null && !subject.isEmpty()) {
538 boolean isReplySubject() {
539 if (message != null) {
540 return message.isReplySubject();
541 }
else if (children != null) {
542 for (EmailContainer child : children) {
543 if (child.hasMessage()) {
544 boolean isReply = child.isReplySubject();
561 EmailContainer getParent() {
570 void setParent(EmailContainer container) {
579 boolean hasParent() {
580 return parent != null;
590 boolean addChild(EmailContainer child) {
591 if (children == null) {
592 children =
new HashSet<>();
595 return children.add(child);
608 boolean addChildren(Set<EmailContainer> children) {
609 if (children == null || children.isEmpty()) {
613 if (this.children == null) {
614 this.children =
new HashSet<>();
617 return this.children.addAll(children);
629 boolean removeChildren(Set<EmailContainer> children) {
630 if (children != null) {
631 return this.children.removeAll(children);
641 void clearChildren() {
642 if( children != null ) {
655 boolean removeChild(EmailContainer child) {
656 if(children != null) {
657 return children.remove(child);
668 boolean hasChildren() {
669 return children != null && !children.isEmpty();
677 Set<EmailContainer> getChildren() {
690 boolean isChild(EmailContainer container) {
691 if (children == null || children.isEmpty()) {
693 }
else if (children.contains(container)) {
696 return children.stream().anyMatch((child) -> (child.isChild(container)));