Autopsy 4.22.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
EmailMessageThreader.java
Go to the documentation of this file.
1/*
2 * Autopsy Forensic Browser
3 *
4 * Copyright 2019 Basis Technology Corp.
5 * Contact: carrier <at> sleuthkit <dot> org
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19package org.sleuthkit.autopsy.thunderbirdparser;
20
21import java.util.ArrayList;
22import java.util.HashMap;
23import java.util.HashSet;
24import java.util.Iterator;
25import java.util.List;
26import java.util.Map;
27import java.util.Set;
28import java.util.UUID;
29
39final class EmailMessageThreader {
40
41 private int bogus_id_count = 0;
42
43 private EmailMessageThreader(){}
44
45 public static void threadMessages(List<EmailMessage> emailMessages) {
46 EmailMessageThreader instance = new EmailMessageThreader();
47
48 Map<String, EmailContainer> id_table = instance.createIDTable(emailMessages);
49 Set<EmailContainer> rootSet = instance.getRootSet(id_table);
50
51 instance.pruneEmptyContainers(rootSet);
52
53 Set<EmailContainer> finalRootSet = instance.groupBySubject(rootSet);
54
55 instance.assignThreadIDs(finalRootSet);
56 }
57
67 private Map<String, EmailContainer> createIDTable(List<EmailMessage> emailMessages) {
68 HashMap<String, EmailContainer> id_table = new HashMap<>();
69
70 for (EmailMessage message : emailMessages) {
71 String messageID = message.getMessageID();
72
73 // Check the id_table for an existing Container for message-id
74 EmailContainer container = id_table.get(messageID);
75
76 // An existing container for message-id was found
77 if (container != null) {
78 // If the existing Container has a message already assocated with it
79 // emailMessage maybe a duplicate, so we don't lose the existance of
80 // the duplicate message assign it a bogus message-id
81 if (container.hasMessage()) {
82 messageID = String.format("<Bogus-id: %d >", bogus_id_count++);
83 container = null;
84 } else {
85 container.setMessage(message);
86 }
87 }
88
89 if (container == null) {
90 container = new EmailContainer(message);
91 id_table.put(messageID, container);
92 }
93
94 processMessageReferences(message, container, id_table);
95 }
96
97 return id_table;
98 }
99
108 void processMessageReferences(EmailMessage message, EmailContainer container, Map<String, EmailContainer> id_table) {
109 List<String> referenceList = message.getReferences();
110
111 // Make sure the inReplyToID is in the list of references
112 String inReplyToID = message.getInReplyToID();
113 if (inReplyToID != null && !inReplyToID.isEmpty()) {
114 if (referenceList == null) {
115 referenceList = new ArrayList<>();
116 }
117
118 referenceList.add(inReplyToID);
119 }
120
121 // No references, nothing to do
122 if (referenceList == null) {
123 return;
124 }
125
126 EmailContainer parent_ref = null;
127 EmailContainer ref;
128
129 for (String refID : referenceList) {
130 // Check id_table to see if there is already a container for this
131 // reference id, if not create a new Container and add to table
132 ref = id_table.get(refID);
133
134 if (ref == null) {
135 ref = new EmailContainer();
136 id_table.put(refID, ref);
137 }
138
139 // Set the parent\child relationship between parent_ref and ref
140 if (parent_ref != null
141 && !ref.hasParent()
142 && !parent_ref.equals(ref)
143 && !parent_ref.isChild(ref)) {
144 ref.setParent(parent_ref);
145 parent_ref.addChild(ref);
146 }
147
148 parent_ref = ref;
149 }
150
151 // If the parent_ref and container are already linked, don't change
152 // anything
153 if (parent_ref != null
154 && (parent_ref.equals(container)
155 || container.isChild(parent_ref))) {
156 parent_ref = null;
157 }
158
159 // If container already has a parent, the parent was assumed based on
160 // the list of references from another message. parent_ref will be
161 // the real parent of container so throw away the old parent and set a
162 // new one.
163 if (container.hasParent()) {
164 container.getParent().removeChild(container);
165 container.setParent(null);
166 }
167
168 if (parent_ref != null) {
169 container.setParent(container);
170 parent_ref.addChild(container);
171 }
172 }
173
182 Set<EmailContainer> getRootSet(Map<?, EmailContainer> id_table) {
183 HashSet<EmailContainer> rootSet = new HashSet<>();
184
185 id_table.values().stream().filter((container)
186 -> (!container.hasParent())).forEachOrdered((container) -> {
187 rootSet.add(container);
188 });
189
190 return rootSet;
191 }
192
199 void pruneEmptyContainers(Set<EmailContainer> containerSet) {
200 Set<EmailContainer> containersToRemove = new HashSet<>();
201 containerSet.forEach((container) -> {
202 if (!container.hasMessage() && !container.hasChildren()) {
203 containersToRemove.add(container);
204 } else {
205 pruneChildren(container);
206 }
207 });
208
209 containerSet.removeAll(containersToRemove);
210 }
211
220 boolean pruneChildren(EmailContainer parent) {
221 if (parent == null) {
222 return false;
223 }
224
225 Set<EmailContainer> children = parent.getChildren();
226
227 if (children == null) {
228 return false;
229 }
230
231 EmailContainer grandParent = parent.getParent();
232 Set<EmailContainer> remove = new HashSet<>();
233 Set<EmailContainer> add = new HashSet<>();
234 for (EmailContainer child : parent.getChildren()) {
235 if (pruneChildren(child)) {
236 remove.add(child);
237 add.addAll(child.getChildren());
238 child.setParent(null);
239 child.clearChildren();
240
241 }
242 }
243
244 parent.addChildren(add);
245 parent.removeChildren(remove);
246
247 if (!parent.hasMessage() && grandParent != null) {
248 children.forEach((child) -> {
249 child.setParent(grandParent);
250 });
251 return true;
252 }
253
254 return false;
255 }
256
275 Set<EmailContainer> groupBySubject(Set<EmailContainer> rootSet) {
276 Map<String, EmailContainer> subject_table = createSubjectTable(rootSet);
277
278 Set<EmailContainer> finalSet = new HashSet<>();
279
280 for (EmailContainer rootSetContainer : rootSet) {
281 String rootSubject = rootSetContainer.getSimplifiedSubject();
282
283 EmailContainer tableContainer = subject_table.get(rootSubject);
284 if (tableContainer == null || tableContainer.equals(rootSetContainer)) {
285 finalSet.add(rootSetContainer);
286 continue;
287 }
288
289 // If both containers are dummy/empty append the children of one to the other
290 if (tableContainer.getMessage() == null && rootSetContainer.getMessage() == null) {
291 tableContainer.addChildren(rootSetContainer.getChildren());
292 rootSetContainer.clearChildren();
293 continue;
294 }
295
296 // one container is empty, but the other is not, make the non-empty one be a
297 // child of the empty
298 if ((tableContainer.getMessage() == null && rootSetContainer.getMessage() != null)
299 || (tableContainer.getMessage() != null && rootSetContainer.getMessage() == null)) {
300
301 if (tableContainer.getMessage() == null) {
302 tableContainer.addChild(rootSetContainer);
303
304 } else {
305 rootSetContainer.addChild(tableContainer);
306 subject_table.remove(rootSubject, tableContainer);
307 subject_table.put(rootSubject, rootSetContainer);
308
309 finalSet.add(rootSetContainer);
310 }
311
312 continue;
313 }
314
315 // tableContainer is non-empty and it's message's subject does not begin
316 // with 'RE:' but rootSetContainer's message does begin with 'RE:', then
317 // make rootSetContainer a child of tableContainer
318 if (tableContainer.getMessage() != null
319 && !tableContainer.isReplySubject()
320 && rootSetContainer.isReplySubject()) {
321 tableContainer.addChild(rootSetContainer);
322 continue;
323 }
324
325 // If table container is non-empy, and table container's subject does
326 // begin with 'RE:', but rootSetContainer does not start with 'RE:'
327 // make tableContainer a child of rootSetContainer
328 if (tableContainer.getMessage() != null
329 && tableContainer.isReplySubject()
330 && !rootSetContainer.isReplySubject()) {
331 rootSetContainer.addChild(tableContainer);
332 subject_table.put(rootSubject, rootSetContainer);
333 finalSet.add(rootSetContainer);
334 continue;
335 }
336
337 // rootSetContainer and tableContainer either both have 'RE' or
338 // don't. Create a new dummy container with both containers as
339 // children.
340 EmailContainer newParent = new EmailContainer();
341 newParent.addChild(tableContainer);
342 newParent.addChild(rootSetContainer);
343 subject_table.remove(rootSubject, tableContainer);
344 subject_table.put(rootSubject, newParent);
345
346 finalSet.add(newParent);
347 }
348 return finalSet;
349 }
350
359 Map<String, EmailContainer> createSubjectTable(Set<EmailContainer> rootSet) {
360 HashMap<String, EmailContainer> subject_table = new HashMap<>();
361
362 for (EmailContainer rootSetContainer : rootSet) {
363 String subject = "";
364 boolean reSubject = false;
365
366 if (rootSetContainer.hasMessage()) {
367 subject = rootSetContainer.getMessage().getSimplifiedSubject();
368 reSubject = rootSetContainer.getMessage().isReplySubject();
369 } else if (rootSetContainer.hasChildren()) {
370 Iterator<EmailContainer> childrenIterator = rootSetContainer.getChildren().iterator();
371 while (childrenIterator.hasNext()) {
372 EmailMessage childMessage = childrenIterator.next().getMessage();
373 if (childMessage != null) {
374 subject = childMessage.getSimplifiedSubject();
375 if (!subject.isEmpty()) {
376 reSubject = childMessage.isReplySubject();
377 break;
378 }
379 }
380 }
381 }
382
383 if (subject == null || subject.isEmpty()) {
384 continue; // Give up on this container
385 }
386
387 EmailContainer tableContainer = subject_table.get(subject);
388
389// A container will be added to the table, if a container for its "simplified" subject
390// does not currently exist in the table. Or if there is more than one container with the same
391// subject, but one is an "empty container" the empty one will be added
392// the table or in the one in the table has "RE" in the subject it will be replaced
393// by the one that does not have "RE" in the subject (if it exists)
394//
395 if (tableContainer == null ||
396 (tableContainer.getMessage() != null && rootSetContainer.getMessage() == null) ||
397 (!reSubject && (tableContainer.getMessage() != null && tableContainer.getMessage().isReplySubject()))) {
398 subject_table.put(subject, rootSetContainer);
399 }
400
401 }
402
403 return subject_table;
404 }
405
417 private void assignThreadIDs(Set<EmailContainer> containerSet) {
418 for(EmailContainer container: containerSet) {
419 // Generate a threadID
420 String threadID = UUID.randomUUID().toString();
421 // Add the IDs to this thread
422 addThreadID(container, threadID);
423 }
424 }
425
434 private void addThreadID(EmailContainer container, String threadID) {
435 if(container == null) {
436 return;
437 }
438
439 EmailMessage message = container.getMessage();
440 if(message != null) {
441 message.setMessageThreadID(threadID);
442 }
443
444 if(container.hasChildren()) {
445 for(EmailContainer child: container.getChildren()) {
446 addThreadID(child, threadID);
447 }
448 }
449 }
450
455 final class EmailContainer {
456
457 private EmailMessage message;
458 private EmailContainer parent;
459 private Set<EmailContainer> children;
460
464 EmailContainer() {
465 // This constructor is intentially empty to allow for the creation of
466 // an EmailContainer without a message
467 }
468
474 EmailContainer(EmailMessage message) {
475 this.message = message;
476 }
477
483 EmailMessage getMessage() {
484 return message;
485 }
486
492 void setMessage(EmailMessage message) {
493 this.message = message;
494 }
495
501 boolean hasMessage() {
502 return message != null;
503 }
504
512 String getSimplifiedSubject() {
513 String subject = "";
514 if (message != null) {
515 subject = message.getSimplifiedSubject();
516 } else if (children != null) {
517 for (EmailContainer child : children) {
518 if (child.hasMessage()) {
519 subject = child.getSimplifiedSubject();
520 }
521
522 if (subject != null && !subject.isEmpty()) {
523 break;
524 }
525 }
526 }
527 return subject;
528 }
529
537 boolean isReplySubject() {
538 if (message != null) {
539 return message.isReplySubject();
540 } else if (children != null) {
541 for (EmailContainer child : children) {
542 if (child.hasMessage()) {
543 boolean isReply = child.isReplySubject();
544
545 if (isReply) {
546 return isReply;
547 }
548 }
549 }
550 }
551
552 return false;
553 }
554
560 EmailContainer getParent() {
561 return parent;
562 }
563
569 void setParent(EmailContainer container) {
570 parent = container;
571 }
572
578 boolean hasParent() {
579 return parent != null;
580 }
581
589 boolean addChild(EmailContainer child) {
590 if (children == null) {
591 children = new HashSet<>();
592 }
593
594 return children.add(child);
595 }
596
607 boolean addChildren(Set<EmailContainer> children) {
608 if (children == null || children.isEmpty()) {
609 return false;
610 }
611
612 if (this.children == null) {
613 this.children = new HashSet<>();
614 }
615
616 return this.children.addAll(children);
617 }
618
628 boolean removeChildren(Set<EmailContainer> children) {
629 if (children != null) {
630 return this.children.removeAll(children);
631 }
632
633 return false;
634 }
635
640 void clearChildren() {
641 if( children != null ) {
642 children.clear();
643 }
644 }
645
654 boolean removeChild(EmailContainer child) {
655 if(children != null) {
656 return children.remove(child);
657 } else {
658 return false;
659 }
660 }
661
667 boolean hasChildren() {
668 return children != null && !children.isEmpty();
669 }
670
676 Set<EmailContainer> getChildren() {
677 return children;
678 }
679
689 boolean isChild(EmailContainer container) {
690 return isChild(container, new HashSet<>());
691 }
692
703 private boolean isChild(EmailContainer container, Set<EmailContainer> processedContainers) {
704 processedContainers.add(this);
705 if (children == null || children.isEmpty()) {
706 return false;
707 } else if (children.contains(container)) {
708 return true;
709 } else {
710 for (EmailContainer child : children) {
711 // Prevent an infinite recursion by making sure we haven't already
712 // run isChild() on this child
713 if ((!processedContainers.contains(child)) && child.isChild(container, processedContainers)) {
714 return true;
715 }
716 }
717 return false;
718 }
719 }
720 }
721}

Copyright © 2012-2024 Sleuth Kit Labs. Generated on:
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.