19 package org.sleuthkit.autopsy.recentactivity;
21 import java.net.MalformedURLException;
23 import java.util.Arrays;
24 import java.util.Collection;
25 import java.util.Collections;
26 import java.util.Comparator;
27 import java.util.HashSet;
28 import java.util.List;
30 import java.util.logging.Level;
32 import java.util.regex.Matcher;
33 import java.util.regex.Pattern;
34 import java.util.stream.Collectors;
35 import org.apache.commons.lang.StringUtils;
36 import org.openide.util.Lookup;
37 import org.openide.util.NbBundle.Messages;
45 import org.
sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
47 import org.
sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
// Bundle message entries (key=value) used by this class: the module name, the
// progress message reported while domain types are being found, and the name
// of the parent module.
60 "DomainCategoryRunner_moduleName_text=DomainCategoryRunner",
61 "DomainCategoryRunner_Progress_Message_Domain_Types=Finding Domain Types",
62 "DomainCategoryRunner_parentModuleName=Recent Activity"
// Extract subclass that asks the configured DomainCategorizer providers for a
// category for the host/domain of web history artifacts and posts a
// TSK_WEB_CATEGORIZATION artifact for the ones that are categorized.
64 class DomainCategoryRunner extends Extract {
// Leading scheme part: an optional scheme name (no ':', '/', '?' or '#') with
// an optional ':' after it, followed by a mandatory "//". The name is captured
// in the named group "scheme".
69 private static final String URL_REGEX_SCHEME =
"(((?<scheme>[^:\\/?#]+):?)?\\/\\/)";
// User info: any run of characters other than '/', '?', '#' and '@' that is
// terminated by '@'; captured in the named group "userinfo".
71 private static final String URL_REGEX_USERINFO =
"((?<userinfo>[^\\/?#@]*)@)";
// Host: captured in the named group "host". The expression requires a literal
// '.'; '/', '?', '#' and ':' are excluded on both sides of that first dot.
72 private static final String URL_REGEX_HOST =
"(?<host>[^\\/\\.?#:]*\\.[^\\/?#:]*)";
// Port: ':' followed by one to five digits; digits captured in the group "port".
73 private static final String URL_REGEX_PORT =
"(:(?<port>[0-9]{1,5}))";
// Authority: user info, host and port (each one optional) followed by an
// optional '/'.
74 private static final String URL_REGEX_AUTHORITY = String.format(
"(%s?%s?%s?\\/?)", URL_REGEX_USERINFO, URL_REGEX_HOST, URL_REGEX_PORT);
// Path: captured in the named group "path"; consists of the text up to '?' or
// '#', then an optional "?query" and an optional "#fragment".
76 private static final String URL_REGEX_PATH =
"(?<path>([^?#]*)(\\?([^#]*))?(#(.*))?)";
// Full expression: anchored at the start of the input, skips leading
// whitespace, then an optional scheme, an optional authority and an optional path.
78 private static final String URL_REGEX_STR = String.format(
"^\\s*%s?%s?%s?", URL_REGEX_SCHEME, URL_REGEX_AUTHORITY, URL_REGEX_PATH);
// Compiled form of URL_REGEX_STR; used by getHost() as a fallback parser.
79 private static final Pattern URL_REGEX = Pattern.compile(URL_REGEX_STR);
// Attribute type ids looked up by ARTIFACT_COMPARATOR. They are constants, so
// they are declared final to prevent accidental reassignment.
81 private static final int DATETIME_ACCESSED_TYPEID = ATTRIBUTE_TYPE.TSK_DATETIME_ACCESSED.getTypeID();
82 private static final int URL_TYPEID = ATTRIBUTE_TYPE.TSK_URL.getTypeID();
// Logger shared by all instances of this class.
84 private static final Logger logger = Logger.getLogger(DomainCategoryRunner.class.getName());
/**
 * Looks up the attribute with the given type id in the map and returns its
 * long value, or 0 when no attribute is mapped to that id.
 *
 * @param attrMap    Map from attribute type id to attribute; checked for null.
 * @param attrTypeId The attribute type id to look up.
 *
 * @return The long value of the attribute, or 0 if the attribute is absent.
 */
94 private static long getTimeOrZero(Map<Integer, BlackboardAttribute> attrMap,
int attrTypeId) {
// NOTE(review): null-map guard; going by the method name it presumably
// yields 0 -- confirm.
95 if (attrMap == null) {
99 BlackboardAttribute attr = attrMap.get(attrTypeId);
100 return attr == null ? 0 : attr.getValueLong();
/**
 * Looks up the attribute with the given type id in the map and returns its
 * string value. An absent attribute and a null string value both yield the
 * empty string.
 *
 * @param attrMap    Map from attribute type id to attribute; checked for null.
 * @param attrTypeId The attribute type id to look up.
 *
 * @return The string value of the attribute, or "" if the attribute or its
 *         value is missing.
 */
111 private static String getStringOrEmpty(Map<Integer, BlackboardAttribute> attrMap,
int attrTypeId) {
// NOTE(review): null-map guard; going by the method name it presumably
// yields "" -- confirm.
112 if (attrMap == null) {
116 BlackboardAttribute attr = attrMap.get(attrTypeId);
// First normalize a missing attribute, then a missing value.
117 String attrStr = attr == null ?
"" : attr.getValueString();
118 return attrStr == null ?
"" : attrStr;
/**
 * Comparator used to sort the artifacts before they are categorized. It
 * compares the TSK_DATETIME_ACCESSED values first, then the TSK_URL values
 * ignoring case, and finally the artifact ids.
 */
124 private static final Comparator<BlackboardArtifact> ARTIFACT_COMPARATOR = (a, b) -> {
// Attribute maps keyed by attribute type id; a map stays null if fetching
// the attributes fails before it is assigned.
126 Map<Integer, BlackboardAttribute> attrMapA = null;
127 Map<Integer, BlackboardAttribute> attrMapB = null;
// If an artifact has several attributes of the same type, the merge
// function keeps the first one encountered.
130 attrMapA = a.getAttributes()
132 .collect(Collectors.toMap(attr -> attr.getAttributeType().getTypeID(), attr -> attr, (attr1, attr2) -> attr1));
134 attrMapB = b.getAttributes()
136 .collect(Collectors.toMap(attr -> attr.getAttributeType().getTypeID(), attr -> attr, (attr1, attr2) -> attr1));
138 }
catch (TskCoreException ex) {
139 logger.log(Level.WARNING,
"There was an error fetching attributes for artifacts", ex);
// Primary key: accessed time; an absent attribute counts as time 0.
// NOTE(review): the value returned when the times differ is not shown
// here, so the sort direction should be confirmed.
144 int timeCompare = Long.compare(getTimeOrZero(attrMapA, DATETIME_ACCESSED_TYPEID), getTimeOrZero(attrMapB, DATETIME_ACCESSED_TYPEID));
145 if (timeCompare != 0) {
// Secondary key: URL compared case-insensitively; an absent attribute
// counts as "".
151 int urlCompare = getStringOrEmpty(attrMapA, URL_TYPEID).compareToIgnoreCase(getStringOrEmpty(attrMapB, URL_TYPEID));
152 if (urlCompare != 0) {
// Final tie-breaker: artifact id.
157 return Long.compare(a.getId(), b.getId());
// Data source being processed; assigned in process().
160 private Content dataSource;
// Ingest job context; assigned in process() and polled for cancellation in
// findDomainTypes().
161 private IngestJobContext context;
// Providers consulted by findCategory(); an empty list until configExtractor()
// replaces it.
162 private List<DomainCategorizer> domainProviders = Collections.emptyList();
// Package-private, no-argument constructor.
167 DomainCategoryRunner() {
/**
 * Extracts the host from a URL string. Parsing with java.net.URL is attempted
 * first; if that leaves the host blank, URL_REGEX is matched against the
 * string and its "host" group is used instead.
 *
 * @param urlString The URL string to extract the host from.
 *
 * @return NOTE(review): the return statement is not shown here; presumably
 *         the host that was found -- confirm, including what is returned
 *         when neither approach yields a host.
 */
178 private String getHost(String urlString) {
182 URL url =
new URL(urlString);
184 host = url.getHost();
186 }
catch (MalformedURLException ignore) {
// Fallback for strings java.net.URL could not parse or that produced a
// blank host.
191 if (StringUtils.isBlank(host)) {
192 Matcher m = URL_REGEX.matcher(urlString);
194 host = m.group(
"host");
/**
 * Asks the configured providers, in list order, for the category of the given
 * domain and host. A provider that throws a DomainCategorizerException is
 * logged at WARNING level.
 *
 * @param domain The domain to categorize.
 * @param host   The host to categorize.
 *
 * @return NOTE(review): the return statements are not shown here; presumably
 *         the first non-null provider result, or null when no provider has
 *         one (the caller checks the result for null) -- confirm.
 */
208 private DomainCategory findCategory(String domain, String host) {
// Guard against a null provider list so the loop below is always safe.
209 List<DomainCategorizer> safeProviders = domainProviders == null ? Collections.emptyList() : domainProviders;
210 for (DomainCategorizer provider : safeProviders) {
211 DomainCategory result;
213 result = provider.getCategory(domain, host);
214 if (result != null) {
217 }
catch (DomainCategorizerException ex) {
218 logger.log(Level.WARNING,
"There was an error processing results with " + provider.getClass().getCanonicalName(), ex);
/**
 * Creates a holder that keeps a file together with a host and a domain.
 *
 * @param abstractFile The file to associate with the host and domain.
 * @param host         The host.
 * @param domain       The domain.
 */
242 ArtifactHost(AbstractFile abstractFile, String host, String domain) {
243 this.abstractFile = abstractFile;
245 this.domain = domain;
// Accessor for the associated file.
251 AbstractFile getAbstractFile() {
/**
 * Builds the ArtifactHost for an artifact. The file is fetched by the
 * artifact's object id, the host is derived from the TSK_URL attribute via
 * getHost(), and the domain is read from the TSK_DOMAIN attribute. When the
 * domain is blank but a host is present, the domain is derived from the host
 * with NetworkUtils.extractDomain(). Host and domain are lower-cased in the
 * returned object.
 *
 * @param artifact The artifact to inspect.
 *
 * @return The file, host and domain for the artifact. NOTE(review): the
 *         branch taken when both host and domain are blank is not shown here;
 *         the caller checks for null, so it presumably returns null -- confirm.
 *
 * @throws TskCoreException Declared by this method; the file and attribute
 *                          lookups are the calls made here.
 */
279 private ArtifactHost getDomainAndHost(BlackboardArtifact artifact)
throws TskCoreException {
281 AbstractFile file = tskCase.getAbstractFileById(artifact.getObjectID());
// Host comes from the URL attribute, when one with a non-blank value exists.
287 BlackboardAttribute urlAttr = artifact.getAttribute(
new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_URL));
288 String urlString = null;
290 if (urlAttr != null) {
291 urlString = urlAttr.getValueString();
292 if (StringUtils.isNotBlank(urlString)) {
293 host = getHost(urlString);
// Domain comes from the domain attribute, when present.
298 BlackboardAttribute domainAttr = artifact.getAttribute(
new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DOMAIN));
299 String domainString = null;
300 if (domainAttr != null) {
301 domainString = domainAttr.getValueString();
304 boolean hasDomain = StringUtils.isNotBlank(domainString);
305 boolean hasHost = StringUtils.isNotBlank(host);
// Reconcile the two values: nothing usable, host only, or domain only.
// NOTE(review): the domain-only branch body is not shown here; it presumably
// supplies a host value, since host is dereferenced in the return below --
// confirm.
308 if (!hasDomain && !hasHost) {
310 }
else if (!hasDomain) {
311 domainString = NetworkUtils.extractDomain(host);
312 }
else if (!hasHost) {
316 return new ArtifactHost(file, host.toLowerCase(), domainString.toLowerCase());
/**
 * Tests an item against a set of items that have already been seen. A blank
 * item and an item already contained in the set are handled by separate
 * branches.
 * NOTE(review): the branch bodies are not shown here; going by the method
 * name, an item that is not yet in the set is presumably added to it and
 * reported as not being a duplicate -- confirm, along with the result for a
 * blank item.
 *
 * @param items The set of items seen so far.
 * @param item  The item to test.
 *
 * @return Whether the item is to be treated as a duplicate.
 */
328 private static boolean isDuplicateOrAdd(Set<String> items, String item) {
329 if (StringUtils.isBlank(item)) {
331 }
else if (items.contains(item)) {
/**
 * Goes through the TSK_WEB_HISTORY artifacts of the current data source in
 * ARTIFACT_COMPARATOR order, determines host and domain for each, asks the
 * providers for a category and calls addCategoryArtifact() for categorized
 * hosts. Hosts and host suffixes already encountered are tracked in sets.
 * A TskCoreException is logged at SEVERE level, and a summary is logged at
 * the end.
 */
344 private void findDomainTypes() {
// Counters reported in the summary log message at the end.
345 int artifactsAnalyzed = 0;
346 int domainTypeInstancesFound = 0;
// Hosts that have already been looked at.
349 Set<String> hostsSeen =
new HashSet<>();
// Host suffixes for which a category result has already been handled.
352 Set<String> hostSuffixesSeen =
new HashSet<>();
// Only web history artifacts belonging to this data source are fetched.
354 List<BlackboardArtifact> listArtifacts = currentCase.getSleuthkitCase().getBlackboard().getArtifacts(
355 Arrays.asList(
new BlackboardArtifact.Type(ARTIFACT_TYPE.TSK_WEB_HISTORY)),
356 Arrays.asList(dataSource.getId()));
358 logger.log(Level.INFO,
"Processing {0} blackboard artifacts.", listArtifacts.size());
359 Collections.sort(listArtifacts, ARTIFACT_COMPARATOR);
361 for (BlackboardArtifact artifact : listArtifacts) {
// Cancellation is checked once per artifact.
363 if (context.dataSourceIngestIsCancelled()) {
// NOTE(review): bodies of the checks in this loop are not shown here;
// presumably each one skips to the next artifact -- confirm.
368 ArtifactHost curArtHost = getDomainAndHost(artifact);
369 if (curArtHost == null || isDuplicateOrAdd(hostsSeen, curArtHost.getHost())) {
377 DomainCategory domainEntryFound = findCategory(curArtHost.getDomain(), curArtHost.getHost());
378 if (domainEntryFound == null) {
// A result needs both a host suffix and a category to be usable.
383 String hostSuffix = domainEntryFound.getHostSuffix();
384 String domainCategory = domainEntryFound.getCategory();
385 if (StringUtils.isBlank(hostSuffix) || StringUtils.isBlank(domainCategory)) {
390 domainTypeInstancesFound++;
392 if (isDuplicateOrAdd(hostSuffixesSeen, hostSuffix)) {
397 addCategoryArtifact(curArtHost, domainCategory);
399 }
catch (TskCoreException e) {
400 logger.log(Level.SEVERE,
"Encountered error retrieving artifacts for messaging domains", e);
402 if (context.dataSourceIngestIsCancelled()) {
403 logger.info(
"Operation terminated by user.");
// Summary: distinct host suffixes seen, artifacts analyzed, category hits.
405 logger.log(Level.INFO, String.format(
"Extracted %s distinct messaging domain(s) from the blackboard. "
406 +
"Of the %s artifact(s) with valid hosts, %s url(s) contained messaging domain suffix.",
407 hostSuffixesSeen.size(), artifactsAnalyzed, domainTypeInstancesFound));
/**
 * Creates and posts a TSK_WEB_CATEGORIZATION artifact on the file held by the
 * given ArtifactHost. The artifact carries the domain (TSK_DOMAIN), the host
 * (TSK_HOST) and the category (TSK_NAME).
 *
 * @param artHost        Supplies the file, host and domain.
 * @param domainCategory The category to record in the TSK_NAME attribute.
 */
418 private void addCategoryArtifact(ArtifactHost artHost, String domainCategory) {
// The parent module name from the bundle is passed as the module name of
// every attribute.
419 String moduleName = Bundle.DomainCategoryRunner_parentModuleName();
420 Collection<BlackboardAttribute> bbattributes = Arrays.asList(
421 new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DOMAIN, moduleName, artHost.getDomain()),
422 new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_HOST, moduleName, artHost.getHost()),
423 new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_NAME, moduleName, domainCategory)
425 postArtifact(createArtifactWithAttributes(ARTIFACT_TYPE.TSK_WEB_CATEGORIZATION, artHost.getAbstractFile(), bbattributes));
/**
 * Entry point for a data source: stores the data source and ingest context on
 * this instance, reports the "finding domain types" progress message and then
 * runs findDomainTypes().
 *
 * @param dataSource  The data source to process.
 * @param context     The ingest job context.
 * @param progressBar Receives the progress message.
 */
429 public void process(Content dataSource, IngestJobContext context, DataSourceIngestModuleProgress progressBar) {
430 this.dataSource = dataSource;
431 this.context = context;
433 progressBar.progress(Bundle.DomainCategoryRunner_Progress_Message_Domain_Types());
434 this.findDomainTypes();
/**
 * Discovers and initializes the domain categorizer providers. All
 * DomainCategorizer instances found through the default Lookup are sorted by
 * class name (case-insensitive), a DefaultDomainCategorizer is appended after
 * them, each provider is initialized, and the resulting list becomes the
 * provider list of this instance.
 *
 * @throws IngestModule.IngestModuleException If initializing a provider
 *                                            throws a
 *                                            DomainCategorizerException.
 */
438 void configExtractor() throws IngestModule.IngestModuleException {
440 Collection<? extends DomainCategorizer> lookupList = Lookup.getDefault().lookupAll(DomainCategorizer.class);
// Treat a null lookup result as "no providers found".
441 if (lookupList == null) {
442 lookupList = Collections.emptyList();
// Null entries are dropped; sorting by class name makes the order
// independent of the order in which the lookup returned the providers.
445 List<DomainCategorizer> foundProviders = lookupList.stream()
446 .filter(provider -> provider != null)
447 .sorted((a, b) -> a.getClass().getName().compareToIgnoreCase(b.getClass().getName()))
448 .collect(Collectors.toList());
// The default categorizer goes last in iteration order.
451 foundProviders.add(
new DefaultDomainCategorizer());
453 for (DomainCategorizer provider : foundProviders) {
455 provider.initialize();
456 }
catch (DomainCategorizerException ex) {
457 throw new IngestModule.IngestModuleException(
"There was an error instantiating the provider: " + provider.getClass().getSimpleName(), ex);
461 this.domainProviders = foundProviders;
/**
 * Finishes domain categorization: iterates over the providers (if the list is
 * not null), logging any exception raised for a provider at WARNING level,
 * and then logs that categorization has completed.
 * NOTE(review): the statement guarded by the try is not shown here; per the
 * log message it presumably closes the provider -- confirm.
 */
465 public void complete() {
466 if (this.domainProviders != null) {
467 for (DomainCategorizer provider : this.domainProviders) {
470 }
catch (Exception ex) {
471 logger.log(Level.WARNING,
"There was an error closing " + provider.getClass().getName(), ex);
476 logger.info(
"Domain categorization completed.");
final AbstractFile abstractFile