Autopsy 4.22.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
DefaultPriorityDomainCategorizer.java
Go to the documentation of this file.
1/*
2 * Autopsy Forensic Browser
3 *
4 * Copyright 2021 Basis Technology Corp.
5 * Contact: carrier <at> sleuthkit <dot> org
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19package org.sleuthkit.autopsy.recentactivity;
20
21import java.io.IOException;
22import java.util.Arrays;
23import java.util.List;
24import java.util.Map;
25import java.util.stream.Collectors;
26import java.util.stream.Stream;
27import org.apache.commons.lang.StringUtils;
28import org.openide.util.NbBundle.Messages;
29import org.sleuthkit.autopsy.url.analytics.DomainCategorizer;
30import org.sleuthkit.autopsy.url.analytics.DomainCategorizerException;
31import org.sleuthkit.autopsy.url.analytics.DomainCategory;
32
37@Messages({
38 "DefaultPriorityDomainCategorizer_searchEngineCategory=Search Engine"
39})
41
42 // taken from https://www.google.com/supported_domains
// Country-specific Google domains, all lower-case and without a "www" label.
// NOTE(review): presumably one of the lists flattened into DOMAIN_LOOKUP; the
// start of that initializer is not visible here, so confirm.
43 private static final List<String> GOOGLE_DOMAINS = Arrays.asList("google.com", "google.ad", "google.ae", "google.com.af", "google.com.ag", "google.com.ai", "google.al", "google.am", "google.co.ao", "google.com.ar", "google.as", "google.at", "google.com.au", "google.az", "google.ba", "google.com.bd", "google.be", "google.bf", "google.bg", "google.com.bh", "google.bi", "google.bj", "google.com.bn", "google.com.bo", "google.com.br", "google.bs", "google.bt", "google.co.bw", "google.by", "google.com.bz", "google.ca", "google.cd", "google.cf", "google.cg", "google.ch", "google.ci", "google.co.ck", "google.cl", "google.cm", "google.cn", "google.com.co", "google.co.cr", "google.com.cu", "google.cv", "google.com.cy", "google.cz", "google.de", "google.dj", "google.dk", "google.dm", "google.com.do", "google.dz", "google.com.ec", "google.ee", "google.com.eg", "google.es", "google.com.et", "google.fi", "google.com.fj", "google.fm", "google.fr", "google.ga", "google.ge", "google.gg", "google.com.gh", "google.com.gi", "google.gl", "google.gm", "google.gr", "google.com.gt", "google.gy", "google.com.hk", "google.hn", "google.hr", "google.ht", "google.hu", "google.co.id", "google.ie", "google.co.il", "google.im", "google.co.in", "google.iq", "google.is", "google.it", "google.je", "google.com.jm", "google.jo", "google.co.jp", "google.co.ke", "google.com.kh", "google.ki", "google.kg", "google.co.kr", "google.com.kw", "google.kz", "google.la", "google.com.lb", "google.li", "google.lk", "google.co.ls", "google.lt", "google.lu", "google.lv", "google.com.ly", "google.co.ma", "google.md", "google.me", "google.mg", "google.mk", "google.ml", "google.com.mm", "google.mn", "google.ms", "google.com.mt", "google.mu", "google.mv", "google.mw", "google.com.mx", "google.com.my", "google.co.mz", "google.com.na", "google.com.ng", "google.com.ni", "google.ne", "google.nl", "google.no", "google.com.np", "google.nr", "google.nu", "google.co.nz", "google.com.om", "google.com.pa", "google.com.pe", 
"google.com.pg", "google.com.ph", "google.com.pk", "google.pl", "google.pn", "google.com.pr", "google.ps", "google.pt", "google.com.py", "google.com.qa", "google.ro", "google.ru", "google.rw", "google.com.sa", "google.com.sb", "google.sc", "google.se", "google.com.sg", "google.sh", "google.si", "google.sk", "google.com.sl", "google.sn", "google.so", "google.sm", "google.sr", "google.st", "google.com.sv", "google.td", "google.tg", "google.co.th", "google.com.tj", "google.tl", "google.tm", "google.tn", "google.to", "google.com.tr", "google.tt", "google.com.tw", "google.co.tz", "google.com.ua", "google.co.ug", "google.co.uk", "google.com.uy", "google.co.uz", "google.com.vc", "google.co.ve", "google.vg", "google.co.vi", "google.com.vn", "google.vu", "google.ws", "google.rs", "google.co.za", "google.co.zm", "google.co.zw", "google.cat");
44
45 // taken from https://www.yahoo.com/everything/world
// Regional Yahoo host names, all lower-case. Several entries are repeated
// (for example "espanol.yahoo.com" and "yahoo.com"); the toMap collector used
// for DOMAIN_LOOKUP keeps the first value on a key collision, so repeats there
// would be harmless.
// NOTE(review): presumably one of the lists flattened into DOMAIN_LOOKUP; confirm.
46 private static final List<String> YAHOO_DOMAINS = Arrays.asList("espanol.yahoo.com", "au.yahoo.com", "be.yahoo.com", "fr-be.yahoo.com", "br.yahoo.com", "ca.yahoo.com", "espanol.yahoo.com", "espanol.yahoo.com", "de.yahoo.com", "es.yahoo.com", "espanol.yahoo.com", "fr.yahoo.com", "in.yahoo.com", "id.yahoo.com", "ie.yahoo.com", "it.yahoo.com", "en-maktoob.yahoo.com", "malaysia.yahoo.com", "espanol.yahoo.com", "nz.yahoo.com", "espanol.yahoo.com", "ph.yahoo.com", "qc.yahoo.com", "ro.yahoo.com", "sg.yahoo.com", "za.yahoo.com", "se.yahoo.com", "uk.yahoo.com", "yahoo.com", "espanol.yahoo.com", "vn.yahoo.com", "gr.yahoo.com", "maktoob.yahoo.com", "yahoo.com", "hk.yahoo.com", "tw.yahoo.com", "yahoo.co.jp");
47
// Additional lower-case domains, listed without a "www" label, that are not
// covered by the Google and Yahoo lists.
// NOTE(review): presumably one of the lists flattened into DOMAIN_LOOKUP; confirm.
48 private static final List<String> OTHER_SEARCH_ENGINES = Arrays.asList(
49 "bing.com",
50 "baidu.com",
51 "sogou.com",
52 "soso.com",
53 "duckduckgo.com",
54 "swisscows.com",
55 "gibiru.com",
56 "cutestat.com",
57 "youdao.com",
58 "biglobe.ne.jp",
59 "givewater.com",
60 "ekoru.org",
61 "ecosia.org",
62 // according to https://en.wikipedia.org/wiki/Yandex
63 "yandex.ru",
64 "yandex.com"
65 );
66
// Leading host label that getCategory() skips before looking a host up.
67 private static final String WWW_PREFIX = "www";
68
69 private static final Map<String, String> DOMAIN_LOOKUP
71 .flatMap((lst) -> lst.stream())
72 .collect(Collectors.toMap((k) -> k, (k) -> Bundle.DefaultPriorityDomainCategorizer_searchEngineCategory(), (v1, v2) -> v1));
73
74 @Override
76 }
77
78 @Override
79 public DomainCategory getCategory(String domain, String host) throws DomainCategorizerException {
80
81 String hostToUse = StringUtils.isBlank(host) ? domain : host;
82
83 if (StringUtils.isBlank(hostToUse)) {
84 return null;
85 }
86
87 List<String> domainWords = Stream.of(hostToUse.toLowerCase().split("\\."))
88 .filter(StringUtils::isNotBlank)
89 .map(String::trim)
90 .collect(Collectors.toList());
91
92 String sanitizedDomain = domainWords.stream()
93 // skip first word segment if 'www'
94 .skip(domainWords.size() > 0 && WWW_PREFIX.equals(domainWords.get(0)) ? 1 : 0)
95 .collect(Collectors.joining("."));
96
97 String category = DOMAIN_LOOKUP.get(sanitizedDomain);
98 return category == null ? null : new DomainCategory(sanitizedDomain, category);
99 }
100
101 @Override
102 public void close() throws IOException {
103 }
104}

Copyright © 2012-2024 Sleuth Kit Labs. Generated on:
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.