19package org.sleuthkit.autopsy.coreutils;
21import java.io.BufferedReader;
22import java.io.IOException;
23import java.io.InputStream;
24import java.io.InputStreamReader;
25import java.nio.charset.StandardCharsets;
26import java.util.HashMap;
28import java.util.stream.Collectors;
29import java.util.stream.Stream;
30import org.apache.commons.lang3.StringUtils;
/**
 * Package-private helper that reads suffix entries from the
 * "public_suffix_list.dat" resource into a tree of DomainCategory nodes
 * (see load() and addDomainSuffix()) and uses that tree in getDomain() to
 * select the trailing portion of a dot-separated host name.
 */
36class DomainTokenizer {
/**
 * Node of the suffix lookup tree: a HashMap whose keys are String tokens
 * and whose values are further DomainCategory nodes.
 */
43 private static class DomainCategory extends HashMap<String, DomainCategory> {
// Stores the node 'cat' as this node's child under 'childKey'.
// NOTE(review): how 'cat' is obtained is not visible in this view.
49 this.put(childKey, cat);
// Separator placed between tokens when getDomain() joins its result.
57 private static final String JOINER =
".";
// Regex form of JOINER (an escaped dot) handed to String.split() when
// breaking a suffix or domain into tokens.
59 private static final String DELIMITER =
"\\" + JOINER;
// Key looked up in a node by getDomain() when the literal token lookup
// produced no child.
61 private static final String WILDCARD =
"*";
// Prefix prepended to a token by getDomain() when probing a node for an
// exception entry.
62 private static final String EXCEPTION_PREFIX =
"!";
// Resource name passed to DomainTokenizer.class.getResourceAsStream() in load().
67 private static final String DOMAIN_LIST =
"public_suffix_list.dat";
// Lines of the suffix list that start with this token are skipped by load().
70 private static final String COMMENT_TOKEN =
"//";
// Static instance holder; getInstance() tests it against null.
// NOTE(review): presumably assigned lazily inside getInstance() - the
// assignment itself is not visible in this view.
73 private static DomainTokenizer categorizer =
null;
/**
 * Accessor for a DomainTokenizer instance. Begins by checking whether the
 * static 'categorizer' field is still null.
 * NOTE(review): presumably the null branch calls load() and caches the
 * result in 'categorizer' - confirm; the branch body is not visible here.
 *
 * @return a DomainTokenizer instance
 * @throws IOException declared by this method; presumably propagated from
 *                     reading the suffix list - confirm against the full body
 */
81 static DomainTokenizer getInstance() throws IOException {
82 if (categorizer ==
null) {
/**
 * Builds a DomainTokenizer from the DOMAIN_LIST resource, which is resolved
 * relative to DomainTokenizer.class and decoded as UTF-8. Each line is
 * trimmed; lines that are blank or start with COMMENT_TOKEN are skipped and
 * every other line is passed to addDomainSuffix().
 *
 * @return presumably the populated tokenizer - the return statement is not
 *         visible in this view
 * @throws IOException if reading the resource fails
 */
95 private static DomainTokenizer load() throws IOException {
// All three resources are declared in the try header, so they are closed
// automatically when the block exits.
// NOTE(review): getResourceAsStream() returns null when the resource is
// missing, which would make the InputStreamReader constructor throw a
// NullPointerException rather than an IOException.
96 try (InputStream is = DomainTokenizer.class.getResourceAsStream(DOMAIN_LIST);
97 InputStreamReader isReader =
new InputStreamReader(is, StandardCharsets.UTF_8);
98 BufferedReader reader =
new BufferedReader(isReader)) {
// Local variable with the same name as the static 'categorizer' field;
// inside this method the name refers to the local.
100 DomainTokenizer categorizer =
new DomainTokenizer();
// NOTE(review): ready() only reports whether a read would block, not
// whether end-of-stream was reached; readLine() can still return null,
// in which case line.trim() below would throw. A null check on the
// result of readLine() is the usual loop condition.
101 while (reader.ready()) {
102 String line = reader.readLine();
103 String trimmed = line.trim();
// Keep only lines that carry content and are not comments.
104 if (!StringUtils.isBlank(trimmed) && !trimmed.startsWith(COMMENT_TOKEN)) {
105 categorizer.addDomainSuffix(trimmed);
// Private constructor: within the visible code the only call site is load().
// The constructor body is not visible in this view.
113 private DomainTokenizer() {
/**
 * Adds one suffix entry to the lookup tree. The suffix is lower-cased,
 * trimmed and split on dots; the tokens are then visited from last to
 * first, descending one tree level per token via getOrAddChild().
 *
 * @param domainSuffix a suffix line such as those passed in by load();
 *                     blank input is checked first (the action taken for it
 *                     is not visible in this view)
 */
125 private void addDomainSuffix(String domainSuffix) {
126 if (StringUtils.isBlank(domainSuffix)) {
// NOTE(review): toLowerCase() without an explicit Locale uses the default
// locale, so results can differ on locales with special casing rules.
130 String[] tokens = domainSuffix.toLowerCase().trim().split(DELIMITER);
// Walk right-to-left so the last label of the suffix is handled first.
133 for (
int i = tokens.length - 1; i >= 0; i--) {
134 String token = tokens[i];
// Blank tokens get special handling; what happens in that branch is not
// visible in this view.
135 if (StringUtils.isBlank(token)) {
// Step into the child for this token.
// NOTE(review): the initial value of 'cat' is not visible here -
// presumably the root node of the tree; confirm.
139 cat = cat.getOrAddChild(tokens[i]);
/**
 * Returns the trailing portion of the given dot-separated name, chosen by
 * walking its tokens from last to first through the suffix tree. The
 * result is the tokens from the index where the walk stopped through the
 * end of the list, joined with JOINER.
 *
 * @param domain the name to examine; blank input is checked first (the
 *               value returned for it is not visible in this view)
 * @return the selected trailing tokens joined with "."
 */
153 String getDomain(String domain) {
154 if (StringUtils.isBlank(domain)) {
// Lower-case, split on dots and drop blank tokens.
158 List<String> tokens = Stream.of(domain.toLowerCase().split(DELIMITER))
159 .filter(StringUtils::isNotBlank)
160 .collect(Collectors.toList());
// Start from the last token and move towards the first.
162 int idx = tokens.size() - 1;
165 for (; idx >= 0; idx--) {
// Probe the current node for an exception entry ("!" + token).
// NOTE(review): the branch body is not visible here - presumably it
// ends the walk; confirm.
169 if (cat.get(EXCEPTION_PREFIX + tokens.get(idx)) !=
null) {
// 'newCat' is assigned on a line not visible in this view - presumably the
// child for the literal token; when it is null the wildcard key is tried.
176 if (newCat ==
null) {
179 newCat = cat.get(WILDCARD);
// Neither the literal token nor the wildcard produced a child; the
// action taken here is not visible in this view.
180 if (newCat ==
null) {
// True when idx was never decremented, i.e. the walk stopped on the very
// last token; the handling of that case is not visible in this view.
189 if (idx == tokens.size() - 1) {
// Clamp to 0 because idx is -1 when the loop ran through every token.
192 int minIndex = Math.max(0, idx);
193 List<String> subList = tokens.subList(minIndex, tokens.size());
194 return String.join(JOINER, subList);
DomainCategory getOrAddChild(String childKey)