Autopsy 4.22.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
LanguageSpecificContentIndexingHelper.java
Go to the documentation of this file.
1/*
2 * Autopsy Forensic Browser
3 *
4 * Copyright 2011-2021 Basis Technology Corp.
5 * Contact: carrier <at> sleuthkit <dot> org
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19package org.sleuthkit.autopsy.keywordsearch;
20
21import org.apache.commons.lang3.math.NumberUtils;
22import org.apache.solr.common.SolrInputDocument;
23import org.openide.util.NbBundle;
24import org.sleuthkit.autopsy.healthmonitor.HealthMonitor;
25import org.sleuthkit.autopsy.healthmonitor.TimingMetric;
26
27import java.util.ArrayList;
28import java.util.List;
29import java.util.Map;
30import java.util.Optional;
31
35class LanguageSpecificContentIndexingHelper {
36
37 private final LanguageDetector languageDetector = new LanguageDetector();
38
39 Optional<Language> detectLanguageIfNeeded(String text) throws NoOpenCoreException {
40 double indexSchemaVersion = NumberUtils.toDouble(KeywordSearch.getServer().getIndexInfo().getSchemaVersion());
41 if (2.2 <= indexSchemaVersion) {
42 return languageDetector.detect(text);
43 } else {
44 return Optional.empty();
45 }
46 }
47
48 void updateLanguageSpecificFields(Map<String, Object> fields, Chunker.Chunk chunk, Language language) {
49 List<String> values = new ArrayList<>();
50 values.add(chunk.toString());
51 if (fields.containsKey(Server.Schema.FILE_NAME.toString())) {
52 values.add(Chunker.sanitize(fields.get(Server.Schema.FILE_NAME.toString()).toString()).toString());
53 }
54
55 // index the chunk to a language specific field
56 fields.put(Server.Schema.CONTENT_JA.toString(), values);
57 fields.put(Server.Schema.LANGUAGE.toString(), Chunker.sanitize(language.getValue()).toString());
58 }
59
60 void indexMiniChunk(Chunker.Chunk chunk, String sourceName, Map<String, Object> fields, String baseChunkID, Language language)
61 throws Ingester.IngesterException {
62 //Make a SolrInputDocument out of the field map
63 SolrInputDocument updateDoc = new SolrInputDocument();
64 for (String key : fields.keySet()) {
65 if (fields.get(key).getClass() == String.class) {
66 updateDoc.addField(key, Chunker.sanitize((String)fields.get(key)).toString());
67 } else {
68 updateDoc.addField(key, fields.get(key));
69 }
70 }
71
72 try {
73 updateDoc.setField(Server.Schema.ID.toString(), Chunker.sanitize(MiniChunkHelper.getChunkIdString(baseChunkID)).toString());
74
75 // index the chunk to a language specific field
76 updateDoc.addField(Server.Schema.CONTENT_JA.toString(), Chunker.sanitize(chunk.toString().substring(chunk.getBaseChunkLength())).toString());
77 updateDoc.addField(Server.Schema.LANGUAGE.toString(), Chunker.sanitize(language.getValue()).toString());
78
79 TimingMetric metric = HealthMonitor.getTimingMetric("Solr: Index chunk");
80
81 KeywordSearch.getServer().addDocument(updateDoc);
82 HealthMonitor.submitTimingMetric(metric);
83
84 } catch (KeywordSearchModuleException | NoOpenCoreException ex) {
85 throw new Ingester.IngesterException(
86 NbBundle.getMessage(Ingester.class, "Ingester.ingest.exception.err.msg", sourceName), ex);
87 }
88 }
89}

Copyright © 2012-2024 Sleuth Kit Labs. Generated on:
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.