19package org.sleuthkit.autopsy.textextractors;
21import java.io.IOException;
22import java.io.InputStream;
23import java.io.InputStreamReader;
24import java.nio.charset.Charset;
25import java.util.ArrayList;
27import java.util.Objects;
28import org.openide.util.Lookup;
29import org.sleuthkit.autopsy.coreutils.StringExtract;
30import org.sleuthkit.autopsy.coreutils.StringExtract.StringExtractUnicodeTable.SCRIPT;
31import org.sleuthkit.autopsy.textextractors.configs.StringsConfig;
32import org.sleuthkit.datamodel.Content;
33import org.sleuthkit.datamodel.TskCoreException;
34import org.sleuthkit.datamodel.TskException;
// Whether UTF-8 string extraction is enabled; assigned from StringsConfig in setExtractionSettings.
41 private boolean extractUTF8;
// Whether UTF-16 string extraction is enabled; assigned from StringsConfig in setExtractionSettings.
42 private boolean extractUTF16;
// The content to extract strings from; assigned once in the constructor.
43 private final Content content;
// Charset name used both for the reader wrapped around the string stream and for
// encoding extracted strings into the caller's byte buffer.
44 private final static String DEFAULT_INDEXED_TEXT_CHARSET =
"UTF-8";
// Scripts to extract. The constructor adds SCRIPT.LATIN_2; setScripts clears and refills the list.
46 private final List<SCRIPT> extractScripts =
new ArrayList<>();
/**
 * Creates an extractor for the given content.
 *
 * NOTE(review): only part of the constructor body is visible in this view;
 * confirm the remaining statements against the full file.
 *
 * @param content the content to extract strings from
 */
53 public StringsTextExtractor(Content content) {
// Register SCRIPT.LATIN_2 as an initial entry in the script list.
55 extractScripts.add(SCRIPT.LATIN_2);
57 this.content = content;
/**
 * Replaces the scripts used for extraction with the elements of the given list.
 *
 * @param extractScripts the scripts to use; the parameter shadows the field of
 *                       the same name, hence the explicit {@code this.} below
 */
65 public final void setScripts(List<SCRIPT> extractScripts) {
// NOTE(review): the body of this null check is not visible in this view.
66 if (extractScripts ==
null) {
// Drop the current entries, then copy in the caller's list (the field itself is final).
70 this.extractScripts.clear();
71 this.extractScripts.addAll(extractScripts);
84 InputStream stringStream = getInputStream(content);
85 return new InputStreamReader(stringStream, Charset.forName(DEFAULT_INDEXED_TEXT_CHARSET));
/**
 * Produces the stream from which extracted strings are read for the given content.
 *
 * NOTE(review): what each branch returns is not visible in this view.
 *
 * @param content the content to read from
 */
88 InputStream getInputStream(Content content) {
// Special case: exactly one script is configured and it is SCRIPT.LATIN_1.
90 if (extractScripts.size() == 1 && extractScripts.get(0).equals(SCRIPT.LATIN_1)) {
/**
 * Applies extraction settings found in the given lookup.
 *
 * A StringsConfig instance is looked up in the context; each of its three
 * settings (UTF-8 flag, UTF-16 flag, language scripts) is applied only when
 * its getter returns a non-null value.
 *
 * @param context lookup that may contain a StringsConfig; ignored when null
 */
107 public void setExtractionSettings(Lookup context) {
108 if (context !=
null) {
109 StringsConfig configInstance = context.lookup(StringsConfig.class);
// NOTE(review): the handling of a missing config is not visible in this view;
// presumably it returns early, since configInstance is dereferenced below.
110 if (configInstance ==
null) {
113 if (Objects.nonNull(configInstance.getExtractUTF8())) {
114 extractUTF8 = configInstance.getExtractUTF8();
116 if (Objects.nonNull(configInstance.getExtractUTF16())) {
117 extractUTF16 = configInstance.getExtractUTF16();
119 if (Objects.nonNull(configInstance.getLanguageScripts())) {
// Goes through setScripts, which clears and refills the script list.
120 setScripts(configInstance.getLanguageScripts());
131 return extractUTF8 || extractUTF16;
// One-character string holding char code 10 (line feed, '\n').
148 private static final String
NLS = Character.toString((
char) 10);
/**
 * Reads extracted string data into the caller's buffer.
 *
 * NOTE(review): large parts of this method are not visible in this view; the
 * comments below describe only the statements that are shown.
 *
 * @param b   destination buffer
 * @param off index in {@code b} at which the copy starts
 * @param len maximum number of bytes requested
 * @throws NullPointerException      guard condition not visible; presumably when {@code b} is null
 * @throws IndexOutOfBoundsException if {@code off} or {@code len} is negative, or
 *                                   {@code len} exceeds {@code b.length - off}
 * @throws IOException               declared by the signature
 */
181 public int read(
byte[] b,
int off,
int len)
throws IOException {
183 throw new NullPointerException();
184 }
// Reject ranges that do not fit inside b.
else if (off < 0 || len < 0 || len > b.length - off) {
185 throw new IndexOutOfBoundsException();
186 }
else if (len == 0) {
189 long fileSize =
content.getSize();
203 boolean singleConsecZero =
false;
// Keep going until the accumulated length reaches the requested length.
205 while (newCurLen < len) {
212 }
catch (TskException ex) {
// The flag becomes true only when c is zero and the flag was previously false,
// so a second zero in a row resets it to false.
241 singleConsecZero = c == 0 && singleConsecZero ==
false;
249 }
else if (!singleConsecZero) {
// The accumulated text is longer than requested: split tempString at appendChars
// into the part to append and the remainder.
275 if (newCurLen > len) {
279 String toAppend =
tempString.substring(0, appendChars);
280 String newTemp =
tempString.substring(appendChars);
318 final String curStringS =
curString.toString();
// Encode the accumulated string with the default indexed-text charset and copy it to b at off.
320 byte[] stringBytes = curStringS.getBytes(Charset.forName(DEFAULT_INDEXED_TEXT_CHARSET));
// NOTE(review): the copy length is min(curStringLen, len), not stringBytes.length.
// If curStringLen is a char count, multi-byte UTF-8 output would be cut short -- confirm.
// The (int) cast on len is redundant because len is already an int.
321 System.arraycopy(stringBytes, 0, b, off, Math.min(
curStringLen, (
int) len));
331 public int read() throws IOException {
/**
 * Skips over bytes of this stream.
 *
 * NOTE(review): only the final statement of the body is visible in this view.
 *
 * @param n number of bytes to skip
 * @return the value returned by the superclass implementation
 * @throws IOException declared by the signature
 */
347 public long skip(
long n)
throws IOException {
// Delegate to the superclass implementation.
350 return super.skip(n);
395 this.nothingToDo = extractUTF8 ==
false && extractUTF16 ==
false;
401 public int read() throws IOException {
/**
 * Reads converted string bytes into the caller's buffer.
 *
 * NOTE(review): large parts of this method are not visible in this view; the
 * comments below describe only the statements that are shown.
 *
 * @param b   destination buffer
 * @param off index in {@code b} at which the copy starts
 * @param len maximum number of bytes requested
 * @return in the visible exit path, the number of bytes delivered so far, or -1
 *         when none were delivered
 * @throws NullPointerException      guard condition not visible; presumably when {@code b} is null
 * @throws IndexOutOfBoundsException if {@code off} or {@code len} is negative, or
 *                                   {@code len} exceeds {@code b.length - off}
 * @throws IOException               declared by the signature
 */
414 public int read(
byte[] b,
int off,
int len)
throws IOException {
416 throw new NullPointerException();
417 }
// Reject ranges that do not fit inside b.
else if (off < 0 || len < 0 || len > b.length - off) {
418 throw new IndexOutOfBoundsException();
419 }
else if (len == 0) {
425 long fileSize =
content.getSize();
// offsetUser starts at the caller's offset and advances with each copy.
432 int offsetUser = off;
// NOTE(review): offsetUser starts at off, yet it is compared against len, which is a count.
// When off > 0 the loop appears to stop before len bytes are delivered; the
// intended bound is probably off + len -- confirm.
433 while (bytesToUser < len && offsetUser < len) {
456 }
catch (TskCoreException ex) {
// No converted data available: report what was delivered so far, or -1 if nothing was.
461 if (
convertBuff ==
null || convertBuffRemain == 0) {
463 return bytesToUser > 0 ? bytesToUser : -1;
// NOTE(review): the remaining capacity is computed as len - offsetUser, which has the same
// offset-versus-count mix as the loop condition; len - bytesToUser looks intended -- confirm.
470 final int toCopy = Math.min(convertBuffRemain, len - offsetUser);
// Advance both the write position and the delivered-byte count by the amount copied.
474 offsetUser += toCopy;
475 bytesToUser += toCopy;