19 package org.sleuthkit.autopsy.keywordsearch;
 
   21 import java.io.IOException;
 
   22 import java.io.InputStream;
 
   23 import java.io.InputStreamReader;
 
   24 import java.util.ArrayList;
 
   25 import java.util.HashMap;
 
   26 import java.util.List;
 
   28 import java.util.logging.Level;
 
   39 class StringsTextExtractor 
extends FileTextExtractor {
 
   41     static final private Logger logger = Logger.getLogger(StringsTextExtractor.class.getName()); // logger named after this class; logWarning() writes through it
 
   51     private final List<SCRIPT> extractScripts = 
new ArrayList<>(); // scripts passed on to InternationalStream; a list holding only LATIN_1 selects EnglishOnlyStream instead (see getInputStream)
 
   52     private Map<String, String> extractOptions = 
new HashMap<>(); // option name -> value; the EXTRACT_UTF8 / EXTRACT_UTF16 entries are read with Boolean.parseBoolean. Not final because setOptions() swaps in the caller's map.
 
   54     public StringsTextExtractor() {
              // Seeds the script selection with LATIN_2 only. getInputStream() special-cases a
              // selection that is exactly [LATIN_1], which this default does not match.
 
   56         extractScripts.add(SCRIPT.LATIN_2);
 
   64     public void setScripts(List<SCRIPT> extractScripts) {
 
   65         this.extractScripts.clear();
 
   66         this.extractScripts.addAll(extractScripts);
 
   74     public List<SCRIPT> getScripts() {
 
   75         return new ArrayList<>(extractScripts);
 
   84     public Map<String, String> getOptions() {
              // Returns the extractor's own option map, not a copy: changes the caller makes to
              // the returned map are visible to isDisabled() and getInputStream().
              // NOTE(review): getScripts() hands out a copy instead; confirm this asymmetry is intended.
 
   85         return extractOptions;
 
   93     public void setOptions(Map<String, String> options) {
              // Adopts the caller's map by reference (no copy, no validation).
              // NOTE(review): if null is passed, the extractOptions.get(...) calls in isDisabled()
              // and getInputStream() will throw NullPointerException later; confirm callers never pass null.
 
   94         this.extractOptions = options;
 
   98     public void logWarning(
final String msg, Exception ex) {
              // Records msg together with ex at WARNING level on this class's logger.
 
   99         logger.log(Level.WARNING, msg, ex); 
 
  103     public boolean isDisabled() {
 
  104         boolean extractUTF8 = Boolean.parseBoolean(extractOptions.get(ExtractOptions.EXTRACT_UTF8.toString()));
 
  105         boolean extractUTF16 = Boolean.parseBoolean(extractOptions.get(ExtractOptions.EXTRACT_UTF16.toString()));
 
  107         return extractUTF8 == 
false && extractUTF16 == 
false;
 
  111     public InputStreamReader getReader(AbstractFile sourceFile) 
throws TextExtractorException {
 
  112         InputStream stringStream = getInputStream(sourceFile);
 
  113         return new InputStreamReader(stringStream, Server.DEFAULT_INDEXED_TEXT_CHARSET);
 
  116     InputStream getInputStream(AbstractFile sourceFile) {
 
  118         if (extractScripts.size() == 1 && extractScripts.get(0).equals(SCRIPT.LATIN_1)) {
 
  119             return new EnglishOnlyStream(sourceFile);
 
  121             boolean extractUTF8 = Boolean.parseBoolean(extractOptions.get(ExtractOptions.EXTRACT_UTF8.toString()));
 
  122             boolean extractUTF16 = Boolean.parseBoolean(extractOptions.get(ExtractOptions.EXTRACT_UTF16.toString()));
 
  124             return new InternationalStream(sourceFile, extractScripts, extractUTF8, extractUTF16);
 
  129     public boolean isContentTypeSpecific() {
 
  134     public boolean isSupported(AbstractFile file, String detectedFormat) {
 
  154         private static final String 
NLS = Character.toString((
char) 10); // char 10 is the line feed character; appended after each completed string in read()
 
  187         public int read(byte[] b, 
int off, 
int len) 
throws IOException {
                  // Bulk read. NOTE(review): this listing omits many lines of the method; the
                  // comments below describe only the statements that are visible.
 
                  // Argument checks: null buffer -> NullPointerException, inconsistent off/len ->
                  // IndexOutOfBoundsException, and len == 0 is handled as its own case.
  189                 throw new NullPointerException();
 
  190             } 
else if (off < 0 || len < 0 || len > b.length - off) {
 
  191                 throw new IndexOutOfBoundsException();
 
  192             } 
else if (len == 0) {
 
  195             long fileSize = content.getSize();
 
                  // A previous call left part of a candidate string behind (flag set further down).
  202             if (stringAtTempBoundary) {
 
  206                 stringAtTempBoundary = 
false;
 
  209             boolean singleConsecZero = 
false; 
 
  211             while (newCurLen < len) {
 
                      // Read buffer fully consumed: refill it from the content at contentOffset.
  213                 if (readBufOffset > bytesInReadBuf - 1) {
 
  217                         bytesInReadBuf = content.read(curReadBuf, contentOffset, READ_BUF_SIZE);
 
  218                     } 
catch (TskException ex) {
 
  219                         if (curStringLen > 0 || tempStringLen >= MIN_PRINTABLE_CHARS) {
 
                          // Nothing was read from the content.
  229                     if (bytesInReadBuf < 1) {
 
  230                         if (curStringLen > 0 || tempStringLen >= MIN_PRINTABLE_CHARS) {
 
                      // NOTE(review): if curReadBuf is a byte[], this cast sign-extends, so bytes
                      // >= 0x80 become chars 0xFF80-0xFFFF; confirm the printable check (not
                      // visible here) rejects them.
  246                 char c = (char) curReadBuf[readBufOffset++];
 
                      // The first zero only sets the flag; the flag is cleared again otherwise.
                      // Presumably this tolerates one zero byte between characters - confirm.
  247                 if (c == 0 && singleConsecZero == 
false) {
 
  249                     singleConsecZero = 
true;
 
  251                     singleConsecZero = 
false;
 
  254                     tempString.append(c);
 
  256                     if (tempStringLen >= MIN_PRINTABLE_CHARS) {
 
  260                 } 
else if (!singleConsecZero) {
 
                          // Candidate is long enough (or continues a string from the previous
                          // buffer): end it with a newline and move it to the output string.
  262                     if (tempStringLen >= MIN_PRINTABLE_CHARS || stringAtBufBoundary) {
 
  264                         tempString.append(NLS);
 
  266                         curString.append(tempString);
 
  268                         stringAtBufBoundary = 
false;
 
                          // Start a fresh candidate.
  271                     tempString = 
new StringBuilder();
 
  280                 stringAtBufBoundary = 
true; 
 
  285             if (tempStringLen >= MIN_PRINTABLE_CHARS) {
 
                      // Candidate would overshoot len: emit the first appendChars characters now,
                      // keep the remainder in tempString and flag it for the next call.
  286                 if (newCurLen > len) {
 
  290                     String toAppend = tempString.substring(0, appendChars);
 
  291                     String newTemp = tempString.substring(appendChars);
 
  292                     curString.append(toAppend);
 
  293                     curStringLen += appendChars;
 
  294                     tempString = 
new StringBuilder(newTemp);
 
  295                     tempStringLen = newTemp.length();
 
  296                     stringAtTempBoundary = 
true;
 
                          // Otherwise the whole candidate is emitted and the candidate buffer reset.
  299                     curString.append(tempString);
 
  302                     tempString = 
new StringBuilder();
 
  318             if (tempStringLen >= MIN_PRINTABLE_CHARS) {
 
  319                 curString.append(tempString);
 
  321                 tempString = 
new StringBuilder();
 
  329             final String curStringS = curString.toString();
 
  332             System.arraycopy(stringBytes, 0, b, off, Math.min(curStringLen, (
int) len));
 
  335             curString = 
new StringBuilder();
 
  342         public int read() throws IOException {
                  // Single-byte read built on the bulk read, using the one-element buffer oneCharBuf.
 
  343             final int read = 
read(oneCharBuf, 0, 1);
 
                      // NOTE(review): InputStream.read() is specified to return 0-255 or -1. If
                      // oneCharBuf is a byte[], returning the element directly sign-extends bytes
                      // >= 0x80 to negative ints; `oneCharBuf[0] & 0xFF` would conform. Confirm
                      // the buffer type and fix when the full method is in view.
  345                 return oneCharBuf[0];
 
  358         public long skip(
long n) 
throws IOException {
                  // The visible statement simply defers to the superclass implementation.
 
  361             return super.skip(n);
 
  403         private InternationalStream(AbstractFile content, List<SCRIPT> scripts, 
boolean extractUTF8, 
boolean extractUTF16) {
                  // NOTE(review): other constructor statements are not visible in this listing.
 
                  // With neither UTF-8 nor UTF-16 extraction requested there is nothing to extract.
  407             this.nothingToDo = extractUTF8 == 
false && extractUTF16 == 
false;
 
  413         public int read() throws IOException {
                  // Single-byte read built on the bulk read, using the one-element buffer oneCharBuf.
                  // NOTE(review): lines between the signature and the call below are not visible here.
 
  417             final int read = 
read(oneCharBuf, 0, 1);
 
                      // NOTE(review): InputStream.read() is specified to return 0-255 or -1. If
                      // oneCharBuf is a byte[], returning the element directly sign-extends bytes
                      // >= 0x80 to negative ints; `oneCharBuf[0] & 0xFF` would conform. Confirm
                      // the buffer type and fix when the full method is in view.
  419                 return oneCharBuf[0];
 
  426         public int read(byte[] b, 
int off, 
int len) 
throws IOException {
                  // Bulk read. NOTE(review): this listing omits many lines of the method; the
                  // comments below describe only the statements that are visible.
 
                  // Argument checks: null buffer -> NullPointerException, inconsistent off/len ->
                  // IndexOutOfBoundsException, and len == 0 is handled as its own case.
  428                 throw new NullPointerException();
 
  429             } 
else if (off < 0 || len < 0 || len > b.length - off) {
 
  430                 throw new IndexOutOfBoundsException();
 
  431             } 
else if (len == 0) {
 
  437             long fileSize = content.getSize();
 
                  // NOTE(review): offsetUser is an index into b (it starts at off) while len is a
                  // byte count, yet the loop condition below and the toCopy computation further
                  // down compare/subtract the two directly. With off > 0 the loop can stop before
                  // len bytes are delivered, and with off >= len it never runs. The room left for
                  // the caller is len - bytesToUser; fix once the full method is in view.
  444             int offsetUser = off;
  445             while (bytesToUser < len && offsetUser < len) {
 
                      // Converted data used up and file not exhausted: read the next chunk.
  448                 if ((convertBuff == null || convertBuffRemain == 0) && !fileEOF && fileReadOffset < fileSize) {
 
                              // Never request more than what is left in the file.
  454                         toRead = Math.min(FILE_BUF_SIZE, fileSize - fileReadOffset);
 
  456                         int read = content.read(fileReadBuff, fileReadOffset, toRead);
 
  457                         if (read == -1 || read == 0) {
 
  460                             fileReadOffset += 
read;
 
  461                             if (fileReadOffset >= fileSize) {
 
  468                     } 
catch (TskCoreException ex) {
 
                      // Still no converted data available.
  474                 if (convertBuff == null || convertBuffRemain == 0) {
 
                              // Report what was delivered so far, or -1 if nothing was.
  476                         return bytesToUser > 0 ? bytesToUser : -1;
 
                      // Hand converted bytes to the caller and advance all three cursors together.
  483                 final int toCopy = Math.min(convertBuffRemain, len - offsetUser);
 
  484                 System.arraycopy(convertBuff, convertBuffOffset, b, offsetUser, toCopy);
 
  486                 convertBuffOffset += toCopy;
 
  487                 offsetUser += toCopy;
 
  488                 bytesToUser += toCopy;
 
  505                 bytesInConvertBuff = 0;
 
  507                 bytesInConvertBuff = convertBuff.length;
 
  509             convertBuffOffset = 0;
 
static final Charset DEFAULT_INDEXED_TEXT_CHARSET
default Charset to index text as 
 
synchronized static Logger getLogger(String name)