19 package org.sleuthkit.autopsy.keywordsearch;
 
   20 import java.io.IOException;
 
   21 import java.io.InputStream;
 
   22 import java.io.BufferedInputStream;
 
   23 import java.io.Reader;
 
   24 import org.apache.tika.parser.txt.CharsetDetector;
 
   25 import org.apache.tika.parser.txt.CharsetMatch;
 
   32 final class TextFileExtractor {
 
   // Minimum value of CharsetMatch.getConfidence() that getReader() accepts:
   // a detected match scoring below this threshold is rejected with a
   // TextFileExtractorException instead of having its Reader returned.
   // NOTE(review): the confidence scale is defined by Tika's CharsetMatch,
   // not by this file -- confirm the range before tuning this value.
   39     static final private int MIN_MATCH_CONFIDENCE = 20;
 
   41     public Reader getReader(AbstractFile source) 
throws TextFileExtractorException {
 
   42         CharsetDetector detector = 
new CharsetDetector();
 
   44         InputStream stream = 
new BufferedInputStream(
new ReadContentInputStream(source));
 
   46             detector.setText(stream);
 
   47         } 
catch (IOException ex) {
 
   48             throw new TextFileExtractorException(
"Unable to get string from detected text in TextFileExtractor", ex);
 
   50         CharsetMatch match = detector.detect();
 
   52             throw new TextFileExtractorException(
"Unable to detect any matches using TextFileExtractor");
 
   53         } 
else if (match.getConfidence() < MIN_MATCH_CONFIDENCE) {
 
   54             throw new TextFileExtractorException(
"Text does not match any character set with a high enough confidence for TextFileExtractor");
 
   57         return match.getReader();