83 public static Charset
getEncoding(AbstractFile file)
throws TskCoreException, IOException {
87 try (InputStream stream =
new BufferedInputStream(
new ReadContentInputStream(file))) {
88 CharsetDetector detector =
new CharsetDetector();
89 detector.setText(stream);
91 CharsetMatch[] tikaResults = detector.detectAll();
94 if (tikaResults.length > 0) {
95 CharsetMatch topPick = tikaResults[0];
97 if (topPick.getName().equalsIgnoreCase(
"IBM500") && tikaResults.length > 1) {
103 topPick = tikaResults[1];
106 if (!topPick.getName().equalsIgnoreCase(
"IBM500") &&
108 Charset.isSupported(topPick.getName())) {
111 return Charset.forName(topPick.getName());
119 int maxBytes = 100000;
120 int numBytes = maxBytes;
121 if (file.getSize() < maxBytes) {
122 numBytes = (int) file.getSize();
125 byte[] targetArray =
new byte[numBytes];
126 file.read(targetArray, 0, numBytes);
127 List<DecodetectResult> results = Decodetect.DECODETECT.getResults(targetArray);
128 if (!results.isEmpty()) {
129 DecodetectResult topResult = results.get(0);
131 return topResult.getEncoding();