Autopsy  4.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
AbstractFileStringStream.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2012 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.keywordsearch;
20 
21 import java.io.IOException;
22 import java.io.InputStream;
23 import java.nio.charset.Charset;
26 import org.sleuthkit.datamodel.AbstractFile;
27 import org.sleuthkit.datamodel.TskException;
28 
41 class AbstractFileStringStream extends InputStream {
42 
43  //args
44  private AbstractFile content;
45  private Charset outputCharset;
46  //internal data
47  private static final Logger logger = Logger.getLogger(AbstractFileStringStream.class.getName());
48  private static final String NLS = Character.toString((char) 10); //new line
49  private static final int READ_BUF_SIZE = 256;
50  private long contentOffset = 0; //offset in fscontent read into curReadBuf
51  private final byte[] curReadBuf = new byte[READ_BUF_SIZE];
52  private int bytesInReadBuf = 0;
53  private int readBufOffset = 0; //offset in read buf processed
54  private StringBuilder curString = new StringBuilder();
55  private int curStringLen = 0;
56  private StringBuilder tempString = new StringBuilder();
57  private int tempStringLen = 0;
58  private boolean isEOF = false;
59  private boolean stringAtTempBoundary = false; //if temp has part of string that didn't make it in previous read()
60  private boolean stringAtBufBoundary = false; //if read buffer has string being processed, continue as string from prev read() in next read()
61  private boolean inString = false; //if current temp has min chars required
62  private final byte[] oneCharBuf = new byte[1];
63  private final int MIN_PRINTABLE_CHARS = 4; //num. of chars needed to qualify as a char string
64 
76  public AbstractFileStringStream(AbstractFile content, Charset outputCharset, boolean preserveOnBuffBoundary) {
77  this.content = content;
78  this.outputCharset = outputCharset;
79  //this.preserveOnBuffBoundary = preserveOnBuffBoundary;
80  //logger.log(Level.INFO, "FILE: " + content.getParentPath() + "/" + content.getName());
81  }
82 
91  public AbstractFileStringStream(AbstractFile content, Charset outCharset) {
92  this(content, outCharset, false);
93  }
94 
95  @Override
96  public int read(byte[] b, int off, int len) throws IOException {
97  if (b == null) {
98  throw new NullPointerException();
99  } else if (off < 0 || len < 0 || len > b.length - off) {
100  throw new IndexOutOfBoundsException();
101  } else if (len == 0) {
102  return 0;
103  }
104 
105  long fileSize = content.getSize();
106  if (fileSize == 0) {
107  return -1;
108  }
109 
110  if (isEOF) {
111  return -1;
112  }
113 
114  if (stringAtTempBoundary) {
115  //append entire temp string residual from previous read()
116  //because qualified string was broken down into 2 parts
117  appendResetTemp();
118 
119  stringAtTempBoundary = false;
120  //there could be more to this string in fscontent/buffer
121  }
122 
123  boolean singleConsecZero = false; //preserve the current sequence of chars if 1 consecutive zero char
124  int newCurLen = curStringLen + tempStringLen;
125 
126  while (newCurLen < len) {
127  //need to extract more strings
128  if (readBufOffset > bytesInReadBuf - 1) {
129  //no more bytes to process into strings, read them
130  try {
131  bytesInReadBuf = 0;
132  bytesInReadBuf = content.read(curReadBuf, contentOffset, READ_BUF_SIZE);
133  } catch (TskException ex) {
134  if (curStringLen > 0 || tempStringLen >= MIN_PRINTABLE_CHARS) {
135  appendResetTemp();
136  //have some extracted string, return that, and fail next time
137  isEOF = true;
138  int copied = copyToReturn(b, off, len);
139  return copied;
140  } else {
141  return -1; //EOF
142  }
143  }
144  if (bytesInReadBuf < 1) {
145  if (curStringLen > 0 || tempStringLen >= MIN_PRINTABLE_CHARS) {
146  appendResetTemp();
147  //have some extracted string, return that, and fail next time
148  isEOF = true;
149  int copied = copyToReturn(b, off, len);
150  return copied;
151  } else {
152  return -1; //EOF
153  }
154  }
155  //increment content offset for next read
156  contentOffset += bytesInReadBuf;
157  //reset read buf position
158  readBufOffset = 0;
159  }
160  //get char from cur read buf
161  char c = (char) curReadBuf[readBufOffset++];
162  if (c == 0 && singleConsecZero == false) {
163  //preserve the current sequence if max consec. 1 zero char
164  singleConsecZero = true;
165  } else {
166  singleConsecZero = false;
167  }
168  if (StringExtract.isPrintableAscii(c)) {
169  tempString.append(c);
170  ++tempStringLen;
171  if (tempStringLen >= MIN_PRINTABLE_CHARS) {
172  inString = true;
173  }
174 
175  //boundary case when temp has still chars - handled after the loop
176  } else if (!singleConsecZero) {
177  //break the string, clear temp
178  if (tempStringLen >= MIN_PRINTABLE_CHARS
179  || stringAtBufBoundary) {
180  //append entire temp string with new line
181  tempString.append(NLS);
182  ++tempStringLen;
183 
184  curString.append(tempString);
185  curStringLen += tempStringLen;
186 
187  stringAtBufBoundary = false;
188  }
189  //reset temp
190  tempString = new StringBuilder();
191  tempStringLen = 0;
192  }
193 
194  newCurLen = curStringLen + tempStringLen;
195  }
196 
197  //check if still in string state, so that next chars in read buf bypass min chars check
198  //and qualify as string even if less < min chars required
199  if (inString) {
200  inString = false; //reset
201  stringAtBufBoundary = true; //will bypass the check
202  }
203 
204  //check if temp still has chars to qualify as a string
205  //we might need to break up temp into 2 parts for next read() call
206  //consume as many as possible to fill entire user buffer
207  if (tempStringLen >= MIN_PRINTABLE_CHARS) {
208  if (newCurLen > len) {
209  int appendChars = len - curStringLen;
210  //save part for next user read(), need to break up temp string
211  //do not append new line
212  String toAppend = tempString.substring(0, appendChars);
213  String newTemp = tempString.substring(appendChars);
214 
215  curString.append(toAppend);
216  curStringLen += appendChars;
217 
218  tempString = new StringBuilder(newTemp);
219  tempStringLen = newTemp.length();
220 
221  stringAtTempBoundary = true;
222 
223  } else {
224  //append entire temp
225  curString.append(tempString);
226  curStringLen += tempStringLen;
227 
228  //reset temp
229  tempString = new StringBuilder();
230  tempStringLen = 0;
231 
232  }
233  } else {
234  //if temp has a few chars, not qualified as string for now,
235  //will be processed during next read() call
236  }
237 
238  //copy current strings to user
239  final int copied = copyToReturn(b, off, len);
240  //there may be still chars in read buffer or tempString, for next read()
241 
242  return copied;
243  }
244 
245  //append temp buffer to cur string buffer and reset temp, if enough chars
246  //does not append new line
247  private void appendResetTemp() {
248  if (tempStringLen >= MIN_PRINTABLE_CHARS) {
249  curString.append(tempString);
250  curStringLen += tempStringLen;
251  tempString = new StringBuilder();
252  tempStringLen = 0;
253  }
254  }
255 
256  //copy currently extracted string to user buffer
257  //and reset for next read() call
258  private int copyToReturn(byte[] b, int off, long len) {
259 
260  final String curStringS = curString.toString();
261  //logger.log(Level.INFO, curStringS);
262  byte[] stringBytes = curStringS.getBytes(outputCharset);
263  System.arraycopy(stringBytes, 0, b, off, Math.min(curStringLen, (int) len));
264  //logger.log(Level.INFO, curStringS);
265  //copied all string, reset
266  curString = new StringBuilder();
267  int ret = curStringLen;
268  curStringLen = 0;
269  return ret;
270 
271  }
272 
273  @Override
274  public int read() throws IOException {
275  final int read = read(oneCharBuf, 0, 1);
276  if (read == 1) {
277  return oneCharBuf[0];
278  } else {
279  return -1;
280  }
281 
282  }
283 
284  @Override
285  public int available() throws IOException {
286  //we don't know how many bytes in curReadBuf may end up as strings
287  return 0;
288  }
289 
290  @Override
291  public long skip(long n) throws IOException {
292  //use default implementation that reads into skip buffer
293  //but it could be more efficient
294  return super.skip(n);
295  }
296 }

Copyright © 2012-2016 Basis Technology. Generated on: Mon Jan 2 2017
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.