Autopsy  3.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
AbstractFileStringStream.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2012 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.keywordsearch;
20 
21 import java.io.IOException;
22 import java.io.InputStream;
23 import java.nio.charset.Charset;
28 
39  class AbstractFileStringStream extends InputStream {
40 
41  //args
42  private AbstractFile content;
43  private Charset outputCharset;
44  //internal data
45  private static final Logger logger = Logger.getLogger(AbstractFileStringStream.class.getName());
46  private static final String NLS = Character.toString((char) 10); //new line
47  private static final int READ_BUF_SIZE = 256;
48  private long contentOffset = 0; //offset in fscontent read into curReadBuf
49  private final byte[] curReadBuf = new byte[READ_BUF_SIZE];
50  private int bytesInReadBuf = 0;
51  private int readBufOffset = 0; //offset in read buf processed
52  private StringBuilder curString = new StringBuilder();
53  private int curStringLen = 0;
54  private StringBuilder tempString = new StringBuilder();
55  private int tempStringLen = 0;
56  private boolean isEOF = false;
57  private boolean stringAtTempBoundary = false; //if temp has part of string that didn't make it in previous read()
58  private boolean stringAtBufBoundary = false; //if read buffer has string being processed, continue as string from prev read() in next read()
59  private boolean inString = false; //if current temp has min chars required
60  private final byte[] oneCharBuf = new byte[1];
61  private final int MIN_PRINTABLE_CHARS = 4; //num. of chars needed to qualify as a char string
62 
73  public AbstractFileStringStream(AbstractFile content, Charset outputCharset, boolean preserveOnBuffBoundary) {
74  this.content = content;
75  this.outputCharset = outputCharset;
76  //this.preserveOnBuffBoundary = preserveOnBuffBoundary;
77  //logger.log(Level.INFO, "FILE: " + content.getParentPath() + "/" + content.getName());
78  }
79 
87  public AbstractFileStringStream(AbstractFile content, Charset outCharset) {
88  this(content, outCharset, false);
89  }
90 
91  @Override
92  public int read(byte[] b, int off, int len) throws IOException {
93  if (b == null) {
94  throw new NullPointerException();
95  } else if (off < 0 || len < 0 || len > b.length - off) {
96  throw new IndexOutOfBoundsException();
97  } else if (len == 0) {
98  return 0;
99  }
100 
101  long fileSize = content.getSize();
102  if (fileSize == 0) {
103  return -1;
104  }
105 
106  if (isEOF) {
107  return -1;
108  }
109 
110 
111  if (stringAtTempBoundary) {
112  //append entire temp string residual from previous read()
113  //because qualified string was broken down into 2 parts
114  appendResetTemp();
115 
116  stringAtTempBoundary = false;
117  //there could be more to this string in fscontent/buffer
118  }
119 
120  boolean singleConsecZero = false; //preserve the current sequence of chars if 1 consecutive zero char
121  int newCurLen = curStringLen + tempStringLen;
122 
123 
124  while (newCurLen < len) {
125  //need to extract more strings
126  if (readBufOffset > bytesInReadBuf - 1) {
127  //no more bytes to process into strings, read them
128  try {
129  bytesInReadBuf = 0;
130  bytesInReadBuf = content.read(curReadBuf, contentOffset, READ_BUF_SIZE);
131  } catch (TskException ex) {
132  if (curStringLen > 0 || tempStringLen >= MIN_PRINTABLE_CHARS) {
133  appendResetTemp();
134  //have some extracted string, return that, and fail next time
135  isEOF = true;
136  int copied = copyToReturn(b, off, len);
137  return copied;
138  } else {
139  return -1; //EOF
140  }
141  }
142  if (bytesInReadBuf < 1) {
143  if (curStringLen > 0 || tempStringLen >= MIN_PRINTABLE_CHARS) {
144  appendResetTemp();
145  //have some extracted string, return that, and fail next time
146  isEOF = true;
147  int copied = copyToReturn(b, off, len);
148  return copied;
149  } else {
150  return -1; //EOF
151  }
152  }
153  //increment content offset for next read
154  contentOffset += bytesInReadBuf;
155  //reset read buf position
156  readBufOffset = 0;
157  }
158  //get char from cur read buf
159  char c = (char) curReadBuf[readBufOffset++];
160  if (c == 0 && singleConsecZero == false) {
161  //preserve the current sequence if max consec. 1 zero char
162  singleConsecZero = true;
163  } else {
164  singleConsecZero = false;
165  }
166  if (StringExtract.isPrintableAscii(c)) {
167  tempString.append(c);
168  ++tempStringLen;
169  if (tempStringLen >= MIN_PRINTABLE_CHARS) {
170  inString = true;
171  }
172 
173  //boundary case when temp has still chars - handled after the loop
174  } else if (!singleConsecZero) {
175  //break the string, clear temp
176  if (tempStringLen >= MIN_PRINTABLE_CHARS
177  || stringAtBufBoundary) {
178  //append entire temp string with new line
179  tempString.append(NLS);
180  ++tempStringLen;
181 
182  curString.append(tempString);
183  curStringLen += tempStringLen;
184 
185  stringAtBufBoundary = false;
186  }
187  //reset temp
188  tempString = new StringBuilder();
189  tempStringLen = 0;
190  }
191 
192  newCurLen = curStringLen + tempStringLen;
193  }
194 
195  //check if still in string state, so that next chars in read buf bypass min chars check
196  //and qualify as string even if less < min chars required
197  if (inString) {
198  inString = false; //reset
199  stringAtBufBoundary = true; //will bypass the check
200  }
201 
202  //check if temp still has chars to qualify as a string
203  //we might need to break up temp into 2 parts for next read() call
204  //consume as many as possible to fill entire user buffer
205  if (tempStringLen >= MIN_PRINTABLE_CHARS) {
206  if (newCurLen > len) {
207  int appendChars = len - curStringLen;
208  //save part for next user read(), need to break up temp string
209  //do not append new line
210  String toAppend = tempString.substring(0, appendChars);
211  String newTemp = tempString.substring(appendChars);
212 
213  curString.append(toAppend);
214  curStringLen += appendChars;
215 
216  tempString = new StringBuilder(newTemp);
217  tempStringLen = newTemp.length();
218 
219  stringAtTempBoundary = true;
220 
221  } else {
222  //append entire temp
223  curString.append(tempString);
224  curStringLen += tempStringLen;
225 
226  //reset temp
227  tempString = new StringBuilder();
228  tempStringLen = 0;
229 
230  }
231  } else {
232  //if temp has a few chars, not qualified as string for now,
233  //will be processed during next read() call
234  }
235 
236  //copy current strings to user
237  final int copied = copyToReturn(b, off, len);
238  //there may be still chars in read buffer or tempString, for next read()
239 
240  return copied;
241  }
242 
243  //append temp buffer to cur string buffer and reset temp, if enough chars
244  //does not append new line
245  private void appendResetTemp() {
246  if (tempStringLen >= MIN_PRINTABLE_CHARS) {
247  curString.append(tempString);
248  curStringLen += tempStringLen;
249  tempString = new StringBuilder();
250  tempStringLen = 0;
251  }
252  }
253 
254  //copy currently extracted string to user buffer
255  //and reset for next read() call
256  private int copyToReturn(byte[] b, int off, long len) {
257 
258  final String curStringS = curString.toString();
259  //logger.log(Level.INFO, curStringS);
260  byte[] stringBytes = curStringS.getBytes(outputCharset);
261  System.arraycopy(stringBytes, 0, b, off, Math.min(curStringLen, (int) len));
262  //logger.log(Level.INFO, curStringS);
263  //copied all string, reset
264  curString = new StringBuilder();
265  int ret = curStringLen;
266  curStringLen = 0;
267  return ret;
268 
269  }
270 
271  @Override
272  public int read() throws IOException {
273  final int read = read(oneCharBuf, 0, 1);
274  if (read == 1) {
275  return oneCharBuf[0];
276  } else {
277  return -1;
278  }
279 
280  }
281 
282  @Override
283  public int available() throws IOException {
284  //we don't know how many bytes in curReadBuf may end up as strings
285  return 0;
286  }
287 
288  @Override
289  public long skip(long n) throws IOException {
290  //use default implementation that reads into skip buffer
291  //but it could be more efficient
292  return super.skip(n);
293  }
294 }

Copyright © 2012-2015 Basis Technology. Generated on: Mon Oct 19 2015
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.