package org.sleuthkit.autopsy.keywordsearch;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.Map;
import java.util.logging.Level;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.SolrInputDocument;
import org.openide.util.NbBundle;
// ... (remaining imports and the Ingester class declaration are elided in this excerpt)
 
    private static final Logger logger = Logger.getLogger(Ingester.class.getName());
    private volatile boolean uncommitedIngests = false;
    private final Server solrServer = KeywordSearch.getServer();
    private final GetContentFieldsV getContentFieldsV = new GetContentFieldsV();
    private static Ingester instance;

    private static final int MAX_DOC_CHUNK_SIZE = 1024 * 1024; // at most 1 MB of content is read and indexed per document
    private static final String ENCODING = "UTF-8";
 
    public static synchronized Ingester getDefault() {
        if (instance == null) {
            instance = new Ingester();
        }
        return instance;
    }

    @SuppressWarnings("FinalizeDeclaration")
    protected void finalize() throws Throwable {
        super.finalize();
        if (uncommitedIngests) {
            logger.warning("Ingester was used to add files that it never committed.");
        }
    }
 
    void ingest(AbstractFileStringContentStream afscs) throws IngesterException {
        Map<String, String> params = getContentFields(afscs.getSourceContent());
        ingest(afscs, params, afscs.getSourceContent().getSize());
    }
 
    void ingest(TextExtractor fe) throws IngesterException {
        Map<String, String> params = getContentFields(fe.getSourceFile());
        params.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(fe.getNumChunks()));
        ingest(new NullContentStream(fe.getSourceFile()), params, 0);
    }
 
    void ingest(AbstractFileChunk fec, ByteContentStream bcs, int size) throws IngesterException {
        AbstractContent sourceContent = bcs.getSourceContent();
        Map<String, String> params = getContentFields(sourceContent);

        // replace the source object's ID with the ID of this particular chunk
        params.put(Server.Schema.ID.toString(),
                Server.getChunkIdString(sourceContent.getId(), fec.getChunkNumber()));
        ingest(bcs, params, size);
    }
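    /*
     * Illustrative sketch only: the chunk document ID built by Server.getChunkIdString()
     * appears to combine the source object's ID with the chunk number. The exact format and
     * separator are defined in Server and are assumed here, as is this helper's name.
     */
    static String chunkIdSketch(long objectId, int chunkNumber) {
        return Long.toString(objectId) + "_" + Integer.toString(chunkNumber); // e.g. "4711_2" (assumed format)
    }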
 
    void ingest(AbstractFile file, boolean ingestContent) throws IngesterException {
        if (ingestContent == false || file.isDir()) {
            // index only the metadata fields, with an empty content stream
            ingest(new NullContentStream(file), getContentFields(file), 0);
        } else {
            // index the metadata fields and the file's content
            ingest(new FscContentStream(file), getContentFields(file), file.getSize());
        }
    }
 
    private Map<String, String> getContentFields(AbstractContent fsc) {
        return fsc.accept(getContentFieldsV);
    }

    /**
     * Content visitor that builds the map of Solr fields for each supported content type.
     * The visit() bodies are elided in this excerpt.
     */
    private class GetContentFieldsV extends ContentVisitor.Default<Map<String, String>> {

        protected Map<String, String> defaultVisit(Content cntnt) {
            return new HashMap<>();
        }

        public Map<String, String> visit(File f) { /* ... */ }
        public Map<String, String> visit(DerivedFile df) { /* ... */ }
        public Map<String, String> visit(Directory d) { /* ... */ }
        public Map<String, String> visit(LayoutFile lf) { /* ... */ }
        public Map<String, String> visit(LocalFile lf) { /* ... */ }
        public Map<String, String> visit(SlackFile f) { /* ... */ }
 
        // ... (getCommonFileContentFields() is elided in this excerpt)

        private Map<String, String> getCommonFields(AbstractFile af) {
            Map<String, String> params = new HashMap<>();
            params.put(Server.Schema.ID.toString(), Long.toString(af.getId()));
            try {
                long dataSourceId = af.getDataSource().getId();
                params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(dataSourceId));
            } catch (TskCoreException ex) {
                logger.log(Level.SEVERE, "Could not get data source id to properly index the file {0}", af.getId());
                params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
            }
            params.put(Server.Schema.FILE_NAME.toString(), af.getName());
            return params;
        }
    }
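    /*
     * Illustrative sketch only (method name and values are hypothetical): the shape of the
     * map getCommonFields() builds for a single file, using the same Server.Schema keys.
     */
    static Map<String, String> exampleCommonFieldsSketch() {
        Map<String, String> params = new HashMap<>();
        params.put(Server.Schema.ID.toString(), "4711");      // object ID of the file
        params.put(Server.Schema.IMAGE_ID.toString(), "1");   // data source ID, or -1 when it cannot be resolved
        params.put(Server.Schema.FILE_NAME.toString(), "report.pdf");
        return params;
    }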
 
    void ingest(ContentStream cs, Map<String, String> fields, final long size) throws IngesterException {
        if (fields.get(Server.Schema.IMAGE_ID.toString()) == null) {
            // refuse to index content whose data source (image) ID is unknown
            String msg = NbBundle.getMessage(this.getClass(),
                    "Ingester.ingest.exception.unknownImgId.msg", cs.getName());
            logger.log(Level.SEVERE, msg);
            throw new IngesterException(msg);
        }

        final byte[] docChunkContentBuf = new byte[MAX_DOC_CHUNK_SIZE];
        SolrInputDocument updateDoc = new SolrInputDocument();

        // copy the pre-computed metadata fields into the Solr document
        for (String key : fields.keySet()) {
            updateDoc.addField(key, fields.get(key));
        }
 
        if (size > 0) {
            int read = 0;
            InputStream is = null;
            try {
                is = cs.getStream();
                read = is.read(docChunkContentBuf);
            } catch (IOException ex) {
                throw new IngesterException(
                        NbBundle.getMessage(this.getClass(), "Ingester.ingest.exception.cantReadStream.msg",
                                cs.getName()));
            } finally {
                try {
                    is.close();
                } catch (IOException ex) {
                    logger.log(Level.WARNING, "Could not close input stream after reading content, " + cs.getName(), ex);
                }
            }

            if (read != 0) {
                String s = "";
                try {
                    s = new String(docChunkContentBuf, 0, read, ENCODING);
                    // replace any characters Solr cannot index; the string is copied to a char[]
                    // only when an invalid character is actually found
                    char[] chars = null;
                    for (int i = 0; i < s.length(); i++) {
                        if (!TextUtil.isValidSolrUTF8(s.charAt(i))) {
                            if (chars == null) {
                                chars = s.toCharArray();
                            }
                            chars[i] = '^';
                        }
                    }
                    if (chars != null) {
                        s = new String(chars);
                    }
                } catch (UnsupportedEncodingException ex) {
                    logger.log(Level.SEVERE, "Unsupported encoding", ex);
                }
                updateDoc.addField(Server.Schema.CONTENT.toString(), s);
            } else {
                updateDoc.addField(Server.Schema.CONTENT.toString(), "");
            }
        } else {
            // nothing to read (e.g. a metadata-only document): index an empty content field
            updateDoc.addField(Server.Schema.CONTENT.toString(), "");
        }
 
        try {
            solrServer.addDocument(updateDoc);
            uncommitedIngests = true;
        } catch (KeywordSearchModuleException ex) {
            throw new IngesterException(
                    NbBundle.getMessage(this.getClass(), "Ingester.ingest.exception.err.msg", cs.getName()), ex);
        }
    }
 
    static int getTimeout(long size) {
        if (size < 1024 * 1024L) {                // under 1 MB
            // return the shortest timeout (values elided in this excerpt)
        } else if (size < 10 * 1024 * 1024L) {    // under 10 MB
            // ...
        } else if (size < 100 * 1024 * 1024L) {   // under 100 MB
            // ...
        } else {
            // return the longest timeout
        }
    }
 
    void commit() {
        try {
            solrServer.commit();
            uncommitedIngests = false;
        } catch (NoOpenCoreException | SolrServerException ex) {
            logger.log(Level.WARNING, "Error commiting index", ex);
        }
    }
 
    /**
     * ContentStream implementation that streams an AbstractFile's bytes to Solr.
     * The constructor and the remaining ContentStream methods are elided in this excerpt.
     */
    private static class FscContentStream implements ContentStream {

        private AbstractFile f;

        public String getSourceInfo() {
            return NbBundle.getMessage(this.getClass(), "Ingester.FscContentStream.getSrcInfo", f.getId());
        }

        public InputStream getStream() throws IOException {
            return new ReadContentInputStream(f);
        }

        public Reader getReader() throws IOException {
            throw new UnsupportedOperationException(
                    NbBundle.getMessage(this.getClass(), "Ingester.FscContentStream.getReader"));
        }
    }
 
    /**
     * ContentStream associated with a Content object that has no content to stream;
     * used when only metadata fields are indexed. Remaining ContentStream methods are elided.
     */
    private static class NullContentStream implements ContentStream {

        AbstractContent aContent;

        NullContentStream(AbstractContent aContent) {
            this.aContent = aContent;
        }

        public String getName() {
            return aContent.getName();
        }

        public String getSourceInfo() {
            return NbBundle.getMessage(this.getClass(), "Ingester.NullContentStream.getSrcInfo.text", aContent.getId());
        }

        public InputStream getStream() throws IOException {
            return new ByteArrayInputStream(new byte[0]);
        }

        public Reader getReader() throws IOException {
            throw new UnsupportedOperationException(
                    NbBundle.getMessage(this.getClass(), "Ingester.NullContentStream.getReader"));
        }
    }
 
    static class IngesterException extends Exception {

        private static final long serialVersionUID = 1L;

        IngesterException(String message, Throwable ex) {
            super(message, ex);
        }

        IngesterException(String message) {
            super(message);
        }
    }
 