Autopsy 4.0
Graphical digital forensics platform for The Sleuth Kit and other tools.
SearchEngineURLQueryAnalyzer.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2012-2014 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.recentactivity;
20 
import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.logging.Level;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.openide.util.NbBundle;
import org.sleuthkit.datamodel.AbstractFile;
import org.sleuthkit.datamodel.BlackboardArtifact;
import org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
import org.sleuthkit.datamodel.BlackboardAttribute;
import org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
import org.sleuthkit.datamodel.Content;
import org.sleuthkit.datamodel.TskCoreException;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
51 
61 class SearchEngineURLQueryAnalyzer extends Extract {
62 
63  private static final Logger logger = Logger.getLogger(SearchEngineURLQueryAnalyzer.class.getName());
64  private static final String XMLFILE = "SEUQAMappings.xml"; //NON-NLS
65  private static final String XSDFILE = "SearchEngineSchema.xsd"; //NON-NLS
66  private static SearchEngineURLQueryAnalyzer.SearchEngine[] engines;
67 
68  private Content dataSource;
69  private IngestJobContext context;
70 
71  SearchEngineURLQueryAnalyzer() {
72  moduleName = NbBundle.getMessage(ExtractIE.class, "SearchEngineURLQueryAnalyzer.moduleName.text");
73  }
74 
    /**
     * Immutable pairing of the plain-text token that identifies a search
     * query inside a URL (e.g. "?q=") with the regular-expression form of
     * that token used to split the URL and isolate the query term.
     * Instances are built from the plainToken/regexToken attributes of the
     * SEUQAMappings.xml configuration file.
     */
    private static class KeyPair {

        // Literal substring looked for in the URL.
        private final String key;
        // Regex the URL is split on to extract the query value.
        private final String keyRegExp;

        KeyPair(String key, String keyRegExp) {
            this.key = key;
            this.keyRegExp = keyRegExp;
        }

        /** @return the literal token searched for in the URL. */
        String getKey() {
            return key;
        }

        /** @return the regex used to split the URL at the query term. */
        String getKeyRegExp() {
            return keyRegExp;
        }

    }
98 
99  private static class SearchEngine {
100 
101  private final String engineName;
102  private final String domainSubstring;
103  private final List<KeyPair> keyPairs;
104  private int count;
105 
106  SearchEngine(String engineName, String domainSubstring, List<KeyPair> keyPairs) {
107  this.engineName = engineName;
108  this.domainSubstring = domainSubstring;
109  this.keyPairs = keyPairs;
110  count = 0;
111  }
112 
113  void increment() {
114  ++count;
115  }
116 
117  String getEngineName() {
118  return engineName;
119  }
120 
121  String getDomainSubstring() {
122  return domainSubstring;
123  }
124 
125  int getTotal() {
126  return count;
127  }
128 
134  List<KeyPair> getKeys() {
135  return this.keyPairs;
136  }
137 
138  @Override
139  public String toString() {
140  String split = " ";
141  for (KeyPair kp : keyPairs) {
142  split = split + "[ " + kp.getKey() + " :: " + kp.getKeyRegExp() + " ]" + ", ";
143  }
144  return NbBundle.getMessage(this.getClass(), "SearchEngineURLQueryAnalyzer.toString",
145  engineName, domainSubstring, count, split);
146  }
147  }
148 
149  private void loadConfigFile() throws IngestModuleException {
150  Document xmlinput;
151  try {
152  String path = PlatformUtil.getUserConfigDirectory() + File.separator + XMLFILE;
153  File f = new File(path);
154  logger.log(Level.INFO, "Load successful"); //NON-NLS
155  DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
156  DocumentBuilder db = dbf.newDocumentBuilder();
157  xmlinput = db.parse(f);
158 
159  if (!XMLUtil.xmlIsValid(xmlinput, SearchEngineURLQueryAnalyzer.class, XSDFILE)) {
160  logger.log(Level.WARNING, "Error loading Search Engines: could not validate against [" + XSDFILE + "], results may not be accurate."); //NON-NLS
161  }
162 
163  } catch (IOException e) {
164  throw new IngestModuleException("Was not able to load SEUQAMappings.xml: " + e.getLocalizedMessage(), e); //NON-NLS
165  } catch (ParserConfigurationException pce) {
166  throw new IngestModuleException("Unable to build XML parser: " + pce.getLocalizedMessage(), pce); //NON-NLS
167  } catch (SAXException sxe) {
168  throw new IngestModuleException("Unable to parse XML file: " + sxe.getLocalizedMessage(), sxe); //NON-NLS
169  }
170 
171  NodeList nlist = xmlinput.getElementsByTagName("SearchEngine"); //NON-NLS
172  SearchEngineURLQueryAnalyzer.SearchEngine[] listEngines = new SearchEngineURLQueryAnalyzer.SearchEngine[nlist.getLength()];
173  for (int i = 0; i < nlist.getLength(); i++) {
174  NamedNodeMap nnm = nlist.item(i).getAttributes();
175 
176  String EngineName = nnm.getNamedItem("engine").getNodeValue(); //NON-NLS
177  String EnginedomainSubstring = nnm.getNamedItem("domainSubstring").getNodeValue(); //NON-NLS
178  List<KeyPair> keys = new ArrayList<>();
179 
180  NodeList listSplits = xmlinput.getElementsByTagName("splitToken"); //NON-NLS
181  for (int k = 0; k < listSplits.getLength(); k++) {
182  if (listSplits.item(k).getParentNode().getAttributes().getNamedItem("engine").getNodeValue().equals(EngineName)) { //NON-NLS
183  keys.add(new KeyPair(listSplits.item(k).getAttributes().getNamedItem("plainToken").getNodeValue(), listSplits.item(k).getAttributes().getNamedItem("regexToken").getNodeValue())); //NON-NLS
184  }
185  }
186 
187  SearchEngineURLQueryAnalyzer.SearchEngine Se = new SearchEngineURLQueryAnalyzer.SearchEngine(EngineName, EnginedomainSubstring, keys);
188  listEngines[i] = Se;
189  }
190  engines = listEngines;
191  }
192 
203  private static SearchEngineURLQueryAnalyzer.SearchEngine getSearchEngineFromUrl(String domain) {
204  if (engines == null) {
205  return null;
206  }
207  for (SearchEngine engine : engines) {
208  if (domain.contains(engine.getDomainSubstring())) {
209  return engine;
210  }
211  }
212  return null;
213  }
214 
222  private String extractSearchEngineQuery(SearchEngineURLQueryAnalyzer.SearchEngine eng, String url) {
223  String x = ""; //NON-NLS
224 
225  for (KeyPair kp : eng.getKeys()) {
226  if (url.contains(kp.getKey())) {
227  x = getValue(url, kp.getKeyRegExp());
228  break;
229  }
230  }
231  try { //try to decode the url
232  String decoded = URLDecoder.decode(x, "UTF-8"); //NON-NLS
233  return decoded;
234  } catch (UnsupportedEncodingException uee) { //if it fails, return the encoded string
235  logger.log(Level.FINE, "Error during URL decoding ", uee); //NON-NLS
236  return x;
237  }
238  }
239 
250  private String getValue(String url, String regExpKey) {
251  /*
252  * NOTE: This doesn't seem like the most wonderful way to do this, but
253  * we have data that has a bunch of bogus URLs. Such as: - Multiple
254  * google "q=" terms, including one after a "#" tag. Google used the
255  * last one - Search/query part of the URL starting with a '#'. Attemps
256  * at more formal approaches of splitting on the "?" and then on "&"
257  * resulting in missing things.
258  */
259  String value = ""; //NON-NLS
260  String v = regExpKey;
261  //Want to determine if string contains a string based on splitkey, but we want to split the string on splitKeyConverted due to regex
262  if (regExpKey.contains("\\?")) {
263  v = regExpKey.replace("\\?", "?");
264  }
265  String[] sp = url.split(v);
266  if (sp.length >= 2) {
267  if (sp[sp.length - 1].contains("&")) {
268  value = sp[sp.length - 1].split("&")[0];
269  } else {
270  value = sp[sp.length - 1];
271  }
272  }
273  return value;
274  }
275 
    /**
     * Scans every web-history and bookmark artifact in the current case that
     * belongs to this module's data source, recognizes URLs from configured
     * search engines, extracts the search term, and posts one
     * TSK_WEB_SEARCH_QUERY artifact per hit. A module data event is fired in
     * all cases (success, error, or cancellation) so listeners refresh.
     */
    private void findSearchQueries() {
        int totalQueries = 0;
        try {
            //from blackboard_artifacts
            Collection<BlackboardArtifact> listArtifacts = currentCase.getSleuthkitCase().getMatchingArtifacts("WHERE (artifact_type_id = '" + ARTIFACT_TYPE.TSK_WEB_BOOKMARK.getTypeID() //NON-NLS
                    + "' OR artifact_type_id = '" + ARTIFACT_TYPE.TSK_WEB_HISTORY.getTypeID() + "') "); //List of every 'web_history' and 'bookmark' artifact NON-NLS
            logger.log(Level.INFO, "Processing {0} blackboard artifacts.", listArtifacts.size()); //NON-NLS

            for (BlackboardArtifact artifact : listArtifacts) {
                if (context.dataSourceIngestIsCancelled()) {
                    break; //User cancelled the process.
                }

                //initializing default attributes
                String query = "";
                String searchEngineDomain = "";
                String browser = "";
                long last_accessed = -1;

                long fileId = artifact.getObjectID();
                boolean isFromSource = tskCase.isFileFromSource(dataSource, fileId);
                if (!isFromSource) {
                    //File was from a different dataSource. Skipping.
                    continue;
                }

                AbstractFile file = tskCase.getAbstractFileById(fileId);
                if (file == null) {
                    continue;
                }

                SearchEngineURLQueryAnalyzer.SearchEngine se = null;
                //from blackboard_attributes
                Collection<BlackboardAttribute> listAttributes = currentCase.getSleuthkitCase().getMatchingAttributes("WHERE artifact_id = " + artifact.getArtifactID()); //NON-NLS

                // Walk the artifact's attributes, collecting the URL-derived
                // query plus browser/domain/timestamp context as encountered.
                for (BlackboardAttribute attribute : listAttributes) {
                    if (attribute.getAttributeType().getTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_URL.getTypeID()) {
                        final String urlString = attribute.getValueString();
                        se = getSearchEngineFromUrl(urlString);
                        // URL is not from a known search engine: abandon this
                        // artifact (remaining attributes are irrelevant).
                        if (se == null) {
                            break;
                        }

                        query = extractSearchEngineQuery(se, attribute.getValueString());
                        if (query.equals("")) //False positive match, artifact was not a query. NON-NLS
                        {
                            break;
                        }

                    } else if (attribute.getAttributeType().getTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_PROG_NAME.getTypeID()) {
                        browser = attribute.getValueString();
                    } else if (attribute.getAttributeType().getTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DOMAIN.getTypeID()) {
                        searchEngineDomain = attribute.getValueString();
                    } else if (attribute.getAttributeType().getTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DATETIME_ACCESSED.getTypeID()) {
                        last_accessed = attribute.getValueLong();
                    }
                }

                // Only post an artifact when a known engine matched AND a
                // non-empty query was extracted.
                if (se != null && !query.equals("")) { //NON-NLS
                    // If date doesn't exist, change to 0 (instead of 1969)
                    if (last_accessed == -1) {
                        last_accessed = 0;
                    }
                    Collection<BlackboardAttribute> bbattributes = new ArrayList<>();
                    bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DOMAIN,
                            NbBundle.getMessage(this.getClass(),
                                    "SearchEngineURLQueryAnalyzer.parentModuleName"), searchEngineDomain));
                    bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_TEXT,
                            NbBundle.getMessage(this.getClass(),
                                    "SearchEngineURLQueryAnalyzer.parentModuleName"), query));
                    bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_PROG_NAME,
                            NbBundle.getMessage(this.getClass(),
                                    "SearchEngineURLQueryAnalyzer.parentModuleName"), browser));
                    bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DATETIME_ACCESSED,
                            NbBundle.getMessage(this.getClass(),
                                    "SearchEngineURLQueryAnalyzer.parentModuleName"), last_accessed));
                    this.addArtifact(ARTIFACT_TYPE.TSK_WEB_SEARCH_QUERY, file, bbattributes);
                    se.increment();
                    ++totalQueries;
                }
            }
        } catch (TskCoreException e) {
            logger.log(Level.SEVERE, "Encountered error retrieving artifacts for search engine queries", e); //NON-NLS
        } finally {
            if (context.dataSourceIngestIsCancelled()) {
                logger.info("Operation terminated by user."); //NON-NLS
            }
            // Fire unconditionally so the UI picks up whatever was posted.
            IngestServices.getInstance().fireModuleDataEvent(new ModuleDataEvent(
                    NbBundle.getMessage(this.getClass(), "SearchEngineURLQueryAnalyzer.parentModuleName.noSpace"),
                    BlackboardArtifact.ARTIFACT_TYPE.TSK_WEB_SEARCH_QUERY));
            logger.log(Level.INFO, "Extracted {0} queries from the blackboard", totalQueries); //NON-NLS
        }
    }
369 
370  private String getTotals() {
371  String total = "";
372  if (engines == null) {
373  return total;
374  }
375  for (SearchEngineURLQueryAnalyzer.SearchEngine se : engines) {
376  total += se.getEngineName() + " : " + se.getTotal() + "\n";
377  }
378  return total;
379  }
380 
381  @Override
382  public void process(Content dataSource, IngestJobContext context) {
383  this.dataSource = dataSource;
384  this.context = context;
385  this.findSearchQueries();
386  logger.log(Level.INFO, "Search Engine stats: \n{0}", getTotals()); //NON-NLS
387  }
388 
389  @Override
390  void init() throws IngestModuleException {
391  try {
392  PlatformUtil.extractResourceToUserConfigDir(SearchEngineURLQueryAnalyzer.class, XMLFILE, true);
393  } catch (IOException e) {
394  String message = NbBundle
395  .getMessage(this.getClass(), "SearchEngineURLQueryAnalyzer.init.exception.msg", XMLFILE);
396  logger.log(Level.SEVERE, message, e);
397  throw new IngestModuleException(message, e);
398  }
399 
400  loadConfigFile();
401  }
402 
403  @Override
404  public void complete() {
405  logger.info("Search Engine URL Query Analyzer has completed."); //NON-NLS
406  }
407 }
static< T > boolean xmlIsValid(DOMSource xmlfile, Class< T > clazz, String schemaFile)
Definition: XMLUtil.java:179

Copyright © 2012-2015 Basis Technology. Generated on: Wed Apr 6 2016
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.