19 package org.sleuthkit.autopsy.recentactivity;
 
   22 import java.io.IOException;
 
   23 import java.io.UnsupportedEncodingException;
 
   24 import java.net.URLDecoder;
 
   25 import java.util.ArrayList;
 
   26 import java.util.Collection;
 
   27 import java.util.List;
 
   28 import java.util.logging.Level;
 
   29 import javax.xml.parsers.DocumentBuilder;
 
   30 import javax.xml.parsers.DocumentBuilderFactory;
 
   31 import javax.xml.parsers.ParserConfigurationException;
 
   32 import org.openide.util.NbBundle;
 
   43 import org.
sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
 
   45 import org.
sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
 
   48 import org.w3c.dom.Document;
 
   49 import org.w3c.dom.NamedNodeMap;
 
   50 import org.w3c.dom.NodeList;
 
   51 import org.xml.sax.SAXException;
 
   63     "cannotBuildXmlParser=Unable to build XML parser: ",
 
   64     "cannotLoadSEUQA=Unable to load Search Engine URL Query Analyzer settings file, SEUQAMappings.xml: ",
 
   65     "cannotParseXml=Unable to parse XML file: ",
 
   66     "# {0} - file name", 
"SearchEngineURLQueryAnalyzer.init.exception.msg=Unable to find {0}.",
 
   67     "Progress_Message_Find_Search_Query=Find Search Queries" 
   69 class SearchEngineURLQueryAnalyzer extends Extract {
 
   71     private static final Logger logger = Logger.getLogger(SearchEngineURLQueryAnalyzer.class.getName());
 
   72     private static final String XMLFILE = 
"SEUQAMappings.xml"; 
 
   73     private static final String XSDFILE = 
"SearchEngineSchema.xsd"; 
 
   74     private static SearchEngineURLQueryAnalyzer.SearchEngine[] engines;
 
   76     private Content dataSource;
 
   77     private IngestJobContext context;
 
   79     SearchEngineURLQueryAnalyzer() {
              // Initialises `moduleName` from the localized resource bundle.
              // `moduleName` is not declared among the fields visible in this
              // class; presumably it is inherited from Extract -- TODO confirm.
              // NOTE(review): the lookup is keyed on ExtractIE.class although the
              // message key belongs to SearchEngineURLQueryAnalyzer. This looks
              // like a copy/paste leftover; it only works if both classes resolve
              // to the same bundle -- verify, and prefer this class's own literal.
 
   80         moduleName = NbBundle.getMessage(ExtractIE.class, 
"SearchEngineURLQueryAnalyzer.moduleName.text");
 
   89         private final String 
key;
 
   92         KeyPair(String key, String keyRegExp) {
 
   94             this.keyRegExp = keyRegExp;
 
  101         String getKeyRegExp() {
 
  114         SearchEngine(String engineName, String domainSubstring, List<KeyPair> keyPairs) {
 
  115             this.engineName = engineName;
 
  116             this.domainSubstring = domainSubstring;
 
  117             this.keyPairs = keyPairs;
 
  125         String getEngineName() {
 
  129         String getDomainSubstring() {
 
  130             return domainSubstring;
 
  142         List<KeyPair> getKeys() {
 
  143             return this.keyPairs;
 
  150                 split = split + 
"[ " + kp.getKey() + 
" :: " + kp.getKeyRegExp() + 
" ]" + 
", ";
 
  152             return NbBundle.getMessage(this.getClass(), 
"SearchEngineURLQueryAnalyzer.toString",
 
  153                     engineName, domainSubstring, count, split);
 
  161             File f = 
new File(path);
 
  162             logger.log(Level.INFO, 
"Load successful"); 
 
  163             DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
 
  164             DocumentBuilder db = dbf.newDocumentBuilder();
 
  165             xmlinput = db.parse(f);
 
  167             if (!
XMLUtil.
xmlIsValid(xmlinput, SearchEngineURLQueryAnalyzer.class, XSDFILE)) {
 
  168                 logger.log(Level.WARNING, 
"Error loading Search Engines: could not validate against [" + XSDFILE + 
"], results may not be accurate."); 
 
  171         } 
catch (IOException e) {
 
  172             throw new IngestModuleException(Bundle.cannotLoadSEUQA() + e.getLocalizedMessage(), e); 
 
  173         } 
catch (ParserConfigurationException pce) {
 
  174             throw new IngestModuleException(Bundle.cannotBuildXmlParser() + pce.getLocalizedMessage(), pce); 
 
  175         } 
catch (SAXException sxe) {
 
  176             throw new IngestModuleException(Bundle.cannotParseXml() + sxe.getLocalizedMessage(), sxe); 
 
  179         NodeList nlist = xmlinput.getElementsByTagName(
"SearchEngine"); 
 
  180         SearchEngineURLQueryAnalyzer.SearchEngine[] listEngines = 
new SearchEngineURLQueryAnalyzer.SearchEngine[nlist.getLength()];
 
  181         for (
int i = 0; i < nlist.getLength(); i++) {
 
  182             NamedNodeMap nnm = nlist.item(i).getAttributes();
 
  184             String EngineName = nnm.getNamedItem(
"engine").getNodeValue(); 
 
  185             String EnginedomainSubstring = nnm.getNamedItem(
"domainSubstring").getNodeValue(); 
 
  186             List<KeyPair> keys = 
new ArrayList<>();
 
  188             NodeList listSplits = xmlinput.getElementsByTagName(
"splitToken"); 
 
  189             for (
int k = 0; k < listSplits.getLength(); k++) {
 
  190                 if (listSplits.item(k).getParentNode().getAttributes().getNamedItem(
"engine").getNodeValue().equals(EngineName)) { 
 
  191                     keys.add(
new KeyPair(listSplits.item(k).getAttributes().getNamedItem(
"plainToken").getNodeValue(), listSplits.item(k).getAttributes().getNamedItem(
"regexToken").getNodeValue())); 
 
  195             SearchEngineURLQueryAnalyzer.SearchEngine Se = 
new SearchEngineURLQueryAnalyzer.SearchEngine(EngineName, EnginedomainSubstring, keys);
 
  198         engines = listEngines;
 
  211     private static SearchEngineURLQueryAnalyzer.SearchEngine getSearchEngineFromUrl(String domain) {
              // Looks through the static `engines` array for an entry whose
              // domain substring occurs in the given string. The match is a plain
              // String.contains, not a host-aware comparison.
              // NOTE(review): despite the parameter name, findSearchQueries passes
              // the full TSK_URL attribute value, so a hit anywhere in the URL
              // (path or query string included) selects the engine -- confirm
              // that this is intended.
 
              // `engines` is only assigned after the XML mappings are parsed; the
              // handling of the not-yet-loaded case is outside this excerpt.
  212         if (engines == null) {
 
  215         for (SearchEngine engine : engines) {
 
  216             if (domain.contains(engine.getDomainSubstring())) {
 
  230     private String extractSearchEngineQuery(SearchEngineURLQueryAnalyzer.SearchEngine eng, String url) {
              // Pulls the raw search term out of `url` using the key pairs of
              // `eng`, then URL-decodes it as UTF-8.
              //
              // eng - engine whose key pairs are tried; dereferenced without a
              //       null check in the visible code.
              // url - URL text to examine.
 
              // Each key pair carries two forms of the same token: the plain form
              // is used for the cheap contains() test, the regex form is what
              // getValue() splits on.
              // NOTE(review): getValue() runs String.split on a pattern derived
              // from the regex token; a PatternSyntaxException raised there is an
              // IllegalArgumentException. Whether this call sits inside the try
              // below is not visible in this excerpt -- verify.
  233         for (KeyPair kp : eng.getKeys()) {
 
  234             if (url.contains(kp.getKey())) {
 
  235                 x = getValue(url, kp.getKeyRegExp());
 
  240             String decoded = URLDecoder.decode(x, 
"UTF-8"); 
 
              // "UTF-8" is a charset every Java platform must support, so this
              // branch is effectively unreachable; it exists because the
              // String-based decode overload declares the checked exception.
  242         } 
catch (UnsupportedEncodingException exception) { 
 
  243             logger.log(Level.FINE, 
"Error during URL decoding, returning undecoded value:" 
  245                     + 
"\n\tUndecoded value: " + x
 
  246                     + 
"\n\tEngine name: " + eng.getEngineName()
 
  247                     + 
"\n\tEngine domain: " + eng.getDomainSubstring(), exception); 
 
              // URLDecoder.decode throws IllegalArgumentException for malformed
              // percent-escapes (e.g. a lone '%').
              // NOTE(review): if the URLs come from browser history, malformed
              // input is plausible and SEVERE may be noisier than warranted --
              // confirm the intended log level.
  249         } 
catch (IllegalArgumentException exception) { 
 
  250             logger.log(Level.SEVERE, 
"Illegal argument passed to URL decoding, returning undecoded value:" 
  252                     + 
"\n\tUndecoded value: " + x
 
  253                     + 
"\n\tEngine name: " + eng.getEngineName()
 
  254                     + 
"\n\tEngine domain: " + eng.getDomainSubstring(), exception); 
 
  269     private String getValue(String url, String regExpKey) {
              // Splits `url` on the key and takes the last piece produced by the
              // split, cut at the first '&' when one is present.
              //
              // url       - URL text to split.
              // regExpKey - key token in its regex form, as stored in a KeyPair.
 
  279         String v = regExpKey;
 
              // Rewrites every backslash-question-mark pair in the key to a bare
              // '?' before the split.
              // NOTE(review): `v` is then handed to String.split, which treats it
              // as a regular expression. A bare '?' with nothing quantifiable in
              // front of it (for instance at the very start of the pattern) makes
              // pattern compilation throw PatternSyntaxException. Whether that can
              // happen depends on how regexToken is escaped in SEUQAMappings.xml,
              // which is not visible here -- verify; Pattern.quote on the plain
              // token would avoid the question entirely.
  281         if (regExpKey.contains(
"\\?")) {
 
  282             v = regExpKey.replace(
"\\?", 
"?");
 
  284         String[] sp = url.split(v);
 
              // Fewer than two pieces means the key did not split the URL; the
              // assignments to `value` below are skipped in that case.
  285         if (sp.length >= 2) {
 
              // Using the LAST piece means that when the key occurs several times
              // in the URL, the final occurrence is the one taken.
  286             if (sp[sp.length - 1].contains(
"&")) {
 
  287                 value = sp[sp.length - 1].split(
"&")[0];
 
  289                 value = sp[sp.length - 1];
 
  295     private void findSearchQueries() {
              // Walks the TSK_WEB_BOOKMARK and TSK_WEB_HISTORY artifacts already on
              // the blackboard, tries to recognise a search-engine query in each
              // artifact's URL attribute, and posts a TSK_WEB_SEARCH_QUERY artifact
              // for every hit. Reads the `context` and `dataSource` fields, which
              // process() sets before calling this method.
 
              // Logged at the end of the method; the statement that increments it
              // is not part of this excerpt.
  296         int totalQueries = 0;
 
              // The WHERE clause is assembled only from enum type IDs (no external
              // input), so the string concatenation is not an injection vector.
  299             Collection<BlackboardArtifact> listArtifacts = currentCase.getSleuthkitCase().getMatchingArtifacts(
"WHERE (blackboard_artifacts.artifact_type_id = '" + ARTIFACT_TYPE.TSK_WEB_BOOKMARK.getTypeID() 
 
  300                     + 
"' OR blackboard_artifacts.artifact_type_id = '" + ARTIFACT_TYPE.TSK_WEB_HISTORY.getTypeID() + 
"') ");  
 
  301             logger.log(Level.INFO, 
"Processing {0} blackboard artifacts.", listArtifacts.size()); 
 
  303             for (BlackboardArtifact artifact : listArtifacts) {
 
                      // Cancellation is polled once per artifact.
  304                 if (context.dataSourceIngestIsCancelled()) {
 
  310                 String searchEngineDomain = 
"";
 
                      // -1 is the "no access time seen" sentinel checked further down.
  312                 long last_accessed = -1;
 
  314                 long fileId = artifact.getObjectID();
 
                      // How `isFromSource` is used is not visible in this excerpt.
  315                 boolean isFromSource = tskCase.isFileFromSource(dataSource, fileId);
 
  321                 AbstractFile file = tskCase.getAbstractFileById(fileId);
 
  326                 SearchEngineURLQueryAnalyzer.SearchEngine se = null;
 
                      // NOTE(review): this issues one attribute query per artifact
                      // (an N+1 access pattern) -- worth checking on large cases.
  328                 Collection<BlackboardAttribute> listAttributes = currentCase.getSleuthkitCase().getMatchingAttributes(
"WHERE artifact_id = " + artifact.getArtifactID()); 
 
  330                 for (BlackboardAttribute attribute : listAttributes) {
 
  331                     if (attribute.getAttributeType().getTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_URL.getTypeID()) {
 
  332                         final String urlString = attribute.getValueString();
 
  333                         se = getSearchEngineFromUrl(urlString);
 
                              // NOTE(review): extractSearchEngineQuery dereferences
                              // its engine argument, so `se` must be non-null here;
                              // the guard, if any, lies in lines missing from this
                              // excerpt -- verify. `urlString` could be reused
                              // instead of calling getValueString() again.
  338                         query = extractSearchEngineQuery(se, attribute.getValueString());
 
  339                         if (query.equals(
"")) 
 
  344                     } 
else if (attribute.getAttributeType().getTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_PROG_NAME.getTypeID()) {
 
  345                         browser = attribute.getValueString();
 
  346                     } 
else if (attribute.getAttributeType().getTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DOMAIN.getTypeID()) {
 
  347                         searchEngineDomain = attribute.getValueString();
 
  348                     } 
else if (attribute.getAttributeType().getTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DATETIME_ACCESSED.getTypeID()) {
 
  349                         last_accessed = attribute.getValueLong();
 
                      // A result is posted only when an engine matched AND a
                      // non-empty query string was extracted.
  353                 if (se != null && !query.equals(
"")) { 
 
  355                     if (last_accessed == -1) {
 
                          // The four attributes below each look up the same
                          // "parentModuleName" bundle key.
  358                     Collection<BlackboardAttribute> bbattributes = 
new ArrayList<>();
 
  359                     bbattributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DOMAIN,
 
  360                             NbBundle.getMessage(
this.getClass(),
 
  361                                     "SearchEngineURLQueryAnalyzer.parentModuleName"), searchEngineDomain));
 
  362                     bbattributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_TEXT,
 
  363                             NbBundle.getMessage(
this.getClass(),
 
  364                                     "SearchEngineURLQueryAnalyzer.parentModuleName"), query));
 
  365                     bbattributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_PROG_NAME,
 
  366                             NbBundle.getMessage(
this.getClass(),
 
  367                                     "SearchEngineURLQueryAnalyzer.parentModuleName"), browser));
 
  368                     bbattributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DATETIME_ACCESSED,
 
  369                             NbBundle.getMessage(
this.getClass(),
 
  370                                     "SearchEngineURLQueryAnalyzer.parentModuleName"), last_accessed));
 
  371                     this.addArtifact(ARTIFACT_TYPE.TSK_WEB_SEARCH_QUERY, file, bbattributes);
 
              // A TskCoreException is logged here and not rethrown in the visible
              // code.
  376         } 
catch (TskCoreException e) {
 
  377             logger.log(Level.SEVERE, 
"Encountered error retrieving artifacts for search engine queries", e); 
 
  379             if (context.dataSourceIngestIsCancelled()) {
 
  380                 logger.info(
"Operation terminated by user."); 
 
              // Fires a module data event for TSK_WEB_SEARCH_QUERY.
  382             IngestServices.getInstance().fireModuleDataEvent(
new ModuleDataEvent(
 
  383                     NbBundle.getMessage(
this.getClass(), 
"SearchEngineURLQueryAnalyzer.parentModuleName.noSpace"),
 
  384                     BlackboardArtifact.ARTIFACT_TYPE.TSK_WEB_SEARCH_QUERY));
 
  385             logger.log(Level.INFO, 
"Extracted {0} queries from the blackboard", totalQueries); 
 
  389     private String getTotals() {
              // Builds a text summary with one "<engine name> : <total>" line per
              // configured engine; process() writes it to the log.
 
              // `engines` is only assigned after the XML mappings are parsed; the
              // handling of the not-yet-loaded case is outside this excerpt.
  391         if (engines == null) {
 
              // Repeated String concatenation in a loop; acceptable for a handful
              // of engines, a StringBuilder would scale better.
  394         for (SearchEngineURLQueryAnalyzer.SearchEngine se : engines) {
 
  395             total += se.getEngineName() + 
" : " + se.getTotal() + 
"\n";
 
  401     public void process(Content dataSource, IngestJobContext context, DataSourceIngestModuleProgress progressBar) {
              // Entry point for one data source: stores the data source and job
              // context on the instance, updates the progress message, runs the
              // search-query extraction, and logs the per-engine totals.
              //
              // dataSource  - data source being analysed; kept in a field.
              // context     - ingest job context; kept in a field and polled for
              //               cancellation by findSearchQueries().
              // progressBar - receives the "Find Search Queries" progress message.
 
              // State is passed to findSearchQueries() through instance fields
              // rather than through parameters.
  402         this.dataSource = dataSource;
 
  403         this.context = context;
 
  405         progressBar.progress(Bundle.Progress_Message_Find_Search_Query());
 
  406         this.findSearchQueries();
 
  407         logger.log(Level.INFO, 
"Search Engine stats: \n{0}", getTotals()); 
 
  411     void configExtractor() throws IngestModuleException {
              // Copies the bundled SEUQAMappings.xml resource into the user
              // configuration directory. Throws IngestModuleException when the
              // extraction fails with an IOException.
 
              // The trailing `true` presumably requests overwriting an existing
              // copy -- the parameter's meaning is not visible here; TODO confirm
              // against PlatformUtil.
  413             PlatformUtil.extractResourceToUserConfigDir(SearchEngineURLQueryAnalyzer.class, XMLFILE, 
true);
 
              // The failure is logged and rethrown with the original exception as
              // cause.
  414         } 
catch (IOException e) {
 
  415             String message = Bundle.SearchEngineURLQueryAnalyzer_init_exception_msg(XMLFILE);
 
  416             logger.log(Level.SEVERE, message, e);
 
  417             throw new IngestModuleException(message, e);
 
  423     public void complete() {
              // Completion hook; the visible body only writes an informational
              // log line.
 
  424         logger.info(
"Search Engine URL Query Analyzer has completed."); 
 
final String domainSubstring
final List< KeyPair > keyPairs
static< T > boolean xmlIsValid(DOMSource xmlfile, Class< T > clazz, String schemaFile)