Autopsy  4.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
IngestTasksScheduler.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2012-2015 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.ingest;
20 
21 import java.util.ArrayList;
22 import java.util.Collection;
23 import java.util.Comparator;
24 import java.util.HashSet;
25 import java.util.Iterator;
26 import java.util.List;
27 import java.util.Set;
28 import java.util.TreeSet;
29 import java.util.concurrent.BlockingDeque;
30 import java.util.concurrent.LinkedBlockingDeque;
31 import java.util.concurrent.LinkedBlockingQueue;
32 import java.util.logging.Level;
33 import java.util.regex.Matcher;
34 import java.util.regex.Pattern;
36 import org.sleuthkit.datamodel.AbstractFile;
37 import org.sleuthkit.datamodel.Content;
38 import org.sleuthkit.datamodel.FileSystem;
39 import org.sleuthkit.datamodel.TskCoreException;
40 import org.sleuthkit.datamodel.TskData;
41 
46 final class IngestTasksScheduler {
47 
48  private static final Logger logger = Logger.getLogger(IngestTasksScheduler.class.getName());
49  private static final int FAT_NTFS_FLAGS = TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_FAT12.getValue() | TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_FAT16.getValue() | TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_FAT32.getValue() | TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_NTFS.getValue();
50  private static IngestTasksScheduler instance;
51 
58  private final LinkedBlockingQueue<DataSourceIngestTask> pendingDataSourceTasks;
59  private final DataSourceIngestTaskQueue dataSourceTasksDispenser;
60 
84  private final TreeSet<FileIngestTask> rootDirectoryTasks;
85  private final List<FileIngestTask> directoryTasks;
86  private final BlockingDeque<FileIngestTask> pendingFileTasks;
87  private final FileIngestTaskQueue fileTasksDispenser;
88 
100  private final Set<IngestTask> tasksInProgress;
101 
105  synchronized static IngestTasksScheduler getInstance() {
106  if (IngestTasksScheduler.instance == null) {
107  IngestTasksScheduler.instance = new IngestTasksScheduler();
108  }
109  return IngestTasksScheduler.instance;
110  }
111 
115  private IngestTasksScheduler() {
116  this.pendingDataSourceTasks = new LinkedBlockingQueue<>();
117  this.dataSourceTasksDispenser = new DataSourceIngestTaskQueue();
118  this.rootDirectoryTasks = new TreeSet<>(new RootDirectoryTaskComparator());
119  this.directoryTasks = new ArrayList<>();
120  this.pendingFileTasks = new LinkedBlockingDeque<>();
121  this.fileTasksDispenser = new FileIngestTaskQueue();
122  this.tasksInProgress = new HashSet<>();
123  }
124 
131  IngestTaskQueue getDataSourceIngestTaskQueue() {
132  return this.dataSourceTasksDispenser;
133  }
134 
141  IngestTaskQueue getFileIngestTaskQueue() {
142  return this.fileTasksDispenser;
143  }
144 
154  synchronized void scheduleIngestTasks(DataSourceIngestJob job) {
155  if (!job.isCancelled()) {
156  // Scheduling of both a data source ingest task and file ingest tasks
157  // for a job must be an atomic operation. Otherwise, the data source
158  // task might be completed before the file tasks are scheduled,
159  // resulting in a potential false positive when another thread checks
160  // whether or not all the tasks for the job are completed.
161  this.scheduleDataSourceIngestTask(job);
162  this.scheduleFileIngestTasks(job);
163  }
164  }
165 
171  synchronized void scheduleDataSourceIngestTask(DataSourceIngestJob job) {
172  if (!job.isCancelled()) {
173  DataSourceIngestTask task = new DataSourceIngestTask(job);
174  this.tasksInProgress.add(task);
175  try {
176  this.pendingDataSourceTasks.put(task);
177  } catch (InterruptedException ex) {
182  this.tasksInProgress.remove(task);
183  Thread.currentThread().interrupt();
184  }
185  }
186  }
187 
193  synchronized void scheduleFileIngestTasks(DataSourceIngestJob job) {
194  if (!job.isCancelled()) {
195  // Get the top level files for the data source associated with this job
196  // and add them to the root directories priority queue.
197  List<AbstractFile> topLevelFiles = getTopLevelFiles(job.getDataSource());
198  for (AbstractFile firstLevelFile : topLevelFiles) {
199  FileIngestTask task = new FileIngestTask(job, firstLevelFile);
200  if (IngestTasksScheduler.shouldEnqueueFileTask(task)) {
201  this.tasksInProgress.add(task);
202  this.rootDirectoryTasks.add(task);
203  }
204  }
205  shuffleFileTaskQueues();
206  }
207  }
208 
215  synchronized void scheduleFileIngestTask(DataSourceIngestJob job, AbstractFile file) {
216  if (!job.isCancelled()) {
217  FileIngestTask task = new FileIngestTask(job, file);
218  if (IngestTasksScheduler.shouldEnqueueFileTask(task)) {
219  this.tasksInProgress.add(task);
220  addToPendingFileTasksQueue(task);
221  }
222  }
223  }
224 
231  synchronized void notifyTaskCompleted(IngestTask task) {
232  tasksInProgress.remove(task);
233  }
234 
243  synchronized boolean tasksForJobAreCompleted(DataSourceIngestJob job) {
244  for (IngestTask task : tasksInProgress) {
245  if (task.getIngestJob().getId() == job.getId()) {
246  return false;
247  }
248  }
249  return true;
250  }
251 
262  synchronized void cancelPendingTasksForIngestJob(DataSourceIngestJob job) {
271  long jobId = job.getId();
272  this.removeTasksForJob(this.rootDirectoryTasks, jobId);
273  this.removeTasksForJob(this.directoryTasks, jobId);
274  this.shuffleFileTaskQueues();
275  }
276 
286  private static List<AbstractFile> getTopLevelFiles(Content dataSource) {
287  List<AbstractFile> topLevelFiles = new ArrayList<>();
288  Collection<AbstractFile> rootObjects = dataSource.accept(new GetRootDirectoryVisitor());
289  if (rootObjects.isEmpty() && dataSource instanceof AbstractFile) {
290  // The data source is itself a file to be processed.
291  topLevelFiles.add((AbstractFile) dataSource);
292  } else {
293  for (AbstractFile root : rootObjects) {
294  List<Content> children;
295  try {
296  children = root.getChildren();
297  if (children.isEmpty()) {
298  // Add the root object itself, it could be an unallocated
299  // space file, or a child of a volume or an image.
300  topLevelFiles.add(root);
301  } else {
302  // The root object is a file system root directory, get
303  // the files within it.
304  for (Content child : children) {
305  if (child instanceof AbstractFile) {
306  topLevelFiles.add((AbstractFile) child);
307  }
308  }
309  }
310  } catch (TskCoreException ex) {
311  logger.log(Level.WARNING, "Could not get children of root to enqueue: " + root.getId() + ": " + root.getName(), ex); //NON-NLS
312  }
313  }
314  }
315  return topLevelFiles;
316  }
317 
323  synchronized private void shuffleFileTaskQueues() {
324  // This is synchronized because it is called both by synchronized
325  // methods of this ingest scheduler and an unsynchronized method of its
326  // file tasks "dispenser".
327  while (true) {
328  // Loop until either the pending file tasks queue is NOT empty
329  // or the upstream queues that feed into it ARE empty.
330  if (!this.pendingFileTasks.isEmpty()) {
331  // There are file tasks ready to be consumed, exit.
332  return;
333  }
334  if (this.directoryTasks.isEmpty()) {
335  if (this.rootDirectoryTasks.isEmpty()) {
336  // There are no root directory tasks to move into the
337  // directory queue, exit.
338  return;
339  } else {
340  // Move the next root directory task into the
341  // directories queue. Note that the task was already
342  // added to the tasks in progress list when the task was
343  // created in scheduleFileIngestTasks().
344  this.directoryTasks.add(this.rootDirectoryTasks.pollFirst());
345  }
346  }
347 
348  // Try to add the most recently added directory from the
349  // directory tasks queue to the pending file tasks queue.
350  FileIngestTask directoryTask = this.directoryTasks.remove(this.directoryTasks.size() - 1);
351  if (shouldEnqueueFileTask(directoryTask)) {
352  addToPendingFileTasksQueue(directoryTask);
353  } else {
354  this.tasksInProgress.remove(directoryTask);
355  }
356 
357  // If the directory contains subdirectories or files, try to
358  // enqueue tasks for them as well.
359  final AbstractFile directory = directoryTask.getFile();
360  try {
361  for (Content child : directory.getChildren()) {
362  if (child instanceof AbstractFile) {
363  AbstractFile file = (AbstractFile) child;
364  FileIngestTask childTask = new FileIngestTask(directoryTask.getIngestJob(), file);
365  if (file.hasChildren()) {
366  // Found a subdirectory, put the task in the
367  // pending directory tasks queue. Note the
368  // addition of the task to the tasks in progress
369  // list. This is necessary because this is the
370  // first appearance of this task in the queues.
371  this.tasksInProgress.add(childTask);
372  this.directoryTasks.add(childTask);
373  } else if (shouldEnqueueFileTask(childTask)) {
374  // Found a file, put the task directly into the
375  // pending file tasks queue.
376  this.tasksInProgress.add(childTask);
377  addToPendingFileTasksQueue(childTask);
378  }
379  }
380  }
381  } catch (TskCoreException ex) {
382  String errorMessage = String.format("An error occurred getting the children of %s", directory.getName()); //NON-NLS
383  logger.log(Level.SEVERE, errorMessage, ex);
384  }
385  }
386  }
387 
397  private static boolean shouldEnqueueFileTask(final FileIngestTask task) {
398  final AbstractFile file = task.getFile();
399 
400  // Skip the task if the file is an unallocated space file and the
401  // process unallocated space flag is not set for this job.
402  if (!task.getIngestJob().shouldProcessUnallocatedSpace()
403  && file.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS)) {
404  return false;
405  }
406 
407  // Skip the task if the file is actually the pseudo-file for the parent
408  // or current directory.
409  String fileName = file.getName();
410  if (fileName.equals(".") || fileName.equals("..")) {
411  return false;
412  }
413 
414  // Skip the task if the file is one of a select group of special, large
415  // NTFS or FAT file system files.
416  if (file instanceof org.sleuthkit.datamodel.File) {
417  final org.sleuthkit.datamodel.File f = (org.sleuthkit.datamodel.File) file;
418 
419  // Get the type of the file system, if any, that owns the file.
420  TskData.TSK_FS_TYPE_ENUM fsType = TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_UNSUPP;
421  try {
422  FileSystem fs = f.getFileSystem();
423  if (fs != null) {
424  fsType = fs.getFsType();
425  }
426  } catch (TskCoreException ex) {
427  logger.log(Level.SEVERE, "Error querying file system for " + f, ex); //NON-NLS
428  }
429 
430  // If the file system is not NTFS or FAT, don't skip the file.
431  if ((fsType.getValue() & FAT_NTFS_FLAGS) == 0) {
432  return true;
433  }
434 
435  // Find out whether the file is in a root directory.
436  boolean isInRootDir = false;
437  try {
438  AbstractFile parent = f.getParentDirectory();
439  isInRootDir = parent.isRoot();
440  } catch (TskCoreException ex) {
441  logger.log(Level.WARNING, "Error querying parent directory for" + f.getName(), ex); //NON-NLS
442  }
443 
444  // If the file is in the root directory of an NTFS or FAT file
445  // system, check its meta-address and check its name for the '$'
446  // character and a ':' character (not a default attribute).
447  if (isInRootDir && f.getMetaAddr() < 32) {
448  String name = f.getName();
449  if (name.length() > 0 && name.charAt(0) == '$' && name.contains(":")) {
450  return false;
451  }
452  }
453  }
454 
455  return true;
456  }
457 
463  synchronized private void addToPendingFileTasksQueue(FileIngestTask task) {
464  try {
465  this.pendingFileTasks.putFirst(task);
466  } catch (InterruptedException ex) {
471  this.tasksInProgress.remove(task);
472  Thread.currentThread().interrupt();
473  }
474  }
475 
484  synchronized private void removeTasksForJob(Collection<? extends IngestTask> taskQueue, long jobId) {
485  Iterator<? extends IngestTask> iterator = taskQueue.iterator();
486  while (iterator.hasNext()) {
487  IngestTask task = iterator.next();
488  if (task.getIngestJob().getId() == jobId) {
489  this.tasksInProgress.remove(task);
490  iterator.remove();
491  }
492  }
493  }
494 
503  private static int countTasksForJob(Collection<? extends IngestTask> queue, long jobId) {
504  Iterator<? extends IngestTask> iterator = queue.iterator();
505  int count = 0;
506  while (iterator.hasNext()) {
507  IngestTask task = (IngestTask) iterator.next();
508  if (task.getIngestJob().getId() == jobId) {
509  count++;
510  }
511  }
512  return count;
513  }
514 
523  synchronized IngestJobTasksSnapshot getTasksSnapshotForJob(long jobId) {
524  return new IngestJobTasksSnapshot(jobId);
525  }
526 
531  private static class RootDirectoryTaskComparator implements Comparator<FileIngestTask> {
532 
533  @Override
534  public int compare(FileIngestTask q1, FileIngestTask q2) {
535  AbstractFilePriority.Priority p1 = AbstractFilePriority.getPriority(q1.getFile());
536  AbstractFilePriority.Priority p2 = AbstractFilePriority.getPriority(q2.getFile());
537  if (p1 == p2) {
538  return (int) (q2.getFile().getId() - q1.getFile().getId());
539  } else {
540  return p2.ordinal() - p1.ordinal();
541  }
542  }
543 
544  private static class AbstractFilePriority {
545 
546  enum Priority {
547 
548  LAST, LOW, MEDIUM, HIGH
549  }
550 
551  static final List<Pattern> LAST_PRI_PATHS = new ArrayList<>();
552 
553  static final List<Pattern> LOW_PRI_PATHS = new ArrayList<>();
554 
555  static final List<Pattern> MEDIUM_PRI_PATHS = new ArrayList<>();
556 
557  static final List<Pattern> HIGH_PRI_PATHS = new ArrayList<>();
558  /*
559  * prioritize root directory folders based on the assumption that we
560  * are looking for user content. Other types of investigations may
561  * want different priorities.
562  */
563 
564  static /*
565  * prioritize root directory folders based on the assumption that we
566  * are looking for user content. Other types of investigations may
567  * want different priorities.
568  */ {
569  // these files have no structure, so they go last
570  //unalloc files are handled as virtual files in getPriority()
571  //LAST_PRI_PATHS.schedule(Pattern.compile("^\\$Unalloc", Pattern.CASE_INSENSITIVE));
572  //LAST_PRI_PATHS.schedule(Pattern.compile("^\\Unalloc", Pattern.CASE_INSENSITIVE));
573  LAST_PRI_PATHS.add(Pattern.compile("^pagefile", Pattern.CASE_INSENSITIVE));
574  LAST_PRI_PATHS.add(Pattern.compile("^hiberfil", Pattern.CASE_INSENSITIVE));
575  // orphan files are often corrupt and windows does not typically have
576  // user content, so put them towards the bottom
577  LOW_PRI_PATHS.add(Pattern.compile("^\\$OrphanFiles", Pattern.CASE_INSENSITIVE));
578  LOW_PRI_PATHS.add(Pattern.compile("^Windows", Pattern.CASE_INSENSITIVE));
579  // all other files go into the medium category too
580  MEDIUM_PRI_PATHS.add(Pattern.compile("^Program Files", Pattern.CASE_INSENSITIVE));
581  // user content is top priority
582  HIGH_PRI_PATHS.add(Pattern.compile("^Users", Pattern.CASE_INSENSITIVE));
583  HIGH_PRI_PATHS.add(Pattern.compile("^Documents and Settings", Pattern.CASE_INSENSITIVE));
584  HIGH_PRI_PATHS.add(Pattern.compile("^home", Pattern.CASE_INSENSITIVE));
585  HIGH_PRI_PATHS.add(Pattern.compile("^ProgramData", Pattern.CASE_INSENSITIVE));
586  }
587 
595  static AbstractFilePriority.Priority getPriority(final AbstractFile abstractFile) {
596  if (!abstractFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.FS)) {
597  //quickly filter out unstructured content
598  //non-fs virtual files and dirs, such as representing unalloc space
599  return AbstractFilePriority.Priority.LAST;
600  }
601  //determine the fs files priority by name
602  final String path = abstractFile.getName();
603  if (path == null) {
604  return AbstractFilePriority.Priority.MEDIUM;
605  }
606  for (Pattern p : HIGH_PRI_PATHS) {
607  Matcher m = p.matcher(path);
608  if (m.find()) {
609  return AbstractFilePriority.Priority.HIGH;
610  }
611  }
612  for (Pattern p : MEDIUM_PRI_PATHS) {
613  Matcher m = p.matcher(path);
614  if (m.find()) {
615  return AbstractFilePriority.Priority.MEDIUM;
616  }
617  }
618  for (Pattern p : LOW_PRI_PATHS) {
619  Matcher m = p.matcher(path);
620  if (m.find()) {
621  return AbstractFilePriority.Priority.LOW;
622  }
623  }
624  for (Pattern p : LAST_PRI_PATHS) {
625  Matcher m = p.matcher(path);
626  if (m.find()) {
627  return AbstractFilePriority.Priority.LAST;
628  }
629  }
630  //default is medium
631  return AbstractFilePriority.Priority.MEDIUM;
632  }
633  }
634  }
635 
640  private final class DataSourceIngestTaskQueue implements IngestTaskQueue {
641 
642  @Override
643  public IngestTask getNextTask() throws InterruptedException {
644  return IngestTasksScheduler.this.pendingDataSourceTasks.take();
645  }
646  }
647 
652  private final class FileIngestTaskQueue implements IngestTaskQueue {
653 
654  @Override
655  public IngestTask getNextTask() throws InterruptedException {
656  FileIngestTask task = IngestTasksScheduler.this.pendingFileTasks.takeFirst();
657  shuffleFileTaskQueues();
658  return task;
659  }
660 
661  }
662 
666  class IngestJobTasksSnapshot {
667 
668  private final long jobId;
669  private final long rootQueueSize;
670  private final long dirQueueSize;
671  private final long fileQueueSize;
672  private final long dsQueueSize;
673  private final long runningListSize;
674 
680  IngestJobTasksSnapshot(long jobId) {
681  this.jobId = jobId;
682  this.rootQueueSize = countTasksForJob(IngestTasksScheduler.this.rootDirectoryTasks, jobId);
683  this.dirQueueSize = countTasksForJob(IngestTasksScheduler.this.directoryTasks, jobId);
684  this.fileQueueSize = countTasksForJob(IngestTasksScheduler.this.pendingFileTasks, jobId);
685  this.dsQueueSize = countTasksForJob(IngestTasksScheduler.this.pendingDataSourceTasks, jobId);
686  this.runningListSize = countTasksForJob(IngestTasksScheduler.this.tasksInProgress, jobId);
687  }
688 
695  long getJobId() {
696  return jobId;
697  }
698 
705  long getRootQueueSize() {
706  return rootQueueSize;
707  }
708 
715  long getDirectoryTasksQueueSize() {
716  return dirQueueSize;
717  }
718 
719  long getFileQueueSize() {
720  return fileQueueSize;
721  }
722 
723  long getDsQueueSize() {
724  return dsQueueSize;
725  }
726 
727  long getRunningListSize() {
728  return runningListSize;
729  }
730 
731  }
732 
733 }

Copyright © 2012-2016 Basis Technology. Generated on: Mon Jan 2 2017
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.