Autopsy  3.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
IngestTasksScheduler.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2012-2014 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.ingest;
20 
21 import java.util.ArrayList;
22 import java.util.Collection;
23 import java.util.Comparator;
24 import java.util.HashSet;
25 import java.util.Iterator;
26 import java.util.List;
27 import java.util.Set;
28 import java.util.TreeSet;
29 import java.util.concurrent.BlockingDeque;
30 import java.util.concurrent.LinkedBlockingDeque;
31 import java.util.concurrent.LinkedBlockingQueue;
32 import java.util.logging.Level;
33 import java.util.regex.Matcher;
34 import java.util.regex.Pattern;
41 
46 final class IngestTasksScheduler {
47 
48  private static final Logger logger = Logger.getLogger(IngestTasksScheduler.class.getName());
49  private static final int FAT_NTFS_FLAGS = TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_FAT12.getValue() | TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_FAT16.getValue() | TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_FAT32.getValue() | TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_NTFS.getValue();
50  private static IngestTasksScheduler instance;
51 
58  private final LinkedBlockingQueue<DataSourceIngestTask> pendingDataSourceTasks;
59  private final DataSourceIngestTaskQueue dataSourceTasksDispenser;
60 
84  private final TreeSet<FileIngestTask> rootDirectoryTasks;
85  private final List<FileIngestTask> directoryTasks;
86  private final BlockingDeque<FileIngestTask> pendingFileTasks;
87  private final FileIngestTaskQueue fileTasksDispenser;
88 
100  private final Set<IngestTask> tasksInProgress;
101 
105  synchronized static IngestTasksScheduler getInstance() {
106  if (IngestTasksScheduler.instance == null) {
107  IngestTasksScheduler.instance = new IngestTasksScheduler();
108  }
109  return IngestTasksScheduler.instance;
110  }
111 
115  private IngestTasksScheduler() {
116  this.pendingDataSourceTasks = new LinkedBlockingQueue<>();
117  this.dataSourceTasksDispenser = new DataSourceIngestTaskQueue();
118  this.rootDirectoryTasks = new TreeSet<>(new RootDirectoryTaskComparator());
119  this.directoryTasks = new ArrayList<>();
120  this.pendingFileTasks = new LinkedBlockingDeque<>();
121  this.fileTasksDispenser = new FileIngestTaskQueue();
122  this.tasksInProgress = new HashSet<>();
123  }
124 
131  IngestTaskQueue getDataSourceIngestTaskQueue() {
132  return this.dataSourceTasksDispenser;
133  }
134 
141  IngestTaskQueue getFileIngestTaskQueue() {
142  return this.fileTasksDispenser;
143  }
144 
153  synchronized void scheduleIngestTasks(DataSourceIngestJob job) {
154  // Scheduling of both a data source ingest task and file ingest tasks
155  // for a job must be an atomic operation. Otherwise, the data source
156  // task might be completed before the file tasks are scheduled,
157  // resulting in a potential false positive when another thread checks
158  // whether or not all the tasks for the job are completed.
159  this.scheduleDataSourceIngestTask(job);
160  this.scheduleFileIngestTasks(job);
161  }
162 
168  synchronized void scheduleDataSourceIngestTask(DataSourceIngestJob job) {
169  DataSourceIngestTask task = new DataSourceIngestTask(job);
170  this.tasksInProgress.add(task);
171  try {
172  this.pendingDataSourceTasks.put(task);
173  } catch (InterruptedException ex) {
178  this.tasksInProgress.remove(task);
179  Thread.currentThread().interrupt();
180  }
181  }
182 
188  synchronized void scheduleFileIngestTasks(DataSourceIngestJob job) {
189  // Get the top level files for the data source associated with this job
190  // and add them to the root directories priority queue.
191  List<AbstractFile> topLevelFiles = getTopLevelFiles(job.getDataSource());
192  for (AbstractFile firstLevelFile : topLevelFiles) {
193  FileIngestTask task = new FileIngestTask(job, firstLevelFile);
194  if (IngestTasksScheduler.shouldEnqueueFileTask(task)) {
195  this.tasksInProgress.add(task);
196  this.rootDirectoryTasks.add(task);
197  }
198  }
199  shuffleFileTaskQueues();
200  }
201 
208  synchronized void scheduleFileIngestTask(DataSourceIngestJob job, AbstractFile file) {
209  FileIngestTask task = new FileIngestTask(job, file);
210  if (IngestTasksScheduler.shouldEnqueueFileTask(task)) {
211  this.tasksInProgress.add(task);
212  addToPendingFileTasksQueue(task);
213  }
214  }
215 
222  synchronized void notifyTaskCompleted(IngestTask task) {
223  tasksInProgress.remove(task);
224  }
225 
233  synchronized boolean tasksForJobAreCompleted(DataSourceIngestJob job) {
234  for (IngestTask task : tasksInProgress) {
235  if (task.getIngestJob().getId() == job.getId()) {
236  return false;
237  }
238  }
239  return true;
240  }
241 
250  synchronized void cancelPendingTasksForIngestJob(DataSourceIngestJob job) {
251  long jobId = job.getId();
252  this.removeTasksForJob(this.rootDirectoryTasks, jobId);
253  this.removeTasksForJob(this.directoryTasks, jobId);
254  this.removeTasksForJob(this.pendingFileTasks, jobId);
255  this.removeTasksForJob(this.pendingDataSourceTasks, jobId);
256  this.shuffleFileTaskQueues();
257  }
258 
267  private static List<AbstractFile> getTopLevelFiles(Content dataSource) {
268  List<AbstractFile> topLevelFiles = new ArrayList<>();
269  Collection<AbstractFile> rootObjects = dataSource.accept(new GetRootDirectoryVisitor());
270  if (rootObjects.isEmpty() && dataSource instanceof AbstractFile) {
271  // The data source is itself a file to be processed.
272  topLevelFiles.add((AbstractFile) dataSource);
273  } else {
274  for (AbstractFile root : rootObjects) {
275  List<Content> children;
276  try {
277  children = root.getChildren();
278  if (children.isEmpty()) {
279  // Add the root object itself, it could be an unallocated
280  // space file, or a child of a volume or an image.
281  topLevelFiles.add(root);
282  } else {
283  // The root object is a file system root directory, get
284  // the files within it.
285  for (Content child : children) {
286  if (child instanceof AbstractFile) {
287  topLevelFiles.add((AbstractFile) child);
288  }
289  }
290  }
291  } catch (TskCoreException ex) {
292  logger.log(Level.WARNING, "Could not get children of root to enqueue: " + root.getId() + ": " + root.getName(), ex); //NON-NLS
293  }
294  }
295  }
296  return topLevelFiles;
297  }
298 
304  synchronized private void shuffleFileTaskQueues() {
305  // This is synchronized because it is called both by synchronized
306  // methods of this ingest scheduler and an unsynchronized method of its
307  // file tasks "dispenser".
308  while (true) {
309  // Loop until either the pending file tasks queue is NOT empty
310  // or the upstream queues that feed into it ARE empty.
311  if (!this.pendingFileTasks.isEmpty()) {
312  // There are file tasks ready to be consumed, exit.
313  return;
314  }
315  if (this.directoryTasks.isEmpty()) {
316  if (this.rootDirectoryTasks.isEmpty()) {
317  // There are no root directory tasks to move into the
318  // directory queue, exit.
319  return;
320  } else {
321  // Move the next root directory task into the
322  // directories queue. Note that the task was already
323  // added to the tasks in progress list when the task was
324  // created in scheduleFileIngestTasks().
325  this.directoryTasks.add(this.rootDirectoryTasks.pollFirst());
326  }
327  }
328 
329  // Try to add the most recently added directory from the
330  // directory tasks queue to the pending file tasks queue.
331  FileIngestTask directoryTask = this.directoryTasks.remove(this.directoryTasks.size() - 1);
332  if (shouldEnqueueFileTask(directoryTask)) {
333  addToPendingFileTasksQueue(directoryTask);
334  } else {
335  this.tasksInProgress.remove(directoryTask);
336  }
337 
338  // If the directory contains subdirectories or files, try to
339  // enqueue tasks for them as well.
340  final AbstractFile directory = directoryTask.getFile();
341  try {
342  for (Content child : directory.getChildren()) {
343  if (child instanceof AbstractFile) {
344  AbstractFile file = (AbstractFile) child;
345  FileIngestTask childTask = new FileIngestTask(directoryTask.getIngestJob(), file);
346  if (file.hasChildren()) {
347  // Found a subdirectory, put the task in the
348  // pending directory tasks queue. Note the
349  // addition of the task to the tasks in progress
350  // list. This is necessary because this is the
351  // first appearance of this task in the queues.
352  this.tasksInProgress.add(childTask);
353  this.directoryTasks.add(childTask);
354  } else if (shouldEnqueueFileTask(childTask)) {
355  // Found a file, put the task directly into the
356  // pending file tasks queue.
357  this.tasksInProgress.add(childTask);
358  addToPendingFileTasksQueue(childTask);
359  }
360  }
361  }
362  } catch (TskCoreException ex) {
363  String errorMessage = String.format("An error occurred getting the children of %s", directory.getName()); //NON-NLS
364  logger.log(Level.SEVERE, errorMessage, ex);
365  }
366  }
367  }
368 
377  private static boolean shouldEnqueueFileTask(final FileIngestTask task) {
378  final AbstractFile file = task.getFile();
379 
380  // Skip the task if the file is an unallocated space file and the
381  // process unallocated space flag is not set for this job.
382  if (!task.getIngestJob().shouldProcessUnallocatedSpace()
383  && file.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS)) {
384  return false;
385  }
386 
387  // Skip the task if the file is actually the pseudo-file for the parent
388  // or current directory.
389  String fileName = file.getName();
390  if (fileName.equals(".") || fileName.equals("..")) {
391  return false;
392  }
393 
394  // Skip the task if the file is one of a select group of special, large
395  // NTFS or FAT file system files.
396  if (file instanceof org.sleuthkit.datamodel.File) {
398 
399  // Get the type of the file system, if any, that owns the file.
400  TskData.TSK_FS_TYPE_ENUM fsType = TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_UNSUPP;
401  try {
402  FileSystem fs = f.getFileSystem();
403  if (fs != null) {
404  fsType = fs.getFsType();
405  }
406  } catch (TskCoreException ex) {
407  logger.log(Level.SEVERE, "Error querying file system for " + f, ex); //NON-NLS
408  }
409 
410  // If the file system is not NTFS or FAT, don't skip the file.
411  if ((fsType.getValue() & FAT_NTFS_FLAGS) == 0) {
412  return true;
413  }
414 
415  // Find out whether the file is in a root directory.
416  boolean isInRootDir = false;
417  try {
418  AbstractFile parent = f.getParentDirectory();
419  isInRootDir = parent.isRoot();
420  } catch (TskCoreException ex) {
421  logger.log(Level.WARNING, "Error querying parent directory for" + f.getName(), ex); //NON-NLS
422  }
423 
424  // If the file is in the root directory of an NTFS or FAT file
425  // system, check its meta-address and check its name for the '$'
426  // character and a ':' character (not a default attribute).
427  if (isInRootDir && f.getMetaAddr() < 32) {
428  String name = f.getName();
429  if (name.length() > 0 && name.charAt(0) == '$' && name.contains(":")) {
430  return false;
431  }
432  }
433  }
434 
435  return true;
436  }
437 
443  synchronized private void addToPendingFileTasksQueue(FileIngestTask task) {
444  try {
445  this.pendingFileTasks.putFirst(task);
446  } catch (InterruptedException ex) {
451  this.tasksInProgress.remove(task);
452  Thread.currentThread().interrupt();
453  }
454  }
455 
464  private void removeTasksForJob(Collection<? extends IngestTask> taskQueue, long jobId) {
465  Iterator<? extends IngestTask> iterator = taskQueue.iterator();
466  while (iterator.hasNext()) {
467  IngestTask task = iterator.next();
468  if (task.getIngestJob().getId() == jobId) {
469  this.tasksInProgress.remove(task);
470  iterator.remove();
471  }
472  }
473  }
474 
482  private static int countTasksForJob(Collection<? extends IngestTask> queue, long jobId) {
483  Iterator<? extends IngestTask> iterator = queue.iterator();
484  int count = 0;
485  while (iterator.hasNext()) {
486  IngestTask task = (IngestTask) iterator.next();
487  if (task.getIngestJob().getId() == jobId) {
488  count++;
489  }
490  }
491  return count;
492  }
493 
501  synchronized IngestJobTasksSnapshot getTasksSnapshotForJob(long jobId) {
502  return new IngestJobTasksSnapshot(jobId);
503  }
504 
509  private static class RootDirectoryTaskComparator implements Comparator<FileIngestTask> {
510 
511  @Override
512  public int compare(FileIngestTask q1, FileIngestTask q2) {
513  AbstractFilePriority.Priority p1 = AbstractFilePriority.getPriority(q1.getFile());
514  AbstractFilePriority.Priority p2 = AbstractFilePriority.getPriority(q2.getFile());
515  if (p1 == p2) {
516  return (int) (q2.getFile().getId() - q1.getFile().getId());
517  } else {
518  return p2.ordinal() - p1.ordinal();
519  }
520  }
521 
522  private static class AbstractFilePriority {
523 
524  enum Priority {
525 
526  LAST, LOW, MEDIUM, HIGH
527  }
528 
529  static final List<Pattern> LAST_PRI_PATHS = new ArrayList<>();
530 
531  static final List<Pattern> LOW_PRI_PATHS = new ArrayList<>();
532 
533  static final List<Pattern> MEDIUM_PRI_PATHS = new ArrayList<>();
534 
535  static final List<Pattern> HIGH_PRI_PATHS = new ArrayList<>();
536  /* prioritize root directory folders based on the assumption that we
537  * are
538  * looking for user content. Other types of investigations may want
539  * different
540  * priorities. */
541 
542  static /* prioritize root directory
543  * folders based on the assumption that we are
544  * looking for user content. Other types of investigations may want
545  * different
546  * priorities. */ {
547  // these files have no structure, so they go last
548  //unalloc files are handled as virtual files in getPriority()
549  //LAST_PRI_PATHS.schedule(Pattern.compile("^\\$Unalloc", Pattern.CASE_INSENSITIVE));
550  //LAST_PRI_PATHS.schedule(Pattern.compile("^\\Unalloc", Pattern.CASE_INSENSITIVE));
551  LAST_PRI_PATHS.add(Pattern.compile("^pagefile", Pattern.CASE_INSENSITIVE));
552  LAST_PRI_PATHS.add(Pattern.compile("^hiberfil", Pattern.CASE_INSENSITIVE));
553  // orphan files are often corrupt and windows does not typically have
554  // user content, so put them towards the bottom
555  LOW_PRI_PATHS.add(Pattern.compile("^\\$OrphanFiles", Pattern.CASE_INSENSITIVE));
556  LOW_PRI_PATHS.add(Pattern.compile("^Windows", Pattern.CASE_INSENSITIVE));
557  // all other files go into the medium category too
558  MEDIUM_PRI_PATHS.add(Pattern.compile("^Program Files", Pattern.CASE_INSENSITIVE));
559  // user content is top priority
560  HIGH_PRI_PATHS.add(Pattern.compile("^Users", Pattern.CASE_INSENSITIVE));
561  HIGH_PRI_PATHS.add(Pattern.compile("^Documents and Settings", Pattern.CASE_INSENSITIVE));
562  HIGH_PRI_PATHS.add(Pattern.compile("^home", Pattern.CASE_INSENSITIVE));
563  HIGH_PRI_PATHS.add(Pattern.compile("^ProgramData", Pattern.CASE_INSENSITIVE));
564  }
565 
573  static AbstractFilePriority.Priority getPriority(final AbstractFile abstractFile) {
574  if (!abstractFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.FS)) {
575  //quickly filter out unstructured content
576  //non-fs virtual files and dirs, such as representing unalloc space
577  return AbstractFilePriority.Priority.LAST;
578  }
579  //determine the fs files priority by name
580  final String path = abstractFile.getName();
581  if (path == null) {
582  return AbstractFilePriority.Priority.MEDIUM;
583  }
584  for (Pattern p : HIGH_PRI_PATHS) {
585  Matcher m = p.matcher(path);
586  if (m.find()) {
587  return AbstractFilePriority.Priority.HIGH;
588  }
589  }
590  for (Pattern p : MEDIUM_PRI_PATHS) {
591  Matcher m = p.matcher(path);
592  if (m.find()) {
593  return AbstractFilePriority.Priority.MEDIUM;
594  }
595  }
596  for (Pattern p : LOW_PRI_PATHS) {
597  Matcher m = p.matcher(path);
598  if (m.find()) {
599  return AbstractFilePriority.Priority.LOW;
600  }
601  }
602  for (Pattern p : LAST_PRI_PATHS) {
603  Matcher m = p.matcher(path);
604  if (m.find()) {
605  return AbstractFilePriority.Priority.LAST;
606  }
607  }
608  //default is medium
609  return AbstractFilePriority.Priority.MEDIUM;
610  }
611  }
612  }
613 
618  private final class DataSourceIngestTaskQueue implements IngestTaskQueue {
619 
623  @Override
624  public IngestTask getNextTask() throws InterruptedException {
625  return IngestTasksScheduler.this.pendingDataSourceTasks.take();
626  }
627  }
628 
633  private final class FileIngestTaskQueue implements IngestTaskQueue {
634 
638  @Override
639  public IngestTask getNextTask() throws InterruptedException {
640  FileIngestTask task = IngestTasksScheduler.this.pendingFileTasks.takeFirst();
641  shuffleFileTaskQueues();
642  return task;
643  }
644 
645  }
646 
650  class IngestJobTasksSnapshot {
651 
652  private final long jobId;
653  private final long rootQueueSize;
654  private final long dirQueueSize;
655  private final long fileQueueSize;
656  private final long dsQueueSize;
657  private final long runningListSize;
658 
664  IngestJobTasksSnapshot(long jobId) {
665  this.jobId = jobId;
666  this.rootQueueSize = countTasksForJob(IngestTasksScheduler.this.rootDirectoryTasks, jobId);
667  this.dirQueueSize = countTasksForJob(IngestTasksScheduler.this.directoryTasks, jobId);
668  this.fileQueueSize = countTasksForJob(IngestTasksScheduler.this.pendingFileTasks, jobId);
669  this.dsQueueSize = countTasksForJob(IngestTasksScheduler.this.pendingDataSourceTasks, jobId);
670  this.runningListSize = countTasksForJob(IngestTasksScheduler.this.tasksInProgress, jobId);
671  }
672 
679  long getJobId() {
680  return jobId;
681  }
682 
689  long getRootQueueSize() {
690  return rootQueueSize;
691  }
692 
699  long getDirectoryTasksQueueSize() {
700  return dirQueueSize;
701  }
702 
703  long getFileQueueSize() {
704  return fileQueueSize;
705  }
706 
707  long getDsQueueSize() {
708  return dsQueueSize;
709  }
710 
711  long getRunningListSize() {
712  return runningListSize;
713  }
714 
715  }
716 
717 }
TskData.TSK_DB_FILES_TYPE_ENUM getType()
TskData.TSK_FS_TYPE_ENUM getFsType()

Copyright © 2012-2015 Basis Technology. Generated on: Mon Oct 19 2015
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.