Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion dftimewolf/cli/dftimewolf_recipes.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,14 @@

from dftimewolf import config

from dftimewolf.cli.recipes import local_plaso
from dftimewolf.cli.recipes import gcp_turbinia
from dftimewolf.cli.recipes import gcp_turbinia_import
from dftimewolf.cli.recipes import grr_artifact_hosts
from dftimewolf.cli.recipes import grr_flow_download
from dftimewolf.cli.recipes import grr_hunt_artifacts
from dftimewolf.cli.recipes import grr_hunt_file
from dftimewolf.cli.recipes import grr_huntresults_plaso_timesketch
from dftimewolf.cli.recipes import local_plaso
from dftimewolf.cli.recipes import timesketch_upload

from dftimewolf.lib import utils
Expand All @@ -26,6 +28,8 @@
from dftimewolf.lib.exporters import timesketch
from dftimewolf.lib.exporters import local_filesystem
from dftimewolf.lib.processors import localplaso
from dftimewolf.lib.processors import turbinia
from dftimewolf.lib.collectors.gcloud import GoogleCloudCollector

from dftimewolf.lib.state import DFTimewolfState

Expand All @@ -35,6 +39,7 @@
config.Config.register_module(filesystem.FilesystemCollector)
config.Config.register_module(localplaso.LocalPlasoProcessor)
config.Config.register_module(timesketch.TimesketchExporter)
config.Config.register_module(GoogleCloudCollector)

config.Config.register_module(grr_hosts.GRRArtifactCollector)
config.Config.register_module(grr_hosts.GRRFileCollector)
Expand All @@ -45,6 +50,7 @@

config.Config.register_module(timesketch.TimesketchExporter)
config.Config.register_module(local_filesystem.LocalFilesystemCopy)
config.Config.register_module(turbinia.TurbiniaProcessor)

# Try to open config.json and load configuration data from it.
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
Expand All @@ -59,6 +65,9 @@
config.Config.register_recipe(grr_hunt_file)
config.Config.register_recipe(grr_huntresults_plaso_timesketch)
config.Config.register_recipe(timesketch_upload)
# NOTE(review): gcp_import_analysis is registered here but is not in the
# recipe import list above — this raises a NameError at import time unless
# 'from dftimewolf.cli.recipes import gcp_import_analysis' is added.
config.Config.register_recipe(gcp_import_analysis)
config.Config.register_recipe(gcp_turbinia)
config.Config.register_recipe(gcp_turbinia_import)

# TODO(tomchop) Change the print statements by a better logging / display system

Expand Down
65 changes: 65 additions & 0 deletions dftimewolf/cli/recipes/gcp_import_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"""A dftimewolf recipe for responding to GCP incidents.

This recipe will copy disks from remote projects into our forensics project and
create an analysis VM to analyze it with the disk attached.

Disks of interest can either be specified in multiple ways:

--disk_names will copy specific disks in project, regardless of the instance
they're attached to.
--instance will copy the instance's boot disk.
--instance combined with --all_disks will copy all disks attached to a
specific instance

Sample usage

To copy the boot disk of "owned-instance" in "owned project":

$ dftimewolf gcp_import_analysis owned-project --instance owned-instance

To copy "disk1" and "disk2" in "owned-project":

$ dftimewolf gcp_import_analysis owned-project --disks=disk1,disk2

To copy all disks attached to "myinstance" in "myproject":

$ dftimewolf gcp_import_analysis myproject --instance myinstance \
--all_disks=True
"""

from __future__ import unicode_literals
from datetime import datetime

contents = {
'name':
'gcp_import_analysis',
'short_description': 'Copies disk from a GCP project to an analysis VM.',
'modules': [{
'name': 'GoogleCloudCollector',
'args': {
'analysis_project_name': '@analysis_project_name',
'remote_project_name': '@remote_project_name',
'remote_instance_name': '@instance',
'incident_id': '@incident_id',
'zone': '@zone',
'disk_names': '@disks',
'all_disks': '@all_disks',
'boot_disk_size': '@boot_disk_size',
},
}]
}

args = [
('remote_project_name',
'Name of the project containing the instance / disks to copy ', None),
('--incident_id', 'The Incident ID on which the name of the analysis VM '
'will be based', datetime.now().strftime("%Y%m%d%H%M%S")),
('--instance', 'Name of the instance to analyze.', None),
('--disks', 'Comma-separated list of disks to copy.', None),
('--all_disks', 'Copy all disks in the designated instance. '
'Overrides disk_names if specified', False),
('--analysis_project_name', 'Name of the project where the analysis VM will'
' be created', None),
('--boot_disk_size', 'The size of the analysis VM boot disk (in GB)', 50.0),
('--zone', 'The GCP zone the forensics project is in', None),
]
43 changes: 43 additions & 0 deletions dftimewolf/cli/recipes/gcp_turbinia.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# -*- coding: utf-8 -*-
"""Process a GCP persistent disk with Turbinia and send output to Timesketch.

This processes a disk that is already in the project where Turbinia exists. If
you want to copy the disk from another project, use the gcp_turbinia_import
recipe.
"""

from __future__ import unicode_literals

_short_description = ('Processes a GCP persistent disk with Turbinia and sends '
                      'results to Timesketch.')

# First pipeline stage: Turbinia processes the disk in place. '@name'
# placeholders are filled in from the args below.
_turbinia_module = {
    'name': 'TurbiniaProcessor',
    'args': {
        'disk_name': '@disk_name',
        'project': '@project',
        'zone': '@zone',
    },
}

# Second pipeline stage: results are exported to Timesketch. The '@ts_*'
# placeholders have no matching args below — presumably filled from the
# dftimewolf configuration; verify against the recipe loader.
_timesketch_module = {
    'name': 'TimesketchExporter',
    'args': {
        'endpoint': '@ts_endpoint',
        'username': '@ts_username',
        'password': '@ts_password',
        'incident_id': '@incident_id',
        'sketch_id': '@sketch_id',
    }
}

contents = {
    'name': 'gcp_turbinia',
    'short_description': _short_description,
    'modules': [_turbinia_module, _timesketch_module],
}

# (name, help_text, default) tuples for the recipe's argument parser.
args = [
    ('disk_name', 'Name of GCP persistent disk to process', None),
    ('project', 'Name of GCP project disk exists in', None),
    ('zone', 'The GCP zone the disk to process (and Turbinia workers) are in',
     None),
    ('--incident_id', 'Incident ID (used for Timesketch description)', None),
    ('--sketch_id', 'Sketch to which the timeline should be added', None),
]
68 changes: 68 additions & 0 deletions dftimewolf/cli/recipes/gcp_turbinia_import.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# -*- coding: utf-8 -*-
"""Imports a remote GCP persistent disk and sends to Turbinia and Timesketch.

This copies a disk from a remote GCP project and sends to Turbinia for
processing and then sends those results to Timesketch. It will also start an
analysis VM with the attached disk. If you want to process a disk already in
the same project as Turbinia you can use the gcp_turbinia recipe.
"""

from __future__ import unicode_literals
from datetime import datetime

_short_description = ('Imports a remote GCP persistent disk and sends to '
                      'Turbinia and Timesketch.')

# Pipeline: copy the remote disk into the analysis project, process it with
# Turbinia, then export the results to Timesketch. '@name' placeholders are
# substituted with the parsed command-line args declared below.
contents = {
    'name': 'gcp_turbinia_import',
    'short_description': _short_description,
    'modules': [{
        'name': 'GoogleCloudCollector',
        'args': {
            'analysis_project_name': '@analysis_project_name',
            'remote_project_name': '@remote_project_name',
            'remote_instance_name': '@instance',
            'incident_id': '@incident_id',
            'zone': '@zone',
            'disk_names': '@disks',
            'all_disks': '@all_disks',
            'boot_disk_size': '@boot_disk_size',
        },
    }, {
        'name': 'TurbiniaProcessor',
        'args': {
            # Intentionally None: the disk to process comes from the previous
            # module's (GoogleCloudCollector) output, not from the CLI.
            'disk_name': None,
            'project': '@analysis_project_name',
            'zone': '@zone',
        },
    }, {
        'name': 'TimesketchExporter',
        'args': {
            'endpoint': '@ts_endpoint',
            'username': '@ts_username',
            'password': '@ts_password',
            'incident_id': '@incident_id',
            'sketch_id': '@sketch_id',
        }
    }]
}

# (name, help_text, default) tuples for the recipe's argument parser.
# NOTE: the default incident_id timestamp is evaluated once at import time.
args = [
    ('remote_project_name',
     'Name of the project containing the instance / disks to copy ', None),
    ('--zone', 'The GCP zone the disk to process (and Turbinia workers) are in',
     None),
    ('--incident_id', 'Incident ID (used for Timesketch description)',
     datetime.now().strftime("%Y%m%d%H%M%S")),
    ('--sketch_id', 'Sketch to which the timeline should be added', None),
    # Fixed: this was '--timesketch_endpoint', which never matched the
    # '@ts_endpoint' placeholder above and was therefore dead; renamed so the
    # flag actually binds to the TimesketchExporter endpoint.
    ('--ts_endpoint', 'Endpoint of the Timesketch server to use',
     'https://localhost:5000'),
    ('--instance', 'Name of the instance to analyze.', None),
    ('--disks', 'Comma-separated list of disks to copy.', None),
    ('--all_disks', 'Copy all disks in the designated instance. '
     'Overrides disk_names if specified', False),
    ('--analysis_project_name', 'Name of the project where the analysis VM will'
     ' be created', 'turbinia-external-test'),
    ('--boot_disk_size', 'The size of the analysis VM boot disk (in GB)', 50.0),
]
158 changes: 158 additions & 0 deletions dftimewolf/lib/collectors/gcloud.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
"""Creates a forensic VM and copies a GCP disk to it for anaysis."""

from dftimewolf.lib import module

from googleapiclient.errors import HttpError
from oauth2client.client import AccessTokenRefreshError
from turbinia.lib import libcloudforensics


class GoogleCloudCollector(module.BaseModule):
  """Class for Google Cloud Collector.

  Copies disks from a remote GCP project into an analysis project and attaches
  them to a freshly started analysis VM.

  Attributes:
    analysis_project: The project that contains the analysis VM (instance of
        libcloudforensics.GoogleCloudProject).
    analysis_vm: The analysis VM on which the disk copy will be attached
        (instance of libcloudforensics.GoogleComputeInstance).
    incident_id: The incident ID used to name the Analysis VM (string).
    disks_to_copy: A list containing the disks to copy to the analysis project
        (instances of libcloudforensics.GoogleComputeDisk).
  """

  def __init__(self, state):
    """Initializes the Google Cloud collector.

    Args:
      state: The dftimewolf state object (instance of DFTimewolfState).
    """
    super(GoogleCloudCollector, self).__init__(state)
    self.analysis_project = None
    self.analysis_vm = None
    self.incident_id = None
    self.disks_to_copy = []

  def cleanup(self):
    """Cleans up after processing; nothing to do for this collector."""
    pass

  def process(self):
    """Copies the selected disks to the analysis project.

    For each disk selected by setup(): takes a snapshot, creates a new disk
    from it in the analysis project, attaches the new disk to the analysis VM,
    then deletes the snapshot. A (analysis VM name, new disk object) tuple is
    appended to self.state.output for every copied disk; nothing is returned.
    """
    for disk in self.disks_to_copy:
      print "Disk copy of {0:s} started...".format(disk.name)
      # The snapshot only exists to materialize the copy in the analysis
      # project; it is deleted as soon as the new disk is attached.
      snapshot = disk.snapshot()
      new_disk = self.analysis_project.create_disk_from_snapshot(
          snapshot, disk_name_prefix="incident" + self.incident_id)
      self.analysis_vm.attach_disk(new_disk)
      snapshot.delete()
      print "Disk {0:s} succesfully copied to {1:s}".format(
          disk.name, new_disk.name)
      self.state.output.append((self.analysis_vm.name, new_disk))

  # pylint: disable=arguments-differ
  def setup(self,
            analysis_project_name,
            remote_project_name,
            incident_id,
            zone,
            boot_disk_size,
            remote_instance_name=None,
            disk_names=None,
            all_disks=False):
    """Sets up a Google cloud collector.

    This method creates and starts an analysis VM in the analysis project and
    selects disks to copy from the remote project.

    If disk_names is specified, it will copy the corresponding disks from the
    project, ignoring disks belonging to any specific instances.

    If remote_instance_name is specified, two behaviors are possible:
    - If no other parameters are specified, it will select the instance's boot
      disk
    - if all_disks is set to True, it will select all disks in the project
      that are attached to the instance

    disk_names takes precedence over remote_instance_name.

    Args:
      analysis_project_name: The name of the project that contains the analysis
          VM (string).
      remote_project_name: The name of the remote project where the disks must
          be copied from (string).
      incident_id: The incident ID on which the name of the analysis VM will be
          based (string).
      zone: The zone in which new resources should be created (string).
      boot_disk_size: The size of the analysis VM boot disk (in GB) (float).
      remote_instance_name: The name of the instance in the remote project
          containing the disks to be copied (string).
      disk_names: Comma separated string with disk names to copy (string).
      all_disks: Copy all disks attached to the source instance (bool).
    """

    disk_names = disk_names.split(",") if disk_names else []

    self.analysis_project = libcloudforensics.GoogleCloudProject(
        analysis_project_name, default_zone=zone)
    remote_project = libcloudforensics.GoogleCloudProject(
        remote_project_name)

    # At least one disk-selection mechanism is required.
    if not (remote_instance_name or disk_names):
      self.state.add_error(
          "You need to specify at least an instance name or disks to copy",
          critical=True)
      return

    self.incident_id = incident_id
    analysis_vm_name = "gcp-forensics-vm-{0:s}".format(incident_id)
    print "Your analysis VM will be: {0:s}".format(analysis_vm_name)
    print "Complimentary gcloud command:"
    print "gcloud compute ssh --project {0:s} {1:s} --zone {2:s}".format(
        analysis_project_name,
        analysis_vm_name,
        zone)

    try:
      self.analysis_vm, _ = libcloudforensics.start_analysis_vm(
          self.analysis_project.project_id, analysis_vm_name, zone,
          boot_disk_size)

      if disk_names:
        for name in disk_names:
          try:
            self.disks_to_copy.append(remote_project.get_disk(name))
          except RuntimeError:
            # Abort on the first unknown disk name rather than copying a
            # partial set of disks.
            self.state.add_error(
                "Disk '{0:s}' was not found in project {1:s}".format(
                    name, remote_project_name),
                critical=True)
            break

      elif remote_instance_name:
        remote_instance = remote_project.get_instance(
            remote_instance_name)

        if all_disks:
          self.disks_to_copy = [
              remote_project.get_disk(disk_name)
              for disk_name in remote_instance.list_disks()
          ]
        else:
          self.disks_to_copy = [remote_instance.get_boot_disk()]

      if not self.disks_to_copy:
        self.state.add_error("Could not find any disks to copy",
                             critical=True)

    except AccessTokenRefreshError as err:
      self.state.add_error("Something is wrong with your gcloud access token.")
      self.state.add_error(err, critical=True)

    except HttpError as err:
      # Give a human-friendly hint for the common HTTP failures before
      # recording the raw error.
      if err.resp.status == 403:
        self.state.add_error(
            "Make sure you have the appropriate permissions on the project")
      if err.resp.status == 404:
        self.state.add_error(
            "GCP resource not found. Maybe a typo in the project / instance / "
            "disk name?")
      self.state.add_error(err, critical=True)
Loading