diff --git a/dftimewolf/cli/dftimewolf_recipes.py b/dftimewolf/cli/dftimewolf_recipes.py index 57397d78f..d3b41903d 100755 --- a/dftimewolf/cli/dftimewolf_recipes.py +++ b/dftimewolf/cli/dftimewolf_recipes.py @@ -10,12 +10,15 @@ from dftimewolf import config -from dftimewolf.cli.recipes import local_plaso +from dftimewolf.cli.recipes import gcp_import_analysis +from dftimewolf.cli.recipes import gcp_turbinia +from dftimewolf.cli.recipes import gcp_turbinia_import from dftimewolf.cli.recipes import grr_artifact_hosts from dftimewolf.cli.recipes import grr_flow_download from dftimewolf.cli.recipes import grr_hunt_artifacts from dftimewolf.cli.recipes import grr_hunt_file from dftimewolf.cli.recipes import grr_huntresults_plaso_timesketch +from dftimewolf.cli.recipes import local_plaso from dftimewolf.cli.recipes import timesketch_upload from dftimewolf.lib import utils @@ -26,6 +28,8 @@ from dftimewolf.lib.exporters import timesketch from dftimewolf.lib.exporters import local_filesystem from dftimewolf.lib.processors import localplaso +from dftimewolf.lib.processors import turbinia +from dftimewolf.lib.collectors.gcloud import GoogleCloudCollector from dftimewolf.lib.state import DFTimewolfState @@ -35,6 +39,7 @@ config.Config.register_module(filesystem.FilesystemCollector) config.Config.register_module(localplaso.LocalPlasoProcessor) config.Config.register_module(timesketch.TimesketchExporter) +config.Config.register_module(GoogleCloudCollector) config.Config.register_module(grr_hosts.GRRArtifactCollector) config.Config.register_module(grr_hosts.GRRFileCollector) @@ -45,6 +50,7 @@ config.Config.register_module(timesketch.TimesketchExporter) config.Config.register_module(local_filesystem.LocalFilesystemCopy) +config.Config.register_module(turbinia.TurbiniaProcessor) # Try to open config.json and load configuration data from it. 
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) @@ -59,6 +65,9 @@ config.Config.register_recipe(grr_hunt_file) config.Config.register_recipe(grr_huntresults_plaso_timesketch) config.Config.register_recipe(timesketch_upload) +config.Config.register_recipe(gcp_import_analysis) +config.Config.register_recipe(gcp_turbinia) +config.Config.register_recipe(gcp_turbinia_import) # TODO(tomchop) Change the print statements by a better logging / display system diff --git a/dftimewolf/cli/recipes/gcp_import_analysis.py b/dftimewolf/cli/recipes/gcp_import_analysis.py new file mode 100644 index 000000000..a1c3d52c0 --- /dev/null +++ b/dftimewolf/cli/recipes/gcp_import_analysis.py @@ -0,0 +1,65 @@ +"""A dftimewolf recipe for responding to GCP incidents. + +This recipe will copy disks from remote projects into our forensics project and +create an analysis VM to analyze it with the disk attached. + +Disks of interest can be specified in multiple ways: + + --disks will copy specific disks in the project, regardless of the instance + they're attached to. + --instance will copy the instance's boot disk. 
+ --instance combined with --all_disks will copy all disks attached to a + specific instance + +Sample usage + +To copy the boot disk of "owned-instance" in "owned project": + + $ dftimewolf gcp_import_analysis owned-project --instance owned-instance + +To copy "disk1" and "disk2" in "owned-project": + + $ dftimewolf gcp_import_analysis owned-project --disks=disk1,disk2 + +To copy all disks attached to "myinstance" in "myproject": + + $ dftimewolf gcp_import_analysis myproject --instance myinstance \ + --all_disks=True +""" + +from __future__ import unicode_literals +from datetime import datetime + +contents = { + 'name': + 'gcp_import_analysis', + 'short_description': 'Copies disk from a GCP project to an analysis VM.', + 'modules': [{ + 'name': 'GoogleCloudCollector', + 'args': { + 'analysis_project_name': '@analysis_project_name', + 'remote_project_name': '@remote_project_name', + 'remote_instance_name': '@instance', + 'incident_id': '@incident_id', + 'zone': '@zone', + 'disk_names': '@disks', + 'all_disks': '@all_disks', + 'boot_disk_size': '@boot_disk_size', + }, + }] +} + +args = [ + ('remote_project_name', + 'Name of the project containing the instance / disks to copy ', None), + ('--incident_id', 'The Incident ID on which the name of the analysis VM ' + 'will be based', datetime.now().strftime("%Y%m%d%H%M%S")), + ('--instance', 'Name of the instance to analyze.', None), + ('--disks', 'Comma-separated list of disks to copy.', None), + ('--all_disks', 'Copy all disks in the designated instance. 
' + 'Overrides disk_names if specified', False), + ('--analysis_project_name', 'Name of the project where the analysis VM will' + ' be created', None), + ('--boot_disk_size', 'The size of the analysis VM boot disk (in GB)', 50.0), + ('--zone', 'The GCP zone the forensics project is in', None), +] diff --git a/dftimewolf/cli/recipes/gcp_turbinia.py b/dftimewolf/cli/recipes/gcp_turbinia.py new file mode 100644 index 000000000..e028753d3 --- /dev/null +++ b/dftimewolf/cli/recipes/gcp_turbinia.py @@ -0,0 +1,43 @@ +# -*- coding: utf-8 -*- +"""Process a GCP persistent disk with Turbinia and send output to Timesketch. + +This processes a disk that is already in the project where Turbinia exists. If +you want to copy the disk from another project, use the gcp_turbinia_import +recipe. +""" + +from __future__ import unicode_literals + +_short_description = ('Processes a GCP persistent disk with Turbinia and sends ' + 'results to Timesketch.') + +contents = { + 'name': 'gcp_turbinia', + 'short_description': _short_description, + 'modules': [{ + 'name': 'TurbiniaProcessor', + 'args': { + 'disk_name': '@disk_name', + 'project': '@project', + 'zone': '@zone', + }, + }, { + 'name': 'TimesketchExporter', + 'args': { + 'endpoint': '@ts_endpoint', + 'username': '@ts_username', + 'password': '@ts_password', + 'incident_id': '@incident_id', + 'sketch_id': '@sketch_id', + } + }] +} + +args = [ + ('disk_name', 'Name of GCP persistent disk to process', None), + ('project', 'Name of GCP project disk exists in', None), + ('zone', 'The GCP zone the disk to process (and Turbinia workers) are in', + None), + ('--incident_id', 'Incident ID (used for Timesketch description)', None), + ('--sketch_id', 'Sketch to which the timeline should be added', None), +] diff --git a/dftimewolf/cli/recipes/gcp_turbinia_import.py b/dftimewolf/cli/recipes/gcp_turbinia_import.py new file mode 100644 index 000000000..9c80995bb --- /dev/null +++ b/dftimewolf/cli/recipes/gcp_turbinia_import.py @@ -0,0 +1,68 @@ +# 
-*- coding: utf-8 -*- +"""Imports a remote GCP persistent disk and sends to Turbinia and Timesketch. + +This copies a disk from a remote GCP project and sends to Turbinia for +processing and then sends those results to Timesketch. It will also start an +analysis VM with the attached disk. If you want to process a disk already in +the same project as Turbinia you can use the gcp_turbinia recipe. +""" + +from __future__ import unicode_literals +from datetime import datetime + +_short_description = ('Imports a remote GCP persistent disk and sends to ' + 'Turbinia and Timesketch.') + +contents = { + 'name': 'gcp_turbinia_import', + 'short_description': _short_description, + 'modules': [{ + 'name': 'GoogleCloudCollector', + 'args': { + 'analysis_project_name': '@analysis_project_name', + 'remote_project_name': '@remote_project_name', + 'remote_instance_name': '@instance', + 'incident_id': '@incident_id', + 'zone': '@zone', + 'disk_names': '@disks', + 'all_disks': '@all_disks', + 'boot_disk_size': '@boot_disk_size', + }, + }, { + 'name': 'TurbiniaProcessor', + 'args': { + 'disk_name': None, + 'project': '@analysis_project_name', + 'zone': '@zone', + }, + }, { + 'name': 'TimesketchExporter', + 'args': { + 'endpoint': '@ts_endpoint', + 'username': '@ts_username', + 'password': '@ts_password', + 'incident_id': '@incident_id', + 'sketch_id': '@sketch_id', + } + }] +} + +args = [ + ('remote_project_name', + 'Name of the project containing the instance / disks to copy ', None), + + ('--zone', 'The GCP zone the disk to process (and Turbinia workers) are in', + None), + ('--incident_id', 'Incident ID (used for Timesketch description)', + datetime.now().strftime("%Y%m%d%H%M%S")), + ('--sketch_id', 'Sketch to which the timeline should be added', None), + ('--timesketch_endpoint', 'Endpoint of the Timesketch server to use', + 'https://localhost:5000'), + ('--instance', 'Name of the instance to analyze.', None), + ('--disks', 'Comma-separated list of disks to copy.', None), + 
('--all_disks', 'Copy all disks in the designated instance. ' + 'Overrides disk_names if specified', False), + ('--analysis_project_name', 'Name of the project where the analysis VM will' + ' be created', 'turbinia-external-test'), + ('--boot_disk_size', 'The size of the analysis VM boot disk (in GB)', 50.0), +] diff --git a/dftimewolf/lib/collectors/gcloud.py b/dftimewolf/lib/collectors/gcloud.py new file mode 100755 index 000000000..37086d850 --- /dev/null +++ b/dftimewolf/lib/collectors/gcloud.py @@ -0,0 +1,158 @@ +"""Creates a forensic VM and copies a GCP disk to it for analysis.""" + +from dftimewolf.lib import module + +from googleapiclient.errors import HttpError +from oauth2client.client import AccessTokenRefreshError +from turbinia.lib import libcloudforensics + + +class GoogleCloudCollector(module.BaseModule): + """Class for Google Cloud Collector. + + Attributes: + analysis_project: The project that contains the analysis VM (instance of + libcloudforensics.GoogleCloudProject). + analysis_vm: The analysis VM on which the disk copy will be attached + (instance of libcloudforensics.GoogleComputeInstance). + incident_id: The incident ID used to name the Analysis VM (string). + disks_to_copy: A list containing the disks to copy to the analysis project + (instances of libcloudforensics.GoogleComputeDisk). + """ + + def __init__(self, state): + super(GoogleCloudCollector, self).__init__(state) + self.analysis_project = None + self.analysis_vm = None + self.incident_id = None + self.disks_to_copy = [] + + def cleanup(self): + pass + + def process(self): + """Copy a disk to the analysis project. + + Returns: + Array containing a tuple of the analysis VM's name and name of the new + copy of the disk. 
+ """ + for disk in self.disks_to_copy: + print "Disk copy of {0:s} started...".format(disk.name) + snapshot = disk.snapshot() + new_disk = self.analysis_project.create_disk_from_snapshot( + snapshot, disk_name_prefix="incident" + self.incident_id) + self.analysis_vm.attach_disk(new_disk) + snapshot.delete() + print "Disk {0:s} successfully copied to {1:s}".format( + disk.name, new_disk.name) + self.state.output.append((self.analysis_vm.name, new_disk)) + + # pylint: disable=arguments-differ + def setup(self, + analysis_project_name, + remote_project_name, + incident_id, + zone, + boot_disk_size, + remote_instance_name=None, + disk_names=None, + all_disks=False): + """Sets up a Google cloud collector. + + This method creates and starts an analysis VM in the analysis project and + selects disks to copy from the remote project. + + If disk_names is specified, it will copy the corresponding disks from the + project, ignoring disks belonging to any specific instances. + + If remote_instance_name is specified, two behaviors are possible: + - If no other parameters are specified, it will select the instance's boot + disk + - if all_disks is set to True, it will select all disks in the project + that are attached to the instance + + disk_names takes precedence over remote_instance_name + + Args: + analysis_project_name: The name of the project that contains the analysis + VM (string). + remote_project_name: The name of the remote project where the disks must + be copied from (string). + incident_id: The incident ID on which the name of the analysis VM will be + based (string). + zone: The zone in which new resources should be created (string). + boot_disk_size: The size of the analysis VM boot disk (in GB) (float). + remote_instance_name: The name of the instance in the remote project + containing the disks to be copied (string). + disk_names: Comma separated string with disk names to copy (string). + all_disks: Copy all disks attached to the source instance (bool). 
+ """ + + disk_names = disk_names.split(",") if disk_names else [] + + self.analysis_project = libcloudforensics.GoogleCloudProject( + analysis_project_name, default_zone=zone) + remote_project = libcloudforensics.GoogleCloudProject( + remote_project_name) + + if not (remote_instance_name or disk_names): + self.state.add_error( + "You need to specify at least an instance name or disks to copy", + critical=True) + return + + self.incident_id = incident_id + analysis_vm_name = "gcp-forensics-vm-{0:s}".format(incident_id) + print "Your analysis VM will be: {0:s}".format(analysis_vm_name) + print "Complimentary gcloud command:" + print "gcloud compute ssh --project {0:s} {1:s} --zone {2:s}".format( + analysis_project_name, + analysis_vm_name, + zone) + + try: + self.analysis_vm, _ = libcloudforensics.start_analysis_vm( + self.analysis_project.project_id, analysis_vm_name, zone, + boot_disk_size) + + if disk_names: + for name in disk_names: + try: + self.disks_to_copy.append(remote_project.get_disk(name)) + except RuntimeError: + self.state.add_error( + "Disk '{0:s}' was not found in project {1:s}".format( + name, remote_project_name), + critical=True) + break + + elif remote_instance_name: + remote_instance = remote_project.get_instance( + remote_instance_name) + + if all_disks: + self.disks_to_copy = [ + remote_project.get_disk(disk_name) + for disk_name in remote_instance.list_disks() + ] + else: + self.disks_to_copy = [remote_instance.get_boot_disk()] + + if not self.disks_to_copy: + self.state.add_error("Could not find any disks to copy", + critical=True) + + except AccessTokenRefreshError as err: + self.state.add_error("Something is wrong with your gcloud access token.") + self.state.add_error(err, critical=True) + + except HttpError as err: + if err.resp.status == 403: + self.state.add_error( + "Make sure you have the appropriate permissions on the project") + if err.resp.status == 404: + self.state.add_error( + "GCP resource not found. 
Maybe a typo in the project / instance / " + "disk name?") + self.state.add_error(err, critical=True) diff --git a/dftimewolf/lib/processors/turbinia.py b/dftimewolf/lib/processors/turbinia.py new file mode 100644 index 000000000..3e190eaec --- /dev/null +++ b/dftimewolf/lib/processors/turbinia.py @@ -0,0 +1,153 @@ +# -*- coding: utf-8 -*- +"""Processes cloud artifacts using a remote Turbinia instance.""" +from __future__ import unicode_literals +from __future__ import absolute_import + +import os +import tempfile + +from dftimewolf.lib.module import BaseModule + +from turbinia import client as turbinia_client +from turbinia import config +from turbinia import evidence +from turbinia import output_manager +from turbinia import TurbiniaException +from turbinia.message import TurbiniaRequest + + +class TurbiniaProcessor(BaseModule): + """Process cloud disks with remote Turbinia instance. + + Attributes: + client: A TurbiniaClient object + disk_name: Name of the disk to process + instance (string): The name of the Turbinia instance + project: The project containing the disk to process + region (string): The region Turbinia is in + zone: The zone containing the disk to process + _output_path: The path to output files + """ + + def __init__(self, state): + """Initialize the Turbinia artifact processor object. + + Args: + state: The dfTimewolf state object + """ + super(TurbiniaProcessor, self).__init__(state) + self.client = None + self.disk_name = None + self.instance = None + self.project = None + self.region = None + self.zone = None + self._output_path = None + + def setup(self, disk_name, project, zone): # pylint: disable=arguments-differ + """Sets up the object attributes. 
+ + Args: + disk_name: Name of the disk to process + project: The project containing the disk to process + zone: The zone containing the disk to process + """ + if self.state.input and not disk_name: + _, disk = self.state.input[0] + disk_name = disk.name + print('Using disk {0:s} from previous collector'.format(disk_name)) + + if disk_name is None or project is None or zone is None: + self.state.add_error( + 'disk_name, project or zone are not all specified, bailing out', + critical=True) + return + self.disk_name = disk_name + self.project = project + self.zone = zone + self._output_path = tempfile.mkdtemp() + + try: + config.LoadConfig() + self.region = config.TURBINIA_REGION + self.instance = config.PUBSUB_TOPIC + if config.PROJECT != self.project: + self.state.add_error( + 'Specified project {0:s} does not match Turbinia configured ' + 'project {1:s}. Use gcp_turbinia_import recipe to copy the disk ' + 'into the same project.'.format( + self.project, config.PROJECT), critical=True) + return + self.client = turbinia_client.TurbiniaClient() + except TurbiniaException as e: + self.state.add_error(e, critical=True) + + def cleanup(self): + pass + + def process(self): + """Process files with Turbinia.""" + log_file_path = os.path.join(self._output_path, 'turbinia.log') + print('Turbinia log file: {0:s}'.format(log_file_path)) + + evidence_ = evidence.GoogleCloudDisk( + disk_name=self.disk_name, project=self.project, zone=self.zone) + request = TurbiniaRequest() + request.evidence.append(evidence_) + + try: + print('Creating Turbinia request {0:s} with Evidence {1:s}'.format( + request.request_id, evidence_.name)) + self.client.send_request(request) + print('Waiting for Turbinia request {0:s} to complete'.format( + request.request_id)) + self.client.wait_for_request( + instance=self.instance, project=self.project, region=self.region, + request_id=request.request_id) + task_data = self.client.get_task_data( + instance=self.instance, project=self.project, 
region=self.region, + request_id=request.request_id) + print self.client.format_task_status( + instance=self.instance, project=self.project, region=self.region, + request_id=request.request_id, all_fields=True) + except TurbiniaException as e: + self.state.add_error(e, critical=True) + return + + # This finds all .plaso files in the Turbinia output, and determines if they + # are local or remote (it's possible this will be running against a local + # instance of Turbinia). + local_paths = [] + gs_paths = [] + for task in task_data: + for path in task.get('saved_paths', []): + if path.startswith('/') and path.endswith('.plaso'): + local_paths.append(path) + if path.startswith('gs://') and path.endswith('.plaso'): + gs_paths.append(path) + + if not local_paths and not gs_paths: + self.state.add_error( + 'No .plaso files found in Turbinia output.', critical=True) + return + + # Any local .plaso files that exist we can add immediately to the output + self.state.output = [(p, p) for p in local_paths if os.path.exists(p)] + + # For files remote in GCS we copy each plaso file back from GCS and then add + # to output paths + for path in gs_paths: + local_path = None + try: + output_writer = output_manager.GCSOutputWriter( + path, local_output_dir=self._output_path) + local_path = output_writer.copy_from(path) + except TurbiniaException as e: + self.state.add_error(e, critical=True) + return + + if local_path: + self.state.output.append((path, local_path)) + + if not self.state.output: + self.state.add_error('No .plaso files could be found.', critical=True) diff --git a/tests/lib/processors/turbinia.py b/tests/lib/processors/turbinia.py new file mode 100644 index 000000000..1ed8735f5 --- /dev/null +++ b/tests/lib/processors/turbinia.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +"""Tests the Turbinia processor.""" + +from __future__ import unicode_literals + +import unittest + +from dftimewolf.lib import state +from dftimewolf.lib.processors import 
turbinia + + +class TurbiniaProcessorTest(unittest.TestCase): + """Tests for the Turbinia processor.""" + + def testInitialization(self): + """Tests that the processor can be initialized.""" + test_state = state.DFTimewolfState() + turbinia_processor = turbinia.TurbiniaProcessor(test_state) + self.assertIsNotNone(turbinia_processor) + + +if __name__ == '__main__': + unittest.main()