diff --git a/dsi/backends/aws.py b/dsi/backends/aws.py new file mode 100644 index 00000000..71d98dd9 --- /dev/null +++ b/dsi/backends/aws.py @@ -0,0 +1,192 @@ +import sqlite3 +import re +import subprocess +from datetime import datetime +import textwrap +import os +import textwrap +import base64 +import random + +from hashlib import sha1 +from dsi.backends.filesystem import Backend +from collections import OrderedDict + +# HPSS backend class +class AWS(Backend): + def __init__(self, region, id, aws_files, role=None): + """ + Initializes AWS backend + + 'region': The aws region + 'id': The aws id to use + `aws_files`: list of files with aws file paths + 'role': The desired role to use + """ + self.aws_info = OrderedDict() + self.aws_file_info = OrderedDict() + self.aws_info['region'] = region + self.aws_info['id'] = id + self.aws_info['role'] = role + self.aws_info['profile'] = None + for aws_file in aws_files.keys(): + self.aws_file_info[aws_file] = { + 'local_path': aws_files[aws_file], + } + + self.run_aws_saml() + + def git_commit_sha(self): + pass + + + def put(self, local_file, remote_file) -> bool: + """ + Puts a local file/directory on AWS + """ + + result = self.aws_copy(local_file, remote_file) + + return result + + def get(self, remote_file, local_file) -> bool: + + result = self.aws_copy(remote_file, local_file) + + return result + + # OLD NAME OF ingest_artifacts(). TO BE DEPRECATED IN FUTURE DSI RELEASE + def put_artifacts(self, collection, isVerbose=False): + return self.ingest_artifacts(collection, isVerbose) + + def ingest_artifacts(self, collection, isVerbose=False): + for f in self.aws_info.keys(): + self.put(self.aws_info[f]['local_path'], f) + + # DEPRECATING IN FUTURE DSI RELEASE. USE query_artifacts() + def get_artifacts(self, query, kwargs): + pass + + def query_artifacts(self, query, kwargs): + pass + + # DEPRECATING IN FUTURE DSI RELEASE. USE notebook() + def inspect_artifacts(self, kwargs): + pass + + def notebook(self, kwargs): + pass + + # DEPRECATING IN FUTURE DSI RELEASE. USE process_artifacts() + def read_to_artifacts(self, kwargs): + pass + + def process_artifacts(self, kwargs): + pass + + def find(self, query_object, kwargs): + pass + + def find_table(self, query_object, kwargs): + pass + + def find_column(self, query_object, kwargs): + pass + + def find_cell(self, query_object, kwargs): + pass + + def close(self): + pass + + def run_aws_saml(self): + command = ["aws-saml-cmd"] + arg_list = ['-r', self.aws_info['region'], '-id', self.aws_info['id']] + if self.aws_info['role'] is not None: + arg_list += ['-ro', self.aws_info['role']] + + command += arg_list + + stdout = "" + stderr = "" + returncode = -1 + try: + process = subprocess.Popen(command, stdin=subprocess.DEVNULL, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) + stdout, stderr = process.communicate() + returncode = process.communicate() + except FileNotFoundError as e: + print("Error running aws-saml-cmd: %s" % e) + + print(stdout) + if len(stdout) > 0: + for line in stdout.splitlines(): + line = line.strip() + + if "Configured aws profile:" not in line: + continue + + tokens = line.split(': ') + if len(tokens) != 2: + continue + self.aws_info['profile'] = tokens[1] +# print(self.aws_info) + + return stdout, stderr, returncode + + def run_aws(self, subcmd, arg_list): + """ + Runs aws command wth the supplied subcmd and arguments + """ + + profile = self.aws_info['profile'] + command = ["aws", "--profile", profile, "s3", subcmd] + command += arg_list + + stdout = "" + stderr = "" + returncode = -1 + try: + process = subprocess.Popen(command, stdin=subprocess.DEVNULL, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) + + stdout, stderr = process.communicate() + returncode = process.communicate() + except FileNotFoundError as e: + print("Error running aws: %s" % e) + + return stdout, stderr, returncode + + def aws_sync(self, source_dir, dest_dir) -> bool: + """ + Syncs a directory to or from aws + source_dir: a local directory or an s3:// path + dest_dir: a local directory or an s3:// path + + returns: True for success or False on failure + """ + + result = True + stdout, stderr, returncode = self.run_aws('sync', [source_dir, dest_dir]) + print(stdout) + if returncode: + print(stderr) + result = False + + return result + + def aws_copy(self, source_file, dest_file) -> bool: + """ + Copies an aws remote file to a local directory + source_file: a local file or an s3:// path + dest_file: a local file or an s3:// path + + returns: True for success or False on failure + """ + + result = True + stdout, stderr, returncode = self.run_aws('cp', [source_file, dest_file]) + print(stdout) + if returncode: + result = False + print(stderr) + + return result diff --git a/dsi/backends/tests/test_aws.py b/dsi/backends/tests/test_aws.py new file mode 100644 index 00000000..611700cd --- /dev/null +++ b/dsi/backends/tests/test_aws.py @@ -0,0 +1,41 @@ +import git +from collections import OrderedDict + +from dsi.backends.aws import AWS +import os + +def test_aws(): + files = { 'wildfiredata.csv': {'local_path': '/path_to/dsi/examples/wildfire/wildfiredata.csv'} } + #aws-saml-cmd -r us-gov-west-1 -id your_aws_id + store = AWS('us-gov-west-1', 'your_aws_id', files) + store.close() + assert True + # No error implies success + +def test_aws_put(): + files = { 'wildfiredata.csv': {'local_path': '/path_to/dsi/examples/wildfire/wildfiredata.csv'} } + store = AWS('us-gov-west-1', 'your_aws_id', files) + store.put(files['wildfiredata.csv']['local_path'], 's3://your_bucket_name/') + store.close() + assert True + +def test_aws_get(): + files = { 'wildfiredata.csv': {'local_path': '/path_to/dsi/examples/data/wildfiredata_copied.csv'} } + store = AWS('us-gov-west-1', 'your_aws_id', files) + store.get('s3://your_bucket_name/wildfiredata.csv', files['wildfiredata.csv']['local_path']) + store.close() + assert True + +def test_aws_sync_remote_to_local(): + files = { 'tmp': {'local_path': '/path_to/dsi/dsi/backends/tests'} } + store = AWS('us-gov-west-1', 'your_aws_id', files) + store.aws_sync('s3://your_bucket_name/tmp', files['tmp']['local_path']) + store.close() + assert True + +def test_aws_sync_local_to_remote(): + files = { 'tmp': {'local_path': '/path_to/dsi/dsi/backends/tests/tmp'} } + store = AWS('us-gov-west-1', 'your_aws_id', files) + store.aws_sync(files['tmp']['local_path'], 's3://your_bucket_name/tmp') + store.close() + assert True diff --git a/requirements.extras.txt b/requirements.extras.txt index 1c675e10..911266e1 100644 --- a/requirements.extras.txt +++ b/requirements.extras.txt @@ -4,4 +4,5 @@ nbformat>=5.10.2 graphviz>=0.20.3 opencv-python>=4.9.0.80 duckdb>=1.2.0 -xmltodict>=0.14.2 \ No newline at end of file +xmltodict>=0.14.2 +git+https://gitlab.aws.lanl.gov/cpt/lanl-saml-cloud.git@latest#egg=lanl-saml-cloud