From d422a3434ef56258cff89f3e0e7648ad13a75cda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Czuba?= Date: Tue, 16 Nov 2021 16:46:20 +0100 Subject: [PATCH] Add set storage class option --- README.md | 2 ++ s3_tar/cli.py | 9 +++++++++ s3_tar/s3_mpu.py | 4 +++- s3_tar/s3_tar.py | 5 ++++- 4 files changed, 18 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4fd7924..3332d00 100644 --- a/README.md +++ b/README.md @@ -90,6 +90,8 @@ optional arguments: ADVANCED: Max retries for each request the s3 client makes --part-size-multiplier PART_SIZE_MULTIPLIER ADVANCED: Multiplied by 5MB to set the max size of each upload chunk + --storage-class STORAGE_CLASS + ADVANCED: Storage class selector (Defaults to 'STANDARD', see s3 documentation for valid choices) ``` diff --git a/s3_tar/cli.py b/s3_tar/cli.py index 3dee1fb..d1594f2 100644 --- a/s3_tar/cli.py +++ b/s3_tar/cli.py @@ -87,6 +87,14 @@ def create_parser(): type=int, default=10, ) + parser.add_argument( + "--storage-class", + help=("ADVANCED: Storage class selector." + " Valid choices are: STANDARD | REDUCED_REDUNDANCY | STANDARD_IA" + " | ONEZONE_IA | INTELLIGENT_TIERING | GLACIER | DEEP_ARCHIVE." 
+ " Defaults to 'STANDARD'"), + default='STANDARD', + ) return parser @@ -105,6 +113,7 @@ def cli(): allow_dups=args.allow_dups, s3_max_retries=args.s3_max_retries, part_size_multiplier=args.part_size_multiplier, + storage_class=args.storage_class, ) # pragma: no cover job.add_files( args.folder, diff --git a/s3_tar/s3_mpu.py b/s3_tar/s3_mpu.py index 1c3b46e..a6a426b 100644 --- a/s3_tar/s3_mpu.py +++ b/s3_tar/s3_mpu.py @@ -5,16 +5,18 @@ class S3MPU: - def __init__(self, s3, target_bucket, target_key): + def __init__(self, s3, target_bucket, target_key, storage_class): self.s3 = s3 self.target_bucket = target_bucket self.target_key = target_key + self.storage_class = storage_class self.parts_mapping = [] logger.info("Creating file {}".format(self.target_key)) self.resp = self.s3.create_multipart_upload( Bucket=self.target_bucket, Key=self.target_key, + StorageClass=self.storage_class, ) logger.debug("Multipart upload start: {}".format(self.resp)) diff --git a/s3_tar/s3_tar.py b/s3_tar/s3_tar.py index 6bf4de4..af71380 100644 --- a/s3_tar/s3_tar.py +++ b/s3_tar/s3_tar.py @@ -22,6 +22,7 @@ def __init__(self, source_bucket, target_key, allow_dups=False, s3_max_retries=4, part_size_multiplier=None, + storage_class='STANDARD', session=boto3.session.Session()): self.allow_dups = allow_dups self.source_bucket = source_bucket @@ -67,6 +68,8 @@ def __init__(self, source_bucket, target_key, self.part_size_multiplier = 10 self.part_size_multiplier = part_size_multiplier + self.storage_class = storage_class + self.all_keys = set() # Keys the user adds self.keys_to_delete = set() # Keys to delete on cleanup self.remove_keys = remove_keys @@ -132,7 +135,7 @@ def _new_file_upload(self, file_number): result_filepath = self._add_file_number(file_number) # Start multipart upload - mpu = S3MPU(self.s3, self.target_bucket, result_filepath) + mpu = S3MPU(self.s3, self.target_bucket, result_filepath, self.storage_class) current_file_size = 0 # If out of files or min size is met, then 
complete file