diff --git a/config_table.tf b/config_table.tf index ce5c858..9f0ee65 100644 --- a/config_table.tf +++ b/config_table.tf @@ -13,12 +13,12 @@ resource "aws_dynamodb_table" "loader_config" { } resource "aws_dynamodb_table_item" "load_config_full_items" { - for_each = toset([for table in local.parsed_bulk_data_schemas["tables"] : table["table"]["name"]]) + for_each = toset(local.table_names) table_name = aws_dynamodb_table.loader_config.name hash_key = aws_dynamodb_table.loader_config.hash_key - item = data.template_file.loader_config_full_item[each.key].rendered + item = local.loader_config_full_items[each.key] lifecycle { ignore_changes = [ @@ -33,39 +33,13 @@ resource "aws_dynamodb_table_item" "load_config_full_items" { } } -data "template_file" "loader_config_full_item" { - for_each = toset([for table in local.parsed_bulk_data_schemas["tables"] : table["table"]["name"]]) - - template = "${file("${path.module}/config_item.json")}" - vars = { - kind = "full" - bulk_data_table = each.key - redshift_endpoint = data.aws_redshift_cluster.sync_data_target.endpoint - redshift_database_name: var.redshift_database_name - redshift_port = data.aws_redshift_cluster.sync_data_target.port - redshift_username = var.redshift_username - redshift_password = aws_kms_ciphertext.redshift_password.ciphertext_blob - schema = var.redshift_schema - s3_bucket = "agra-data-exports-${var.controlshift_environment}" - manifest_bucket = aws_s3_bucket.manifest.bucket - manifest_prefix = var.manifest_prefix - failed_manifest_prefix = var.failed_manifest_prefix - success_topic_arn = aws_sns_topic.success_sns_topic.arn - failure_topic_arn = aws_sns_topic.failure_sns_topic.arn - current_batch = random_id.current_batch.b64_url - column_list = data.http.column_list[each.key].body - truncate_target = true - compress = try(local.parsed_bulk_data_schemas["settings"]["compression_format"], "") - } -} - resource "aws_dynamodb_table_item" "load_config_incremental_items" { - for_each = toset([for table in 
local.parsed_bulk_data_schemas["tables"] : table["table"]["name"]]) + for_each = toset(local.table_names) table_name = aws_dynamodb_table.loader_config.name hash_key = aws_dynamodb_table.loader_config.hash_key - item = data.template_file.loader_config_incremental_item[each.key].rendered + item = local.loader_config_incremental_items[each.key] lifecycle { ignore_changes = [ @@ -80,29 +54,53 @@ resource "aws_dynamodb_table_item" "load_config_incremental_items" { } } -data "template_file" "loader_config_incremental_item" { - for_each = toset([for table in local.parsed_bulk_data_schemas["tables"] : table["table"]["name"]]) - - template = "${file("${path.module}/config_item.json")}" - vars = { - kind = "incremental" - bulk_data_table = each.key - redshift_endpoint = data.aws_redshift_cluster.sync_data_target.endpoint - redshift_database_name: var.redshift_database_name - redshift_port = data.aws_redshift_cluster.sync_data_target.port - redshift_username = var.redshift_username - redshift_password = aws_kms_ciphertext.redshift_password.ciphertext_blob - schema = var.redshift_schema - s3_bucket = "agra-data-exports-${var.controlshift_environment}" - manifest_bucket = aws_s3_bucket.manifest.bucket - manifest_prefix = var.manifest_prefix - failed_manifest_prefix = var.failed_manifest_prefix - success_topic_arn = aws_sns_topic.success_sns_topic.arn - failure_topic_arn = aws_sns_topic.failure_sns_topic.arn - current_batch = random_id.current_batch.b64_url - column_list = data.http.column_list[each.key].body - truncate_target = false - compress = try(local.parsed_bulk_data_schemas["settings"]["compression_format"], "") +locals { + table_names = [for table in local.parsed_bulk_data_schemas["tables"] : table["table"]["name"]] + + loader_config_full_items = { + for name in local.table_names : name => templatefile("${path.module}/config_item.json", { + kind = "full" + bulk_data_table = name + redshift_endpoint = data.aws_redshift_cluster.sync_data_target.endpoint + 
redshift_database_name = var.redshift_database_name + redshift_port = data.aws_redshift_cluster.sync_data_target.port + redshift_username = var.redshift_username + redshift_password = aws_kms_ciphertext.redshift_password.ciphertext_blob + schema = var.redshift_schema + s3_bucket = "agra-data-exports-${var.controlshift_environment}" + manifest_bucket = aws_s3_bucket.manifest.bucket + manifest_prefix = var.manifest_prefix + failed_manifest_prefix = var.failed_manifest_prefix + success_topic_arn = aws_sns_topic.success_sns_topic.arn + failure_topic_arn = aws_sns_topic.failure_sns_topic.arn + current_batch = random_id.current_batch.b64_url + column_list = data.http.column_list[name].response_body + truncate_target = true + compress = try(local.parsed_bulk_data_schemas["settings"]["compression_format"], "") + }) + } + + loader_config_incremental_items = { + for name in local.table_names : name => templatefile("${path.module}/config_item.json", { + kind = "incremental" + bulk_data_table = name + redshift_endpoint = data.aws_redshift_cluster.sync_data_target.endpoint + redshift_database_name = var.redshift_database_name + redshift_port = data.aws_redshift_cluster.sync_data_target.port + redshift_username = var.redshift_username + redshift_password = aws_kms_ciphertext.redshift_password.ciphertext_blob + schema = var.redshift_schema + s3_bucket = "agra-data-exports-${var.controlshift_environment}" + manifest_bucket = aws_s3_bucket.manifest.bucket + manifest_prefix = var.manifest_prefix + failed_manifest_prefix = var.failed_manifest_prefix + success_topic_arn = aws_sns_topic.success_sns_topic.arn + failure_topic_arn = aws_sns_topic.failure_sns_topic.arn + current_batch = random_id.current_batch.b64_url + column_list = data.http.column_list[name].response_body + truncate_target = false + compress = try(local.parsed_bulk_data_schemas["settings"]["compression_format"], "") + }) } } @@ -134,11 +132,11 @@ data "http" "bulk_data_schemas" { } locals { - parsed_bulk_data_schemas = 
jsondecode(data.http.bulk_data_schemas.body) + parsed_bulk_data_schemas = jsondecode(data.http.bulk_data_schemas.response_body) } data "http" "column_list" { - for_each = toset([for table in local.parsed_bulk_data_schemas["tables"] : table["table"]["name"]]) + for_each = toset(local.table_names) url = "https://${var.controlshift_hostname}/api/bulk_data/schema/columns?table=${each.key}" } diff --git a/glue_job.tf b/glue_job.tf index 15bea02..e258101 100644 --- a/glue_job.tf +++ b/glue_job.tf @@ -26,45 +26,69 @@ resource "aws_glue_crawler" "signatures_crawler" { resource "aws_s3_bucket" "glue_resources" { bucket = var.glue_scripts_bucket_name +} + +# Ownership controls block is required to support ACLs. +resource "aws_s3_bucket_ownership_controls" "glue_resources" { + bucket = aws_s3_bucket.glue_resources.id + rule { + object_ownership = "ObjectWriter" + } +} + +resource "aws_s3_bucket_acl" "glue_resources" { + depends_on = [aws_s3_bucket_ownership_controls.glue_resources] + bucket = aws_s3_bucket.glue_resources.id acl = "private" - server_side_encryption_configuration { - rule { - apply_server_side_encryption_by_default { - sse_algorithm = "AES256" - } +} + +resource "aws_s3_bucket_server_side_encryption_configuration" "glue_resources" { + bucket = aws_s3_bucket.glue_resources.id + + rule { + apply_server_side_encryption_by_default { + sse_algorithm = "AES256" } } +} + +resource "aws_s3_bucket_lifecycle_configuration" "glue_resources" { + bucket = aws_s3_bucket.glue_resources.id + + rule { + id = "Remove temp files over a week old" + status = "Enabled" - lifecycle_rule { - id = "Remove temp files over a week old" - abort_incomplete_multipart_upload_days = 0 - enabled = true - prefix = "production/temp/" + filter { + prefix = "production/temp/" + } expiration { days = 7 - expired_object_delete_marker = false + } + + abort_incomplete_multipart_upload { + days_after_initiation = 7 # Note: must be greater than 0 } } } -data "template_file" "signatures_script" { - 
template = file("${path.module}/templates/signatures_job.py.tpl") - vars = { +locals { + signatures_script = templatefile("${path.module}/templates/signatures_job.py.tpl", { catalog_database_name = aws_glue_catalog_database.catalog_db.name redshift_database_name = var.redshift_database_name redshift_schema = var.redshift_schema redshift_connection_name = aws_glue_connection.redshift_connection.name - } + }) } -resource "aws_s3_bucket_object" "signatures_script" { +resource "aws_s3_object" "signatures_script" { bucket = aws_s3_bucket.glue_resources.id key = "${var.controlshift_environment}/signatures_job.py" acl = "private" - content = data.template_file.signatures_script.rendered + content = local.signatures_script } resource "aws_iam_role" "glue_service_role" { diff --git a/s3.tf b/s3.tf index 359fa29..b982978 100644 --- a/s3.tf +++ b/s3.tf @@ -7,29 +7,55 @@ provider "aws" { resource "aws_s3_bucket" "manifest" { provider = aws.controlshift bucket = var.manifest_bucket_name - acl = "private" - server_side_encryption_configuration { - rule { - apply_server_side_encryption_by_default { - sse_algorithm = "AES256" - } - } - } tags = { Name = "ControlShift puts import manifests here" } +} + +# Ownership controls block is required to support ACLs. +resource "aws_s3_bucket_ownership_controls" "manifest" { + provider = aws.controlshift + bucket = aws_s3_bucket.manifest.id + rule { + object_ownership = "ObjectWriter" + } +} + +resource "aws_s3_bucket_lifecycle_configuration" "manifest" { + provider = aws.controlshift + bucket = aws_s3_bucket.manifest.id # expire the ingested manifests after 5 days after they have been processed to save disk space while providing enough # time to analyze things that might have gone wrong. 
- lifecycle_rule { - id = "expire-manifests" - enabled = true + rule { + id = "expire-manifests" + status = "Enabled" expiration { days = 5 } + + # Best practice: filter is now required inside the rule block + filter {} } } +resource "aws_s3_bucket_acl" "manifest" { + provider = aws.controlshift + depends_on = [aws_s3_bucket_ownership_controls.manifest] + + bucket = aws_s3_bucket.manifest.id + acl = "private" +} + +resource "aws_s3_bucket_server_side_encryption_configuration" "manifest" { + provider = aws.controlshift + bucket = aws_s3_bucket.manifest.id + rule { + apply_server_side_encryption_by_default { + sse_algorithm = "AES256" + } + } +} diff --git a/versions.tf b/versions.tf index 5c6fd54..9fa1eee 100644 --- a/versions.tf +++ b/versions.tf @@ -1,5 +1,5 @@ terraform { - required_version = ">= 0.13" + required_version = ">= 1.4.5" required_providers { archive = { source = "hashicorp/archive" @@ -14,8 +14,5 @@ terraform { random = { source = "hashicorp/random" } - template = { - source = "hashicorp/template" - } } }