108 changes: 53 additions & 55 deletions config_table.tf
@@ -13,12 +13,12 @@ resource "aws_dynamodb_table" "loader_config" {
}

resource "aws_dynamodb_table_item" "load_config_full_items" {
for_each = toset([for table in local.parsed_bulk_data_schemas["tables"] : table["table"]["name"]])
for_each = toset(local.table_names)

table_name = aws_dynamodb_table.loader_config.name
hash_key = aws_dynamodb_table.loader_config.hash_key

item = data.template_file.loader_config_full_item[each.key].rendered
item = local.loader_config_full_items[each.key]

lifecycle {
ignore_changes = [
@@ -33,39 +33,13 @@ resource "aws_dynamodb_table_item" "load_config_full_items" {
}
}

data "template_file" "loader_config_full_item" {
for_each = toset([for table in local.parsed_bulk_data_schemas["tables"] : table["table"]["name"]])

template = "${file("${path.module}/config_item.json")}"
vars = {
kind = "full"
bulk_data_table = each.key
redshift_endpoint = data.aws_redshift_cluster.sync_data_target.endpoint
redshift_database_name: var.redshift_database_name
redshift_port = data.aws_redshift_cluster.sync_data_target.port
redshift_username = var.redshift_username
redshift_password = aws_kms_ciphertext.redshift_password.ciphertext_blob
schema = var.redshift_schema
s3_bucket = "agra-data-exports-${var.controlshift_environment}"
manifest_bucket = aws_s3_bucket.manifest.bucket
manifest_prefix = var.manifest_prefix
failed_manifest_prefix = var.failed_manifest_prefix
success_topic_arn = aws_sns_topic.success_sns_topic.arn
failure_topic_arn = aws_sns_topic.failure_sns_topic.arn
current_batch = random_id.current_batch.b64_url
column_list = data.http.column_list[each.key].body
truncate_target = true
compress = try(local.parsed_bulk_data_schemas["settings"]["compression_format"], "")
}
}

resource "aws_dynamodb_table_item" "load_config_incremental_items" {
for_each = toset([for table in local.parsed_bulk_data_schemas["tables"] : table["table"]["name"]])
for_each = toset(local.table_names)

table_name = aws_dynamodb_table.loader_config.name
hash_key = aws_dynamodb_table.loader_config.hash_key

item = data.template_file.loader_config_incremental_item[each.key].rendered
item = local.loader_config_incremental_items[each.key]

lifecycle {
ignore_changes = [
@@ -80,29 +54,53 @@ resource "aws_dynamodb_table_item" "load_config_incremental_items" {
}
}

data "template_file" "loader_config_incremental_item" {
for_each = toset([for table in local.parsed_bulk_data_schemas["tables"] : table["table"]["name"]])

template = "${file("${path.module}/config_item.json")}"
vars = {
kind = "incremental"
bulk_data_table = each.key
redshift_endpoint = data.aws_redshift_cluster.sync_data_target.endpoint
redshift_database_name: var.redshift_database_name
redshift_port = data.aws_redshift_cluster.sync_data_target.port
redshift_username = var.redshift_username
redshift_password = aws_kms_ciphertext.redshift_password.ciphertext_blob
schema = var.redshift_schema
s3_bucket = "agra-data-exports-${var.controlshift_environment}"
manifest_bucket = aws_s3_bucket.manifest.bucket
manifest_prefix = var.manifest_prefix
failed_manifest_prefix = var.failed_manifest_prefix
success_topic_arn = aws_sns_topic.success_sns_topic.arn
failure_topic_arn = aws_sns_topic.failure_sns_topic.arn
current_batch = random_id.current_batch.b64_url
column_list = data.http.column_list[each.key].body
truncate_target = false
compress = try(local.parsed_bulk_data_schemas["settings"]["compression_format"], "")
locals {
table_names = [for table in local.parsed_bulk_data_schemas["tables"] : table["table"]["name"]]

loader_config_full_items = {
for name in local.table_names : name => templatefile("${path.module}/config_item.json", {
kind = "full"
bulk_data_table = name
redshift_endpoint = data.aws_redshift_cluster.sync_data_target.endpoint
redshift_database_name = var.redshift_database_name
redshift_port = data.aws_redshift_cluster.sync_data_target.port
redshift_username = var.redshift_username
redshift_password = aws_kms_ciphertext.redshift_password.ciphertext_blob
schema = var.redshift_schema
s3_bucket = "agra-data-exports-${var.controlshift_environment}"
manifest_bucket = aws_s3_bucket.manifest.bucket
manifest_prefix = var.manifest_prefix
failed_manifest_prefix = var.failed_manifest_prefix
success_topic_arn = aws_sns_topic.success_sns_topic.arn
failure_topic_arn = aws_sns_topic.failure_sns_topic.arn
current_batch = random_id.current_batch.b64_url
column_list = data.http.column_list[name].body
truncate_target = true
compress = try(local.parsed_bulk_data_schemas["settings"]["compression_format"], "")
})
}

loader_config_incremental_items = {
for name in local.table_names : name => templatefile("${path.module}/config_item.json", {
kind = "incremental"
bulk_data_table = name
redshift_endpoint = data.aws_redshift_cluster.sync_data_target.endpoint
redshift_database_name = var.redshift_database_name
redshift_port = data.aws_redshift_cluster.sync_data_target.port
redshift_username = var.redshift_username
redshift_password = aws_kms_ciphertext.redshift_password.ciphertext_blob
schema = var.redshift_schema
s3_bucket = "agra-data-exports-${var.controlshift_environment}"
manifest_bucket = aws_s3_bucket.manifest.bucket
manifest_prefix = var.manifest_prefix
failed_manifest_prefix = var.failed_manifest_prefix
success_topic_arn = aws_sns_topic.success_sns_topic.arn
failure_topic_arn = aws_sns_topic.failure_sns_topic.arn
current_batch = random_id.current_batch.b64_url
column_list = data.http.column_list[name].body
truncate_target = false
compress = try(local.parsed_bulk_data_schemas["settings"]["compression_format"], "")
})
}
}

@@ -134,11 +132,11 @@ data "http" "bulk_data_schemas" {
}

locals {
parsed_bulk_data_schemas = jsondecode(data.http.bulk_data_schemas.body)
parsed_bulk_data_schemas = jsondecode(data.http.bulk_data_schemas.response_body)
}

data "http" "column_list" {
for_each = toset([for table in local.parsed_bulk_data_schemas["tables"] : table["table"]["name"]])
for_each = toset(local.table_names)

url = "https://${var.controlshift_hostname}/api/bulk_data/schema/columns?table=${each.key}"
}
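
With the deprecated template_file data source gone, the rendered config items are ordinary values in local.loader_config_full_items and local.loader_config_incremental_items, so they can be inspected directly. A minimal sketch for sanity-checking one rendered item; "signatures" and the output name are assumptions for illustration, any key present in local.table_names works:

# Throwaway output for eyeballing one rendered item after the templatefile() migration.
# "signatures" is an assumed table name; substitute any entry from local.table_names.
output "debug_loader_config_full_item" {
  value     = local.loader_config_full_items["signatures"]
  sensitive = true # the rendered JSON embeds the KMS-encrypted Redshift password
}

The same expression can also be evaluated ad hoc in terraform console without committing an output.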
58 changes: 41 additions & 17 deletions glue_job.tf
@@ -26,45 +26,69 @@ resource "aws_glue_crawler" "signatures_crawler" {

resource "aws_s3_bucket" "glue_resources" {
bucket = var.glue_scripts_bucket_name
}

# Ownership controls block is required to support ACLs.
resource "aws_s3_bucket_ownership_controls" "glue_resources" {
bucket = aws_s3_bucket.glue_resources.id
rule {
object_ownership = "ObjectWriter"
}
}

resource "aws_s3_bucket_acl" "glue_resources" {
depends_on = [aws_s3_bucket_ownership_controls.glue_resources]

bucket = aws_s3_bucket.glue_resources.id
acl = "private"
server_side_encryption_configuration {
rule {
apply_server_side_encryption_by_default {
sse_algorithm = "AES256"
}
}

resource "aws_s3_bucket_server_side_encryption_configuration" "glue_resources" {
bucket = aws_s3_bucket.glue_resources.id

rule {
apply_server_side_encryption_by_default {
sse_algorithm = "AES256"
}
}
}

resource "aws_s3_bucket_lifecycle_configuration" "glue_resources" {
bucket = aws_s3_bucket.glue_resources.id

rule {
id = "Remove temp files over a week old"
status = "Enabled"

lifecycle_rule {
id = "Remove temp files over a week old"
abort_incomplete_multipart_upload_days = 0
enabled = true
prefix = "production/temp/"
filter {
prefix = "production/temp/"
}

expiration {
days = 7
expired_object_delete_marker = false
}

abort_incomplete_multipart_upload {
days_after_initiation = 7 # Note: must be greater than 0
}
}
}

data "template_file" "signatures_script" {
template = file("${path.module}/templates/signatures_job.py.tpl")
vars = {
locals {
signatures_script = templatefile("${path.module}/templates/signatures_job.py.tpl", {
catalog_database_name = aws_glue_catalog_database.catalog_db.name
redshift_database_name = var.redshift_database_name
redshift_schema = var.redshift_schema
redshift_connection_name = aws_glue_connection.redshift_connection.name
}
})
}

resource "aws_s3_bucket_object" "signatures_script" {
resource "aws_s3_object" "signatures_script" {
bucket = aws_s3_bucket.glue_resources.id
key = "${var.controlshift_environment}/signatures_job.py"
acl = "private"

content = data.template_file.signatures_script.rendered
content = local.signatures_script
}

resource "aws_iam_role" "glue_service_role" {
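
Splitting the previously inline bucket settings into the standalone aws_s3_bucket_* resources (AWS provider v4+) and swapping aws_s3_bucket_object for aws_s3_object leaves the already-configured bucket settings untracked until they are imported. A sketch of one-off import blocks for Terraform 1.5 or newer; "my-glue-scripts-bucket" is a stand-in for the actual value of var.glue_scripts_bucket_name:

# Sketch, not part of this PR: adopt the existing bucket configuration into state
# so Terraform does not try to recreate it on the first apply after upgrading.
import {
  to = aws_s3_bucket_ownership_controls.glue_resources
  id = "my-glue-scripts-bucket"
}

import {
  to = aws_s3_bucket_server_side_encryption_configuration.glue_resources
  id = "my-glue-scripts-bucket"
}

import {
  to = aws_s3_bucket_acl.glue_resources
  id = "my-glue-scripts-bucket,private" # bucket name plus the canned ACL
}

On Terraform 1.4.x the equivalent terraform import CLI commands do the same job; either way the blocks or commands can be removed once the resources are in state.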
48 changes: 37 additions & 11 deletions s3.tf
@@ -7,29 +7,55 @@ provider "aws" {
resource "aws_s3_bucket" "manifest" {
provider = aws.controlshift
bucket = var.manifest_bucket_name
acl = "private"

server_side_encryption_configuration {
rule {
apply_server_side_encryption_by_default {
sse_algorithm = "AES256"
}
}
}
tags = {
Name = "ControlShift puts import manifests here"
}
}

# Ownership controls block is required to support ACLs.
resource "aws_s3_bucket_ownership_controls" "manifest" {
provider = aws.controlshift
bucket = aws_s3_bucket.manifest.id
rule {
object_ownership = "ObjectWriter"
}
}

resource "aws_s3_bucket_lifecycle_configuration" "manifest" {
provider = aws.controlshift
bucket = aws_s3_bucket.manifest.id

  # Expire the ingested manifests 5 days after they have been processed, to save storage space while leaving enough
  # time to analyze anything that might have gone wrong.
lifecycle_rule {
id = "expire-manifests"
enabled = true
rule {
id = "expire-manifests"
status = "Enabled"

expiration {
days = 5
}

# Best practice: filter is now required inside the rule block
filter {}
}
}

resource "aws_s3_bucket_acl" "manifest" {
provider = aws.controlshift
depends_on = [aws_s3_bucket_ownership_controls.manifest]

bucket = aws_s3_bucket.manifest.id
acl = "private"
}

resource "aws_s3_bucket_server_side_encryption_configuration" "manifest" {
provider = aws.controlshift
bucket = aws_s3_bucket.manifest.id

rule {
apply_server_side_encryption_by_default {
sse_algorithm = "AES256"
}
}
}
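
The expire-manifests rule now applies to the whole bucket through the empty filter block. If only the ingested manifests should age out, the rule inside aws_s3_bucket_lifecycle_configuration.manifest could instead be scoped to the existing prefix variables; a sketch, assuming var.manifest_prefix carries no trailing slash:

  # Alternative sketch, not what this PR does: expire only objects under the manifest prefix.
  rule {
    id     = "expire-manifests"
    status = "Enabled"

    filter {
      prefix = "${var.manifest_prefix}/"
    }

    expiration {
      days = 5
    }
  }

A second rule scoped to var.failed_manifest_prefix would be needed if failed manifests should expire on the same schedule.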
5 changes: 1 addition & 4 deletions versions.tf
@@ -1,5 +1,5 @@
terraform {
required_version = ">= 0.13"
required_version = ">= 1.4.5"
required_providers {
archive = {
source = "hashicorp/archive"
@@ -14,8 +14,5 @@ terraform {
random = {
source = "hashicorp/random"
}
template = {
source = "hashicorp/template"
}
}
}
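
Dropping hashicorp/template works because templatefile() is a built-in function and needs no provider. Since the change also relies on the standalone S3 configuration resources and aws_s3_object, the aws provider entry (outside this hunk) presumably needs a v4-or-newer floor; a sketch of how that constraint might look, with the exact bound being an assumption:

terraform {
  required_version = ">= 1.4.5"

  required_providers {
    # Assumed constraint, not shown in this diff: aws_s3_object and the standalone
    # bucket configuration resources used above require major version 4 of the provider.
    aws = {
      source  = "hashicorp/aws"
      version = ">= 4.0"
    }
  }
}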