diff --git a/README.md b/README.md
index 0337acc..d40bac3 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,10 @@
 # Terraform modules by FastRobot
 
 This is a collection of modules in use by FastRobot. We typically call them
-via `terragrunt`. Each top-level directory is its own self-contained module.
+via `terragrunt`. Eventually we'll have some common utility modules, but for now each top-level directory is standalone and defaults to the cheapest possible way to accomplish a task.
 
 ## Modules
 
-* `elk` - stands up an AWS ES endpoint and an instance running logstash
\ No newline at end of file
+* `atlantis` - opinionated wrapper for the official atlantis terraform module, setting up github webhooks and auth for selected repos, plus some ALB authentication schemes through OIDC providers.
+* `elk` - stands up an AWS ES endpoint and an instance running logstash.
+* `monitoring` - prometheus with various exporters as ECS tasks remote-writing to an Amazon Managed Prometheus central collector, fronted by your choice of three grafanas: Amazon Managed Grafana, Grafana Cloud, or open-source Grafana as an ECS task.
diff --git a/monitoring/README.md b/monitoring/README.md
index 20b81d6..5b7296f 100644
--- a/monitoring/README.md
+++ b/monitoring/README.md
@@ -1,25 +1,46 @@
 # FR approved Monitoring stack
 
-Sets up:
+Conditionally sets up most mixes of:
+
 * AWS Managed Prometheus backend (done)
-* ECS cluster
-  * Prometheus task
-    * prometheus server
-    * prometheus-sdconfig-reloader
-    * node_exporter (daemonset)
-    * blackbox
+* ECS fargate cluster (done)
+  * Prometheus scraper task (done)
+    * prometheus server (done)
+    * prometheus-sdconfig-reloader (done)
+    * node_exporter (daemonset) TODO
+    * blackbox_exporter TODO
     * cadvisor (daemonset)
-    * grafana (optional!)
 * Grafana
-  * in ecs, cheapest?
-  * via aws managed (default)
-  * via grafana cloud (fanciest)
+  * As an ecs service, maybe the cheapest?
+  * via aws managed (default) (done)
+  * via grafana cloud (fanciest, TODO)
+
+We're using an AMP workspace as the central collection point for as many prometheus scrapers and their assorted exporters as you need. Use your choice of grafana to view the collected metrics, plus cloudwatch, opensearch, and any other datasource you want to configure.
+
+## AMP Metric Retention
+
+From https://docs.aws.amazon.com/prometheus/latest/userguide/what-is-Amazon-Managed-Service-Prometheus.html
+
+> Metrics ingested into a workspace are stored for 150 days, and are then automatically deleted.
 
 ## AMP Ruler and AlertManager
+
+This module also allows you to import a collection of yaml files as recording and alerting rules. We store these in terragrunt's live repo structure, each namespace in its own file, but you can pass any map that matches the structure as linked below.
+
 * use https://docs.aws.amazon.com/prometheus/latest/userguide/AMP-Ruler.html to pass a map of names of yaml strings in by namespace
 * Expects prometheus standard rule and alert configuration
 * express the inputs however you like. Examples include templates through terragrunt or direct yaml conversion of hcl types in the calling module
 
 - Roughly based off of the structure from https://github.com/aws-samples/prometheus-for-ecs
-# Developing for
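+
+For example, from a terragrunt unit (a hypothetical sketch; `rule_groups` is this module's input, and the file paths are made up):
+
+```hcl
+inputs = {
+  rule_groups = {
+    # namespace -> prometheus rule-file yaml, one file per namespace
+    infra = file("rules/infra.yaml")
+    apps  = yamlencode(local.app_rules) # or convert hcl types straight to yaml
+  }
+}
+```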
+
+## All the ways to run grafana
+
+Depending on your complexity/scale/money tradeoffs you may have a clear preference for one of these grafana interfaces:
+
+### Amazon Managed Grafana
+
+https://aws.amazon.com/grafana/
+
+This one is presumably the easiest to run going forward, as it's completely hosted by Amazon, but it charges per user/month ($9 per admin, $5 per lesser user), so it's probably best suited to very small teams. Because it's charged per user, you have to set up some form of enterprise auth. I defaulted to AWS SSO, which was a complicated thing I didn't want to take on AND has org-wide implications I had to set up with manual intervention.
+
+### Open Source Grafana in ECS Fargate
+
+Not working yet, but next, as I suspect it's the cheapest AND most flexible way to run the grafana I'm used to. I'd prefer to use grafana's auth-against-github to manage access, and AFAICT you can't do that against AMG.
+
+### Grafana Cloud
+
+https://grafana.com/products/cloud/
+
+Free forever for 3 users, and probably easy to point at the AMP/ES/opensearch datasources. It will probably be cutting-edge grafana, so it's worth exploring the premium tier, which is a dollar cheaper than AMG. Not working yet.
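+
+However it's hosted, the flavor is chosen with this module's enable flags. A minimal terragrunt sketch (hypothetical values; the variable names are from variables.tf):
+
+```hcl
+inputs = {
+  enable                 = true
+  enable_amp             = true
+  enable_prometheus      = true
+  enable_grafana_managed = false # skip AMG's per-user pricing
+  enable_grafana_ecs     = true  # run grafana-oss as an ECS service instead
+}
+```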
diff --git a/monitoring/data.tf b/monitoring/data.tf
new file mode 100644
index 0000000..e16e84b
--- /dev/null
+++ b/monitoring/data.tf
@@ -0,0 +1,12 @@
+data "aws_region" "current" {}
+
+data "aws_subnets" "private" {
+  filter {
+    name   = "vpc-id"
+    values = [var.vpc_id]
+  }
+}
+
+#output "subnet_cidr_blocks" {
+#  value = [for s in data.aws_subnet.example : s.cidr_block]
+#}
diff --git a/monitoring/ecs.tf b/monitoring/ecs.tf
new file mode 100644
index 0000000..a8cce05
--- /dev/null
+++ b/monitoring/ecs.tf
@@ -0,0 +1,35 @@
+locals {
+  create_ecs = alltrue([var.enable, anytrue([var.enable_grafana_ecs, var.enable_prometheus])])
+  # also local.create_prometheus from main.tf
+}
+
+# ECS cluster for monitoring tasks
+module "ecs" {
+  count   = local.create_ecs ? 1 : 0
+  source  = "terraform-aws-modules/ecs/aws"
+  version = "3.5.0"
+  name    = "${local.full_name}-ecs"
+
+  container_insights = true
+
+  capacity_providers = ["FARGATE", "FARGATE_SPOT"]
+
+  default_capacity_provider_strategy = [
+    {
+      capacity_provider = "FARGATE_SPOT"
+    }
+  ]
+
+  tags = local.tags
+}
+
+# alternatively, set up the cloudwatch agent to scrape and remote-write to AMP
+# https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/ContainerInsights-Prometheus.html
+
+# define container definitions for a prometheus and sd sidecar and other exporters
+# https://github.com/cloudposse/terraform-aws-ecs-container-definition
+
+
+
+# combine multiple above defs into
+# https://github.com/cloudposse/terraform-aws-ecs-alb-service-task
diff --git a/monitoring/grafana.tf b/monitoring/grafana.tf
new file mode 100644
index 0000000..e07c15e
--- /dev/null
+++ b/monitoring/grafana.tf
@@ -0,0 +1,297 @@
+# all the grafana related tasks, doing any combination of
+# 1) optionally configuring the Amazon Managed Grafana
+#    * satisfies a desire to "keep everything in AWS"
+#    * Free for 90 days, then $9/mo per admin (min 1) and $5/mo per read-only user (min 0)
+# 2) optionally configuring an ECS task running `grafana/grafana-oss`
+#    * update or rewrite of https://github.com/56kcloud/terraform-grafana
+# 3) optionally configuring grafana cloud
+
+locals {
+  create_grafana_managed = alltrue([var.enable, var.enable_grafana_managed])
+  create_grafana_ecs     = alltrue([var.enable, var.enable_grafana_ecs])
+
+  # local.full_name is defined in main.tf
+  #   full_name = "${var.namespace}-${var.name}-${var.environment}"
+  # local.tags comes from main.tf
+}
+
+########################
+# Amazon Managed Grafana
+########################
+
+resource "aws_grafana_workspace" "grafana_managed" {
+  count = local.create_grafana_managed ? 1 : 0
+  # required
+  account_access_type      = var.grafana_managed_account_access_type
+  authentication_providers = var.grafana_managed_authentication_providers
+  permission_type          = var.grafana_managed_permission_type
+  # optional
+  data_sources              = var.grafana_managed_data_sources
+  description               = "tf managed for ${local.full_name}"
+  name                      = "${local.full_name}-grafana"
+  notification_destinations = ["SNS"] # seems like the only possible value
+  role_arn                  = aws_iam_role.grafana[0].arn
+  stack_set_name            = local.full_name
+}
+
+resource "aws_iam_role" "grafana" {
+  count              = local.create_grafana_managed ? 1 : 0
+  name               = "${local.full_name}-grafana"
+  assume_role_policy = data.aws_iam_policy_document.grafana-assume-policy-doc[0].json
+}
+
+data "aws_iam_policy_document" "grafana-assume-policy-doc" {
+  count = local.create_grafana_managed ? 1 : 0
+  statement {
+    actions = ["sts:AssumeRole"]
+    sid     = "AllowGrafanaService${var.environment}role"
+    principals {
+      identifiers = ["grafana.amazonaws.com"]
+      type        = "Service"
+    }
+  }
+}
+
+resource "aws_iam_policy" "grafana" {
+  count       = local.create_grafana_managed ? 1 : 0 # might expand that to also include creating grafana via ecs task
+  name        = "${local.full_name}-grafana"
+  description = "Policy for Managed Grafana"
+  policy      = data.aws_iam_policy_document.grafana-service-access[0].json
+}
+
+data "aws_iam_policy_document" "grafana-service-access" {
+  count = local.create_grafana_managed ? 1 : 0
+  # AMP
+  statement {
+    sid    = "ListAllPrometheusWorkspaces"
+    effect = "Allow"
+    actions = [
+      "aps:ListWorkspaces",
+    ]
+    resources = ["*"]
+  }
+  statement {
+    sid    = "PerPrometheusWorkspacesPermissions"
+    effect = "Allow"
+    actions = [
+      "aps:DescribeWorkspace",
+      "aps:QueryMetrics",
+      "aps:GetLabels",
+      "aps:GetSeries",
+      "aps:GetMetricMetadata"
+    ]
+    resources = ["*"] # this can be tighter
+  }
+  # OpenSearch (formerly elasticsearch)
+  statement {
+    sid    = "OpenSearchList"
+    effect = "Allow"
+    actions = [
+      "es:ESHttpGet",
+      "es:DescribeElasticsearchDomains",
+      "es:ListDomainNames"
+    ]
+    resources = ["*"]
+  }
+  statement {
+    sid = "OpenSearchPost"
+    actions = [
+      "es:ESHttpPost"
+    ]
+    resources = [
+      "arn:aws:es:*:*:domain/*/_msearch*",
+      "arn:aws:es:*:*:domain/*/_opendistro/_ppl"
+    ]
+  }
+  # SNS Alerting
+  statement {
+    sid       = "SNSPublish"
+    actions   = ["sns:Publish"]
+    resources = [var.sns_topic_arn]
+  }
+  # CloudWatch
+  statement {
+    sid = "AllowReadingMetricsFromCloudWatch"
+    actions = [
+      "cloudwatch:DescribeAlarmsForMetric",
+      "cloudwatch:DescribeAlarmHistory",
+      "cloudwatch:DescribeAlarms",
+      "cloudwatch:ListMetrics",
+      "cloudwatch:GetMetricStatistics",
+      "cloudwatch:GetMetricData"
+    ]
+    resources = ["*"]
+  }
+  statement {
+    sid = "AllowReadingLogsFromCloudWatch"
+    actions = [
+      "logs:DescribeLogGroups",
+      "logs:GetLogGroupFields",
+      "logs:StartQuery",
+      "logs:StopQuery",
+      "logs:GetQueryResults",
+      "logs:GetLogEvents"
+    ]
+    resources = ["*"]
+  }
+  statement {
+    sid = "AllowReadingTagsInstancesRegionsFromEC2"
+    actions = [
+      "ec2:DescribeTags",
+      "ec2:DescribeInstances",
+      "ec2:DescribeRegions"
+    ]
+    resources = ["*"]
+  }
+  statement {
+    sid       = "AllowReadingResourcesForTags"
+    actions   = ["tag:GetResources"]
+    resources = ["*"]
+  }
+}
+
+resource "aws_iam_role_policy_attachment" "grafana-service-attach" {
+  count      = local.create_grafana_managed ? 1 : 0
+  policy_arn = aws_iam_policy.grafana[0].arn
+  role       = aws_iam_role.grafana[0].name
+}
+
+# Done by hand: the AWS SSO signup; created a user and a grafana-admin group
+# (should also make a grafana-reader group).
+# Had to assign the new group to the account and give it a bunch of iam admin
+# stuff while making an aws-sso permission set.
+
+resource "aws_grafana_role_association" "aws_sso_admin_role" {
+  count        = local.create_grafana_managed ? 1 : 0
+  role         = "ADMIN"
+  group_ids    = [data.aws_identitystore_group.admins[0].group_id]
+  workspace_id = aws_grafana_workspace.grafana_managed[0].id
+}
+
+data "aws_ssoadmin_instances" "internal" {
+  count = local.create_grafana_managed ? 1 : 0
+}
+
+data "aws_identitystore_group" "admins" {
+  count             = local.create_grafana_managed ? 1 : 0
+  identity_store_id = tolist(data.aws_ssoadmin_instances.internal[0].identity_store_ids)[0]
+
+  filter {
+    attribute_path  = "DisplayName"
+    attribute_value = "grafana-admin"
+  }
+}
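+
+# A sketch of the matching read-only association (hypothetical until a
+# grafana-reader group and its data source exist; mirrors the ADMIN one above):
+#
+# resource "aws_grafana_role_association" "aws_sso_viewer_role" {
+#   count        = local.create_grafana_managed ? 1 : 0
+#   role         = "VIEWER"
+#   group_ids    = [data.aws_identitystore_group.readers[0].group_id]
+#   workspace_id = aws_grafana_workspace.grafana_managed[0].id
+# }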
+
+# regardless of which grafana method we use, do the following:
+# load the datastores (did so manually from the UI, sucked)
+# load some dashboards
+# * cAdvisor with filtering for ECS cluster/task https://grafana.com/grafana/dashboards/15200
+
+###########
+# Grafana on ECS (open source, self hosted)
+###########
+
+module "grafana_ecs_container_definition" {
+  count            = local.create_grafana_ecs ? 1 : 0
+  source           = "registry.terraform.io/cloudposse/ecs-container-definition/aws"
+  version          = "0.58.1"
+  container_name   = "${local.full_name}-grafana"
+  container_image  = var.grafana_ecs_container_image
+  container_memory = var.grafana_ecs_container_memory
+  secrets = [
+    {
+      valueFrom = "/grafana/${local.full_name}/GF_AUTH_GITHUB_ALLOW_SIGN_UP"
+      name      = "GF_AUTH_GITHUB_ALLOW_SIGN_UP"
+    },
+    {
+      valueFrom = "/grafana/${local.full_name}/GF_AUTH_GITHUB_ALLOWED_ORGANIZATIONS"
+      name      = "GF_AUTH_GITHUB_ALLOWED_ORGANIZATIONS"
+    },
+    {
+      valueFrom = "/grafana/${local.full_name}/GF_AUTH_GITHUB_API_URL"
+      name      = "GF_AUTH_GITHUB_API_URL"
+    },
+    {
+      valueFrom = "/grafana/${local.full_name}/GF_AUTH_GITHUB_AUTH_URL"
+      name      = "GF_AUTH_GITHUB_AUTH_URL"
+    },
+    {
+      valueFrom = "/grafana/${local.full_name}/GF_AUTH_GITHUB_CLIENT_ID"
+      name      = "GF_AUTH_GITHUB_CLIENT_ID"
+    },
+    {
+      valueFrom = "/grafana/${local.full_name}/GF_AUTH_GITHUB_CLIENT_SECRET"
+      name      = "GF_AUTH_GITHUB_CLIENT_SECRET"
+    },
+    {
+      valueFrom = "/grafana/${local.full_name}/GF_AUTH_GITHUB_ENABLED"
+      name      = "GF_AUTH_GITHUB_ENABLED"
+    },
+    {
+      valueFrom = "/grafana/${local.full_name}/GF_AUTH_GITHUB_SCOPES"
+      name      = "GF_AUTH_GITHUB_SCOPES"
+    },
+    {
+      valueFrom = "/grafana/${local.full_name}/GF_AUTH_GITHUB_TOKEN_URL"
+      name      = "GF_AUTH_GITHUB_TOKEN_URL"
+    },
+    {
+      valueFrom = "/grafana/${local.full_name}/GF_SERVER_ROOT_URL"
+      name      = "GF_SERVER_ROOT_URL"
+    },
+    {
+      valueFrom = "/grafana/${local.full_name}/GF_SERVER_ENABLE_GZIP"
+      name      = "GF_SERVER_ENABLE_GZIP"
+    },
+    {
+      valueFrom = "/grafana/${local.full_name}/GF_DEFAULT_INSTANCE_NAME"
+      name      = "GF_DEFAULT_INSTANCE_NAME"
+    },
+  ]
+  port_mappings = [
+    {
+      containerPort = 3000
+      hostPort      = 3000 # ?maybe?
+      protocol      = "tcp"
+    }
+  ]
+  log_configuration = {
+    logDriver = "awslogs"
+    options = {
+      awslogs-group         = "/ecs/Prometheus"
+      awslogs-create-group  = true
+      awslogs-region        = data.aws_region.current.name
+      awslogs-stream-prefix = "${local.full_name}-grafana"
+    }
+  }
+}
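+
+# The GF_* secrets above are read from SSM Parameter Store at task launch; they
+# are not created by this module. A minimal sketch of seeding one (hypothetical
+# values; assumes a SecureString plus kms:Decrypt on the task execution role):
+#
+# resource "aws_ssm_parameter" "gf_github_client_id" {
+#   name  = "/grafana/${local.full_name}/GF_AUTH_GITHUB_CLIENT_ID"
+#   type  = "SecureString"
+#   value = var.github_oauth_client_id # hypothetical input
+# }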
+
+module "grafana_ecs_alb_service_task" {
+  count                              = local.create_grafana_ecs ? 1 : 0
+  source                             = "registry.terraform.io/cloudposse/ecs-alb-service-task/aws"
+  version                            = "0.64.0"
+  namespace                          = var.namespace
+  stage                              = var.environment
+  name                               = var.name
+  delimiter                          = "-"
+  alb_security_group                 = aws_security_group.grafana_alb_sg[0].id
+  container_definition_json          = module.grafana_ecs_container_definition[0].json_map_encoded_list
+  ecs_cluster_arn                    = module.ecs[0].ecs_cluster_arn
+  launch_type                        = var.grafana_ecs_launch_types
+  vpc_id                             = var.vpc_id
+  security_group_ids                 = [aws_security_group.grafana_ecs_sg[0].id]
+  subnet_ids                         = var.private_subnet_ids
+  tags                               = local.tags
+  ignore_changes_task_definition     = false
+  assign_public_ip                   = var.grafana_ecs_assign_public_ip
+  propagate_tags                     = "SERVICE"
+  deployment_minimum_healthy_percent = 0   # undeploy old task before new
+  deployment_maximum_percent         = 100 # never run more
+  deployment_controller_type         = "ECS"
+  desired_count                      = 1
+  task_memory                        = var.grafana_ecs_task_memory
+  task_cpu                           = var.grafana_ecs_task_cpu
+}
diff --git a/monitoring/iam.tf b/monitoring/iam.tf
new file mode 100644
index 0000000..7fb47eb
--- /dev/null
+++ b/monitoring/iam.tf
@@ -0,0 +1,125 @@
+## prometheus ecs IAM
+# First, the roles needed for the prom scraper ecs tasks
+resource "aws_iam_role" "prom-scraper-app-role" {
+  name               = "${local.full_name}-prom-srv-ecs-app-role"
+  assume_role_policy = data.aws_iam_policy_document.prom_ecs_task_assume_policy.json
+}
+
+resource "aws_iam_role" "prom-scraper-task-execution-role" {
+  name               = "${local.full_name}-prom-srv-task-execution-role"
+  assume_role_policy = data.aws_iam_policy_document.prom_ecs_task_assume_policy.json
+}
+
+# Attach some policies, managed and not, to the app role
+resource "aws_iam_role_policy_attachment" "prom-generic" {
+  for_each = toset([
+    "arn:aws:iam::aws:policy/CloudWatchLogsFullAccess",
+  ])
+  policy_arn = each.value
+  role       = aws_iam_role.prom-scraper-app-role.name
+}
+
+resource "aws_iam_role_policy_attachment" "prom-generic-writer" {
+  policy_arn = aws_iam_policy.prom_writer.arn
+  role       = aws_iam_role.prom-scraper-app-role.name
+}
+
+resource "aws_iam_role_policy_attachment" "prom-generic-discovery" {
+  policy_arn = aws_iam_policy.prom_stack_discovery.arn
+  role       = aws_iam_role.prom-scraper-app-role.name
+}
+
+# this attachment is for the ECS agent
+resource "aws_iam_role_policy_attachment" "attach_custom_to_task_execution" {
+  for_each = toset([
+    "arn:aws:iam::aws:policy/CloudWatchLogsFullAccess",
+    "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
+  ])
+  policy_arn = each.value
+  role       = aws_iam_role.prom-scraper-task-execution-role.name
+}
+
+data "aws_iam_policy_document" "prom_ecs_task_assume_policy" {
+  statement {
+    sid     = "AssumeRole"
+    effect  = "Allow"
+    actions = ["sts:AssumeRole"]
+
+    principals {
+      type        = "Service"
+      identifiers = ["ecs-tasks.amazonaws.com"]
+    }
+  }
+}
+
+resource "aws_iam_policy" "prom_stack_discovery" {
+  name   = "${local.full_name}-prom-ecs-task-discovery-policy"
+  path   = "/"
+  policy = data.aws_iam_policy_document.prom_custom_task_policy.json
+}
+
+data "aws_iam_policy_document" "prom_custom_task_policy" {
+  statement {
+    sid       = "AllowReadingTagsInstancesRegionsFromEC2"
+    effect    = "Allow"
+    resources = ["*"]
+
+    actions = [
+      "ec2:DescribeTags",
+      "ec2:DescribeInstances",
+      "ec2:DescribeRegions",
+    ]
+  }
+
+  statement {
+    sid       = "AllowReadingResourcesForTags"
+    effect    = "Allow"
+    resources = ["*"]
+    actions   = ["tag:GetResources"]
+  }
+}
+
+resource "aws_iam_policy" "prom_writer" {
+  name   = "${local.full_name}-prom-ecs-task-writer-policy"
+  path   = "/"
+  policy = data.aws_iam_policy_document.prom_task_aps_write.json
+}
+
+data "aws_iam_policy_document" "prom_task_aps_write" {
+  statement {
+    sid       = "WritePrometheusMetrics"
+    effect    = "Allow"
+    resources = ["*"]
+
+    actions = [
+      "aps:RemoteWrite",
+      "aps:GetSeries",
+      "aps:GetLabels",
+      "aps:GetMetricMetadata",
+    ]
+  }
+
+  statement {
+    sid       = "SSMGet"
+    effect    = "Allow"
+    resources = ["*"]
+    actions   = ["ssm:GetParameter"]
+  }
+
+  statement {
+    sid       = "AllowServiceDiscovery"
+    effect    = "Allow"
+    resources = ["*"]
+    actions   = ["servicediscovery:*"]
+  }
+  statement {
+    sid       = "AllowPromEFS"
+    effect    = "Allow"
+    resources = [aws_efs_file_system.prom_service_storage.arn]
+    actions = [
+      "elasticfilesystem:ClientWrite",
+      "elasticfilesystem:ClientMount",
+      "elasticfilesystem:ClientRoot"
+    ]
+  }
+}
diff --git a/monitoring/main.tf b/monitoring/main.tf
index a90aae3..0de66ff 100644
--- a/monitoring/main.tf
+++ b/monitoring/main.tf
@@ -1,7 +1,6 @@
 locals {
   create_amp        = alltrue([var.enable, var.enable_amp])
   create_prometheus = alltrue([var.enable, var.enable_prometheus])
-  create_grafana    = alltrue([var.enable, var.enable_grafana_managed])
   full_name         = "${var.namespace}-${var.name}-${var.environment}"
   tags = {
     Environment = var.environment
@@ -30,3 +29,184 @@ resource "aws_prometheus_alert_manager_definition" "prom-alerts" {
   definition   = each.value
   workspace_id = aws_prometheus_workspace.prom[0].id
 }
+
+
+resource "aws_efs_file_system" "prom_service_storage" {
+  encrypted = true
+  tags = {
+    Name = "${local.full_name}-prometheus-config"
+  }
+}
+
+resource "aws_efs_mount_target" "prom_service_storage" {
+  count = length(var.private_subnet_ids)
+
+  file_system_id  = aws_efs_file_system.prom_service_storage.id
+  subnet_id       = var.private_subnet_ids[count.index]
+  security_groups = [aws_security_group.prom_efs_sg.id]
+}
+
+# define the prometheus config-reloader (from https://github.com/aws-samples/prometheus-for-ecs)
+# This is the least hacky way I can find to get a templated prometheus.yml into the stock prometheus
+# container task. The environment variables below control which ParameterStore keys this sidecar
+# reads, and how often it re-checks them for a new prometheus.yml.
+# this container templates configs onto an EFS mount shared with the prometheus server
+module "reloader_container_def" {
+  count = local.create_prometheus ? 1 : 0
+
+  source  = "cloudposse/ecs-container-definition/aws"
+  version = "0.58.1"
+
+  container_name   = "${local.full_name}-prometheus-config-reloader"
+  container_image  = "public.ecr.aws/awsvijisarathy/prometheus-sdconfig-reloader:4.0"
+  container_memory = 128 # tiny process, tiny memory
+  container_cpu    = 10
+  user             = "root"
+  map_environment = {
+    CONFIG_FILE_DIR                     = "/etc/config"
+    CONFIG_RELOAD_FREQUENCY             = 60
+    PROMETHEUS_CONFIG_PARAMETER_NAME    = "/${var.environment}/ECS-Prometheus-Configuration"
+    DISCOVERY_NAMESPACES_PARAMETER_NAME = "/${var.environment}/ECS-ServiceDiscovery-Namespaces"
+    AWS_REGION                          = data.aws_region.current.name
+  }
+  log_configuration = {
+    logDriver = "awslogs"
+    options = {
+      awslogs-group         = "/ecs/Prometheus"
+      awslogs-create-group  = true
+      awslogs-region        = data.aws_region.current.name
+      awslogs-stream-prefix = "${local.full_name}-reloader"
+    }
+  }
+  mount_points = [
+    {
+      containerPath = "/etc/config"
+      sourceVolume  = "${local.full_name}-prometheus-config"
+      readOnly      = false
+    }
+  ]
+}
+
+resource "aws_ssm_parameter" "prom_config" {
+  name  = "/${var.environment}/ECS-Prometheus-Configuration"
+  type  = "String"
+  value = replace(var.prometheus_config, "REMOTE_WRITE_URL", "${aws_prometheus_workspace.prom[0].prometheus_endpoint}api/v1/remote_write")
+}
+
+resource "aws_ssm_parameter" "prom_sd_ns" {
+  name  = "/${var.environment}/ECS-ServiceDiscovery-Namespaces"
+  type  = "String"
+  value = "ecs-services"
+}
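+
+# A minimal sketch of what var.prometheus_config expects (hypothetical scrape
+# config; the literal REMOTE_WRITE_URL token is substituted above with the AMP
+# workspace endpoint, and sigv4 signing rides on the task role):
+#
+# prometheus_config = <<-EOF
+#   global:
+#     scrape_interval: 30s
+#   scrape_configs:
+#     - job_name: prometheus
+#       static_configs:
+#         - targets: ['localhost:9090']
+#   remote_write:
+#     - url: REMOTE_WRITE_URL
+#       sigv4:
+#         region: us-east-1
+# EOF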
+
+# define the prometheus scraper container
+# this container will scrape various prometheus exporters, then remote_write all data to the AMP prometheus endpoint
+module "prometheus_container_def" {
+  count = local.create_prometheus ? 1 : 0
+
+  source  = "cloudposse/ecs-container-definition/aws"
+  version = "0.58.1"
+
+  container_name   = "${local.full_name}-prometheus"
+  container_image  = var.prometheus_image
+  container_memory = var.prometheus_container_memory
+  container_cpu    = var.prometheus_container_cpu
+  user             = "root"
+  port_mappings = [
+    {
+      containerPort = 9090
+      hostPort      = 9090 # ?maybe?
+      protocol      = "tcp"
+    }
+  ]
+  command = [
+    "--storage.tsdb.retention.time=15d",
+    "--config.file=/etc/config/prometheus.yaml",
+    "--storage.tsdb.path=/data",
+    "--web.console.libraries=/etc/prometheus/console_libraries",
+    "--web.console.templates=/etc/prometheus/consoles",
+    "--web.enable-lifecycle"
+  ]
+  log_configuration = {
+    logDriver = "awslogs"
+    options = {
+      awslogs-group         = "/ecs/Prometheus"
+      awslogs-create-group  = true
+      awslogs-region        = data.aws_region.current.name
+      awslogs-stream-prefix = "${local.full_name}-prometheus"
+    }
+  }
+  mount_points = [
+    {
+      containerPath = "/etc/config"
+      sourceVolume  = "${local.full_name}-prometheus-config"
+      readOnly      = true
+    },
+    {
+      containerPath = "/data"
+      sourceVolume  = "${local.full_name}-prometheus-data"
+      readOnly      = false
+    }
+  ]
+  healthcheck = {
+    command = [
+      "CMD-SHELL",
+      "wget http://localhost:9090/-/healthy -O /dev/null || exit 1"
+    ]
+    retries     = 2
+    timeout     = 2
+    interval    = 10
+    startPeriod = 10
+  }
+  container_depends_on = [
+    {
+      containerName = "${local.full_name}-prometheus-config-reloader"
+      condition     = "START"
+    }
+  ]
+}
+
+# join all above containers into a single task
+resource "aws_ecs_task_definition" "prom_stack" {
+  count = local.create_prometheus ? 1 : 0
+  container_definitions = jsonencode([
+    module.reloader_container_def[0].json_map_object,
+    module.prometheus_container_def[0].json_map_object
+  ])
+  cpu                      = 256
+  memory                   = 512
+  family                   = "${local.full_name}-prometheus-stack"
+  requires_compatibilities = ["FARGATE"]
+  network_mode             = "awsvpc"
+  task_role_arn            = aws_iam_role.prom-scraper-app-role.arn            # perms for the actual prom scraper
+  execution_role_arn       = aws_iam_role.prom-scraper-task-execution-role.arn # perms for the ecs agent to launch the containers
+  volume {
+    name = "${local.full_name}-prometheus-config"
+    efs_volume_configuration {
+      file_system_id          = aws_efs_file_system.prom_service_storage.id
+      root_directory          = "/"
+      transit_encryption      = "ENABLED"
+      transit_encryption_port = 2049
+    }
+  }
+  volume {
+    # without any other identifiers, this becomes an ephemeral volume, which is fine cause we're remote-writing data to AMP
+    name = "${local.full_name}-prometheus-data"
+  }
+}
+
+resource "aws_ecs_service" "prom" {
+  name                               = "${local.full_name}-prometheus-scraper"
+  count                              = local.create_prometheus ? 1 : 0
+  cluster                            = module.ecs[0].ecs_cluster_id
+  task_definition                    = aws_ecs_task_definition.prom_stack[0].arn
+  desired_count                      = 1
+  deployment_maximum_percent         = 100
+  deployment_minimum_healthy_percent = 0
+  launch_type                        = "FARGATE"
+  network_configuration {
+    subnets          = var.private_subnet_ids
+    security_groups  = [aws_security_group.prom_ecs_sg[0].id]
+    assign_public_ip = false
+  }
+}
diff --git a/monitoring/outputs.tf b/monitoring/outputs.tf
new file mode 100644
index 0000000..6fc4a70
--- /dev/null
+++ b/monitoring/outputs.tf
@@ -0,0 +1,24 @@
+output "prometheus_endpoint" {
+  value = aws_prometheus_workspace.prom[0].prometheus_endpoint
+}
+
+output "prometheus_endpoint_remote_write" {
+  value = "${aws_prometheus_workspace.prom[0].prometheus_endpoint}api/v1/remote_write"
+}
+
+output "prometheus_endpoint_query" {
+  value = "${aws_prometheus_workspace.prom[0].prometheus_endpoint}api/v1/query"
+}
+
+output "prometheus_arn" {
+  value = aws_prometheus_workspace.prom[*].arn
+}
+
+output "grafana_managed_endpoint" {
+  value = [for ws in aws_grafana_workspace.grafana_managed[*] : "https://${ws.endpoint}"]
+}
+
+# not yet supported?
+#output "grafana_managed_version" {
+#  value = aws_grafana_workspace.grafana_managed[*].version
+#}
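+
+# Consuming these outputs from another terragrunt unit, roughly (hypothetical
+# paths and input name; the output names match those defined above):
+#
+# dependency "monitoring" {
+#   config_path = "../monitoring"
+# }
+#
+# inputs = {
+#   remote_write_url = dependency.monitoring.outputs.prometheus_endpoint_remote_write
+# }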
diff --git a/monitoring/security_groups.tf b/monitoring/security_groups.tf
new file mode 100644
index 0000000..37bf051
--- /dev/null
+++ b/monitoring/security_groups.tf
@@ -0,0 +1,117 @@
+# misc security groups needed across the monitoring stack, they're so bulky I like to move them out of the way
+
+# Security group for the ALB
+resource "aws_security_group" "grafana_alb_sg" {
+  count       = local.create_grafana_ecs ? 1 : 0
+  name        = "${local.full_name}-grafana-alb-sg"
+  description = "Allow traffic to the ALB created for the ${local.full_name} grafana service"
+  vpc_id      = var.vpc_id
+
+}
+
+resource "aws_security_group_rule" "allow_inbound_grafana_ecs_alb_http_redirect_all" {
+  count             = local.create_grafana_ecs ? 1 : 0
+  security_group_id = aws_security_group.grafana_alb_sg[0].id
+  type              = "ingress"
+  from_port         = 80
+  to_port           = 80
+  protocol          = "tcp"
+  cidr_blocks       = ["0.0.0.0/0"]
+}
+
+resource "aws_security_group_rule" "allow_inbound_grafana_ecs_alb_service_all" {
+  count             = local.create_grafana_ecs ? 1 : 0
+  security_group_id = aws_security_group.grafana_alb_sg[0].id
+  type              = "ingress"
+  from_port         = 443
+  to_port           = 443
+  protocol          = "tcp"
+  cidr_blocks       = ["0.0.0.0/0"]
+}
+
+resource "aws_security_group_rule" "allow_outbound_grafana_ecs_alb_all" {
+  count             = local.create_grafana_ecs ? 1 : 0
+  security_group_id = aws_security_group.grafana_alb_sg[0].id
+  type              = "egress"
+  from_port         = 0
+  to_port           = 0
+  protocol          = "-1"
+  cidr_blocks       = ["0.0.0.0/0"]
+}
+
+# Security group for the grafana ecs task
+resource "aws_security_group" "grafana_ecs_sg" {
+  count       = local.create_grafana_ecs ? 1 : 0
+  name        = "${local.full_name}-grafana-ecs-sg"
+  description = "Allow traffic to the ecs task created for the ${local.full_name} grafana service"
+  vpc_id      = var.vpc_id
+}
+
+resource "aws_security_group_rule" "allow_grafana_ecs_http_all" {
+  count             = local.create_grafana_ecs ? 1 : 0
+  security_group_id = aws_security_group.grafana_ecs_sg[0].id
+  type              = "ingress"
+  from_port         = 3000
+  to_port           = 3000
+  protocol          = "tcp"
+  cidr_blocks       = ["0.0.0.0/0"]
+}
+
+resource "aws_security_group_rule" "allow_outbound_grafana_ecs_all" {
+  count             = local.create_grafana_ecs ? 1 : 0
+  security_group_id = aws_security_group.grafana_ecs_sg[0].id
+  type              = "egress"
+  from_port         = 0
+  to_port           = 0
+  protocol          = "-1"
+  cidr_blocks       = ["0.0.0.0/0"]
+}
+
+# Security group for the prometheus server ecs task
+resource "aws_security_group" "prom_ecs_sg" {
+  count       = local.create_prometheus ? 1 : 0
+  name        = "${local.full_name}-prom-ecs-sg"
+  description = "Allow traffic to the ecs task created for the ${local.full_name} prometheus service"
+  vpc_id      = var.vpc_id
+}
+
+resource "aws_security_group_rule" "allow_prom_ecs_http_all" {
+  count             = local.create_prometheus ? 1 : 0
+  security_group_id = aws_security_group.prom_ecs_sg[0].id
+  type              = "ingress"
+  from_port         = 9090
+  to_port           = 9090
+  protocol          = "tcp"
+  # TODO lock this down to something much more restrictive, nothing needs api access but admins
+  cidr_blocks       = ["0.0.0.0/0"]
+}
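+
+# A sketch of that lock-down (hypothetical; assumes an admin_cidr_blocks list
+# variable, or reuse the VPC cidr so only in-VPC peers reach the prometheus api):
+#   cidr_blocks = var.admin_cidr_blocks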
+
+resource "aws_security_group_rule" "allow_outbound_prom_ecs_all" {
+  count             = local.create_prometheus ? 1 : 0
+  security_group_id = aws_security_group.prom_ecs_sg[0].id
+  type              = "egress"
+  from_port         = 0
+  to_port           = 0
+  protocol          = "-1"
+  cidr_blocks       = ["0.0.0.0/0"]
+}
+
+resource "aws_security_group" "prom_efs_sg" {
+  name        = "${local.full_name}-efs-sg"
+  description = "Allow traffic to the prometheus EFS storage volume"
+  vpc_id      = var.vpc_id
+
+  ingress {
+    from_port       = 2049
+    to_port         = 2049
+    protocol        = "tcp"
+    security_groups = [aws_security_group.prom_ecs_sg[0].id]
+  }
+
+  egress {
+    from_port   = 0
+    to_port     = 0
+    protocol    = "-1"
+    cidr_blocks = ["0.0.0.0/0"]
+  }
+}
diff --git a/monitoring/variables.tf b/monitoring/variables.tf
index d8846de..b164fb4 100644
--- a/monitoring/variables.tf
+++ b/monitoring/variables.tf
@@ -26,7 +26,7 @@ variable "enable_amp" {
 }
 
 variable "enable_prometheus" {
-  description = "should we make the ECS cluster and prometheus collector services"
+  description = "Make the ECS fargate cluster and prometheus collector services"
   default     = true
   type        = bool
 }
@@ -35,11 +35,16 @@ variable "enable_prometheus" {
 
 # enable_grafana_task - run grafana as an ECS task behind an ALB
 # enable_grafana_cloud
 variable "enable_grafana_managed" {
-  description = "should we make the managed grafana resources"
+  description = "Make the Amazon Managed Grafana resources"
   default     = true
   type        = bool
 }
 
+variable "enable_grafana_ecs" {
+  description = "Make an ECS fargate service to run OSS grafana resources"
+  default     = false
+  type        = bool
+}
 variable "rule_groups" {
   default = { basic = <