diff --git a/aws/README.md b/aws/README.md
index e53968e..650afc7 100644
--- a/aws/README.md
+++ b/aws/README.md
@@ -71,3 +71,7 @@ Typically an update is done by deploying all the stacks like so:
 ```bash
 AWS_PROFILE=cloud-snitch-dev npx cdk deploy '*-dev'
 ```
+
+## Observability
+
+The CDK deploys a CloudWatch dashboard in us-east-1 with key metrics for all regions.
diff --git a/aws/lib/environment.ts b/aws/lib/environment.ts
index f5161be..e30df0e 100644
--- a/aws/lib/environment.ts
+++ b/aws/lib/environment.ts
@@ -80,12 +80,14 @@ export class Environment {
         }
 
         const globalApexStack = new GlobalApexStack(scope, `global-apex-${props.slug}`, {
+            allRegions: props.regions,
             cloudfrontDistributionId: globalBaseStack.cloudfrontDistributionId,
             crossRegionReferences: true,
             env: {
                 account: props.accountId,
                 region: 'us-east-1',
             },
+            envSlug: props.slug,
             stackName: `cloud-snitch-global-apex-${props.slug}`,
         });
         regionalStacks.forEach((s) => globalApexStack.addDependency(s));
diff --git a/aws/lib/global-apex-stack.ts b/aws/lib/global-apex-stack.ts
index 1607ced..a6fdf16 100644
--- a/aws/lib/global-apex-stack.ts
+++ b/aws/lib/global-apex-stack.ts
@@ -1,8 +1,10 @@
-import { Stack, StackProps, custom_resources as cr } from 'aws-cdk-lib';
+import { aws_cloudwatch as cw, Duration, Stack, StackProps, custom_resources as cr } from 'aws-cdk-lib';
 import { Construct } from 'constructs';
 
 interface Props extends StackProps {
+    allRegions: string[];
     cloudfrontDistributionId: string;
+    envSlug: string;
 }
 
 export class GlobalApexStack extends Stack {
@@ -27,5 +29,145 @@ export class GlobalApexStack extends Stack {
             },
             policy: cr.AwsCustomResourcePolicy.fromSdkCalls({ resources: cr.AwsCustomResourcePolicy.ANY_RESOURCE }),
         });
+
+        new Dashboard(this, 'Dashboard', {
+            allRegions: props.allRegions,
+            envSlug: props.envSlug,
+        });
+    }
+}
+
+/** Inputs for the cross-region CloudWatch dashboard. */
+interface DashboardProps {
+    /** Regions whose metrics are graphed; each graph searches every region listed here. */
+    allRegions: string[];
+    /** Environment slug appended to the dashboard name (`cloud-snitch-<slug>`). */
+    envSlug: string;
+}
+
+/**
+ * Cross-region CloudWatch dashboard graphing API Gateway requests and errors, Lambda invocations and
+ * maximum duration, and SQS oldest message age, via SEARCH expressions scoped to each region in `allRegions`.
+ */
+class Dashboard extends Construct {
+    constructor(scope: Construct, id: string, props: DashboardProps) {
+        super(scope, id);
+
+        const dashboard = new cw.Dashboard(this, 'Dashboard', {
+            dashboardName: `cloud-snitch-${props.envSlug}`,
+            defaultInterval: Duration.days(7),
+        });
+
+        const apiGatewayRequestsGraph = new cw.GraphWidget({
+            height: 6,
+            width: 12,
+            title: 'API Requests',
+            leftYAxis: {
+                showUnits: false,
+            },
+        });
+        for (const region of props.allRegions) {
+            apiGatewayRequestsGraph.addLeftMetric(
+                new cw.MathExpression({
+                    expression: `SEARCH('{AWS/ApiGateway, ApiId} MetricName="Count"', 'Sum', 300)`,
+                    label: region,
+                    period: Duration.minutes(5),
+                    searchRegion: region,
+                    usingMetrics: {},
+                }),
+            );
+        }
+
+        const apiGatewayErrorsGraph = new cw.GraphWidget({
+            height: 6,
+            width: 12,
+            title: 'API Errors',
+            leftYAxis: {
+                showUnits: false,
+            },
+        });
+        for (const region of props.allRegions) {
+            for (const metric of ['4xx', '5xx']) {
+                // Include the metric name in the label so a region's 4xx and 5xx series are distinguishable.
+                apiGatewayErrorsGraph.addLeftMetric(
+                    new cw.MathExpression({
+                        expression: `SEARCH('{AWS/ApiGateway, ApiId} MetricName="${metric}"', 'Sum', 300)`,
+                        label: `${region} ${metric}`,
+                        period: Duration.minutes(5),
+                        searchRegion: region,
+                        usingMetrics: {},
+                    }),
+                );
+            }
+        }
+
+        dashboard.addWidgets(apiGatewayRequestsGraph, apiGatewayErrorsGraph);
+
+        const lambdaInvocationsGraph = new cw.GraphWidget({
+            height: 6,
+            width: 12,
+            title: 'Lambda Invocations',
+            leftYAxis: {
+                showUnits: false,
+            },
+        });
+        for (const region of props.allRegions) {
+            lambdaInvocationsGraph.addLeftMetric(
+                new cw.MathExpression({
+                    expression: `SEARCH('{AWS/Lambda, FunctionName} MetricName="Invocations"', 'Sum', 300)`,
+                    label: region,
+                    period: Duration.minutes(5),
+                    searchRegion: region,
+                    usingMetrics: {},
+                }),
+            );
+        }
+
+        const maxLambdaDurationGraph = new cw.GraphWidget({
+            height: 6,
+            width: 12,
+            title: 'Maximum Lambda Duration',
+            leftYAxis: {
+                label: 'Seconds',
+                showUnits: false,
+            },
+        });
+        for (const region of props.allRegions) {
+            // Lambda reports Duration in milliseconds; divide by 1000 to match the 'Seconds' axis label.
+            maxLambdaDurationGraph.addLeftMetric(
+                new cw.MathExpression({
+                    expression: `SEARCH('{AWS/Lambda, FunctionName} MetricName="Duration"', 'Maximum', 300) / 1000`,
+                    label: region,
+                    period: Duration.minutes(5),
+                    searchRegion: region,
+                    usingMetrics: {},
+                }),
+            );
+        }
+
+        dashboard.addWidgets(lambdaInvocationsGraph, maxLambdaDurationGraph);
+
+        const sqsOldestMessageAgeGraph = new cw.GraphWidget({
+            height: 6,
+            width: 12,
+            title: 'SQS Oldest Message Age',
+            leftYAxis: {
+                label: 'Seconds',
+                showUnits: false,
+            },
+        });
+        for (const region of props.allRegions) {
+            sqsOldestMessageAgeGraph.addLeftMetric(
+                new cw.MathExpression({
+                    expression: `SEARCH('{AWS/SQS, QueueName} MetricName="ApproximateAgeOfOldestMessage"', 'Maximum', 300)`,
+                    label: region,
+                    period: Duration.minutes(5),
+                    searchRegion: region,
+                    usingMetrics: {},
+                }),
+            );
+        }
+
+        dashboard.addWidgets(sqsOldestMessageAgeGraph);
     }
 }