From afcd18650e6c53d82b1e94870e6d4fa795c5dc52 Mon Sep 17 00:00:00 2001 From: Danny Steenman Date: Wed, 1 Apr 2026 16:56:15 +0200 Subject: [PATCH 1/2] fix(sdk): degrade gracefully on discovery dataset failures --- .changeset/good-bears-invent.md | 5 + .changeset/neat-socks-laugh.md | 5 + packages/cloudburn/src/formatters/output.ts | 49 +++-- packages/cloudburn/test/formatters.test.ts | 51 ++++++ packages/sdk/src/engine/run-live.ts | 49 ++++- packages/sdk/src/providers/aws/discovery.ts | 173 +++++++++++------- .../sdk/src/providers/aws/resources/utils.ts | 12 +- packages/sdk/src/types.ts | 3 +- .../sdk/test/providers/aws-discovery.test.ts | 92 ++++++++-- .../test/providers/aws-ebs-resource.test.ts | 44 +++-- .../providers/aws-resource-explorer.test.ts | 160 ++++++++-------- .../test/providers/aws-resource-utils.test.ts | 63 +++++++ packages/sdk/test/scanner.test.ts | 95 ++++++++++ 13 files changed, 602 insertions(+), 199 deletions(-) create mode 100644 .changeset/good-bears-invent.md create mode 100644 .changeset/neat-socks-laugh.md create mode 100644 packages/sdk/test/providers/aws-resource-utils.test.ts diff --git a/.changeset/good-bears-invent.md b/.changeset/good-bears-invent.md new file mode 100644 index 0000000..473b884 --- /dev/null +++ b/.changeset/good-bears-invent.md @@ -0,0 +1,5 @@ +--- +"@cloudburn/sdk": patch +--- + +Gracefully degrade AWS discovery when required datasets are throttled or otherwise unavailable by retrying longer and surfacing skipped-rule diagnostics instead of aborting the run. diff --git a/.changeset/neat-socks-laugh.md b/.changeset/neat-socks-laugh.md new file mode 100644 index 0000000..86d9ad4 --- /dev/null +++ b/.changeset/neat-socks-laugh.md @@ -0,0 +1,5 @@ +--- +"cloudburn": patch +--- + +Improve discover table output by separating diagnostics into a dedicated table so skipped rules and access-denied discovery results stay readable. diff --git a/packages/cloudburn/src/formatters/output.ts b/packages/cloudburn/src/formatters/output.ts index be75967..8619283 100644 --- a/packages/cloudburn/src/formatters/output.ts +++ b/packages/cloudburn/src/formatters/output.ts @@ -74,6 +74,16 @@ const scanColumns: ColumnSpec[] = [ { key: 'message', header: 'Message' }, ]; +const diagnosticColumns: ColumnSpec[] = [ + { key: 'provider', header: 'Provider' }, + { key: 'status', header: 'Status' }, + { key: 'ruleId', header: 'RuleId' }, + { key: 'source', header: 'Source' }, + { key: 'service', header: 'Service' }, + { key: 'region', header: 'Region' }, + { key: 'message', header: 'Message' }, +]; + const ruleListColumns: ColumnSpec[] = [ { key: 'ruleId', header: 'RuleId' }, { key: 'provider', header: 'Provider' }, @@ -176,8 +186,22 @@ const renderTable = (response: CliResponse): string => { case 'rule-list': return renderRuleTable(response.rules, response.emptyMessage); case 'scan-result': { - const rows = projectScanRows(response.result); - return rows.length === 0 ? 'No findings.' : renderAsciiTable(rows, scanColumns); + const findingRows = projectFindingRows(response.result); + const diagnosticRows = projectDiagnosticRows(response.result); + + if (findingRows.length === 0 && diagnosticRows.length === 0) { + return 'No findings.'; + } + + if (findingRows.length === 0) { + return `Diagnostics\n${renderAsciiTable(diagnosticRows, diagnosticColumns)}`; + } + + if (diagnosticRows.length === 0) { + return renderAsciiTable(findingRows, scanColumns); + } + + return `${renderAsciiTable(findingRows, scanColumns)}\n\nDiagnostics\n${renderAsciiTable(diagnosticRows, diagnosticColumns)}`; } case 'status': return renderAsciiTable( @@ -197,8 +221,8 @@ const renderTable = (response: CliResponse): string => { } }; -const projectScanRows = (result: ScanResult): RecordRow[] => [ - ...flattenScanResult(result).map(({ finding, message, provider, ruleId, service, source }) => ({ +const projectFindingRows = (result: ScanResult): RecordRow[] => + flattenScanResult(result).map(({ finding, message, provider, ruleId, service, source }) => ({ accountId: finding.accountId ?? '', message, path: finding.location?.path ?? '', @@ -210,21 +234,18 @@ const projectScanRows = (result: ScanResult): RecordRow[] => [ source, column: finding.location?.column ?? '', line: finding.location?.line ?? '', - })), - ...getScanDiagnostics(result).map((diagnostic) => ({ - accountId: '', - column: '', - line: '', + })); + +const projectDiagnosticRows = (result: ScanResult): RecordRow[] => + getScanDiagnostics(result).map((diagnostic) => ({ message: diagnostic.message, - path: '', provider: diagnostic.provider, region: diagnostic.region ?? '', - resourceId: '', - ruleId: '', + ruleId: diagnostic.ruleId ?? '', service: diagnostic.service, source: diagnostic.source, - })), -]; + status: diagnostic.status, + })); const inferColumns = (rows: RecordRow[]): ColumnSpec[] => { const keys = Array.from(new Set(rows.flatMap((row) => Object.keys(row)))).sort((left, right) => diff --git a/packages/cloudburn/test/formatters.test.ts b/packages/cloudburn/test/formatters.test.ts index 4cdff25..8b7b924 100644 --- a/packages/cloudburn/test/formatters.test.ts +++ b/packages/cloudburn/test/formatters.test.ts @@ -51,6 +51,36 @@ const resultWithLocation = { ], }; +const resultWithSkippedRuleDiagnostic = { + diagnostics: [ + { + details: + 'Amazon CloudWatch Logs DescribeMetricFilters failed in us-east-1 with ThrottlingException: Rate exceeded.', + message: 'Skipped rule CLDBRN-AWS-CLOUDWATCH-3 because required discovery datasets were unavailable.', + provider: 'aws' as const, + ruleId: 'CLDBRN-AWS-CLOUDWATCH-3', + service: 'cloudwatch', + source: 'discovery' as const, + status: 'skipped' as const, + }, + ], + providers: [], +}; + +const resultWithFindingAndDiagnostic = { + diagnostics: [ + { + message: 'Skipped lambda discovery in us-east-1 because access is denied by AWS permissions.', + provider: 'aws' as const, + region: 'us-east-1', + service: 'lambda', + source: 'discovery' as const, + status: 'access_denied' as const, + }, + ], + providers: resultWithoutLocation.providers, +}; + const withStdoutColumns = (columns: number, run: () => void): void => { const descriptor = Object.getOwnPropertyDescriptor(process.stdout, 'columns'); @@ -85,6 +115,27 @@ describe('renderResponse', () => { `); }); + it('renders skipped-rule diagnostics with their rule id in table mode', () => { + const output = renderResponse({ kind: 'scan-result', result: resultWithSkippedRuleDiagnostic }, 'table'); + + expect(output).toContain('Diagnostics'); + expect(output).toContain('Status'); + expect(output).toContain('CLDBRN-AWS-CLOUDWATCH-3'); + expect(output).toContain('Skipped rule CLDBRN-AWS-CLOUDWATCH-3'); + expect(output).not.toContain('ResourceId'); + expect(output).not.toContain('AccountId'); + }); + + it('renders diagnostics in a separate table when findings also exist', () => { + const output = renderResponse({ kind: 'scan-result', result: resultWithFindingAndDiagnostic }, 'table'); + + expect(output).toContain('CLDBRN-AWS-EBS-1'); + expect(output).toContain('vol-123'); + expect(output).toContain('Diagnostics'); + expect(output).toContain('access_denied'); + expect(output).toContain('Skipped lambda discovery in us-east-1'); + }); + it('wraps long status values to the available terminal width in table mode', () => { withStdoutColumns(60, () => { const output = renderResponse( diff --git a/packages/sdk/src/engine/run-live.ts b/packages/sdk/src/engine/run-live.ts index a8f6207..38f13e8 100644 --- a/packages/sdk/src/engine/run-live.ts +++ b/packages/sdk/src/engine/run-live.ts @@ -11,10 +11,23 @@ export const runLiveScan = async ( ): Promise => { const registry = buildRuleRegistry(config, 'discovery'); emitDebugLog(options?.debugLogger, `sdk: resolved ${registry.activeRules.length} active discovery rules`); - const { diagnostics = [], ...liveContext } = - options?.debugLogger === undefined - ? await discoverAwsResources(registry.activeRules, target) - : await discoverAwsResources(registry.activeRules, target, { debugLogger: options.debugLogger }); + const { + diagnostics = [], + unavailableDatasets = new Map(), + ...liveContext + } = options?.debugLogger === undefined + ? await discoverAwsResources(registry.activeRules, target) + : await discoverAwsResources(registry.activeRules, target, { debugLogger: options.debugLogger }); + const unresolvedUnavailableDatasets: unknown = unavailableDatasets; + const unavailableDatasetDiagnostics = + unresolvedUnavailableDatasets instanceof Map + ? unresolvedUnavailableDatasets + : new Map( + unresolvedUnavailableDatasets instanceof Set + ? [...unresolvedUnavailableDatasets].map((datasetKey) => [datasetKey, []] as const) + : [], + ); + const scanDiagnostics = [...diagnostics]; const findings = groupFindingsByProvider( registry.activeRules.map((rule) => { if (!rule.supports.includes('discovery') || !rule.evaluateLive) { @@ -24,6 +37,32 @@ export const runLiveScan = async ( }; } + const unavailableDependencies = (rule.discoveryDependencies ?? []).filter((dependency) => + unavailableDatasetDiagnostics.has(dependency), + ); + + if (unavailableDependencies.length > 0) { + scanDiagnostics.push({ + details: unavailableDependencies + .flatMap((dependency) => unavailableDatasetDiagnostics.get(dependency) ?? []) + .map((diagnostic) => diagnostic.details) + .filter((detail): detail is string => detail !== undefined) + .filter((detail, index, details) => details.indexOf(detail) === index) + .join('\n'), + message: `Skipped rule ${rule.id} because required discovery datasets were unavailable: ${unavailableDependencies.join(', ')}.`, + provider: rule.provider, + ruleId: rule.id, + service: rule.service, + source: 'discovery', + status: 'skipped', + }); + + return { + provider: rule.provider, + finding: null, + }; + } + return { provider: rule.provider, finding: rule.evaluateLive(liveContext), @@ -32,7 +71,7 @@ export const runLiveScan = async ( ); return { - ...(diagnostics.length > 0 ? { diagnostics } : {}), + ...(scanDiagnostics.length > 0 ? { diagnostics: scanDiagnostics } : {}), providers: findings, }; }; diff --git a/packages/sdk/src/providers/aws/discovery.ts b/packages/sdk/src/providers/aws/discovery.ts index 7465be3..c72013f 100644 --- a/packages/sdk/src/providers/aws/discovery.ts +++ b/packages/sdk/src/providers/aws/discovery.ts @@ -16,7 +16,7 @@ import type { } from '../../types.js'; import { assertValidAwsRegion, listEnabledAwsRegions, resolveCurrentAwsRegion } from './client.js'; import { getAwsDiscoveryDatasetDefinition } from './discovery-registry.js'; -import { AwsDiscoveryError, getAwsErrorCode, isAwsAccessDeniedError } from './errors.js'; +import { AwsDiscoveryError, getAwsErrorCode, isAwsAccessDeniedError, isAwsThrottlingError } from './errors.js'; import { buildAwsDiscoveryCatalog, createAwsResourceExplorerSetup, @@ -166,6 +166,7 @@ const collectDiscoveryDependencies = (rules: Rule[]): DiscoveryDatasetKey[] => { type LiveDiscoveryContext = LiveEvaluationContext & { diagnostics: ScanDiagnostic[]; + unavailableDatasets?: Map; }; const groupResourcesByRegion = (resources: T[]): Map => { @@ -189,6 +190,19 @@ const buildAccessDeniedDiagnosticMessage = (service: string, region: string, err ? `Skipped ${service} discovery in ${region} because access is denied by a service control policy (SCP).` : `Skipped ${service} discovery in ${region} because access is denied by AWS permissions.`; +const buildDatasetFailureDiagnostic = (service: string, region: string | undefined, err: unknown): ScanDiagnostic => ({ + code: getAwsErrorCode(err), + details: err instanceof Error ? err.message : String(err), + message: isAwsThrottlingError(err) + ? `Skipped ${service} discovery${region ? ` in ${region}` : ''} because AWS throttled the required dataset after retrying.` + : `Skipped ${service} discovery${region ? ` in ${region}` : ''} because a required dataset failed to load.`, + provider: 'aws', + ...(region ? { region } : {}), + service, + source: 'discovery', + status: isAwsThrottlingError(err) ? 'throttled' : 'error', +}); + const normalizeDatasetLoadResult = ( loadResult: unknown[] | { diagnostics?: ScanDiagnostic[]; resources: unknown[] }, ): { diagnostics: ScanDiagnostic[]; resources: unknown[] } => @@ -289,6 +303,7 @@ export const discoverAwsResources = async ( Promise<{ dataset: [DiscoveryDatasetKey, DiscoveryDatasetMap[DiscoveryDatasetKey]]; diagnostics: ScanDiagnostic[]; + unavailable: boolean; }> >(); const loadDataset = ( @@ -296,6 +311,7 @@ export const discoverAwsResources = async ( ): Promise<{ dataset: [K, DiscoveryDatasetMap[K]]; diagnostics: ScanDiagnostic[]; + unavailable: boolean; }> => { const cachedLoad = datasetLoadPromises.get(datasetKey); @@ -303,6 +319,7 @@ export const discoverAwsResources = async ( return cachedLoad as Promise<{ dataset: [K, DiscoveryDatasetMap[K]]; diagnostics: ScanDiagnostic[]; + unavailable: boolean; }>; } @@ -316,8 +333,8 @@ export const discoverAwsResources = async ( const loadPromise = (async () => { emitDebugLog(options?.debugLogger, `aws: loading dataset ${definition.datasetKey}`); - try { - if (definition.resourceTypes.length === 0) { + if (definition.resourceTypes.length === 0) { + try { const loadResult = normalizeDatasetLoadResult( await definition.load([], { loadDataset: async ( @@ -336,81 +353,94 @@ export const discoverAwsResources = async ( DiscoveryDatasetMap[K], ], diagnostics: loadResult.diagnostics, + unavailable: false, }; - } + } catch (err) { + emitDebugLog( + options?.debugLogger, + `aws: dataset ${definition.datasetKey} failed after ${formatElapsedMs(startedAtMs)}: ${err instanceof Error ? err.message : String(err)}`, + ); + emitDebugLog( + options?.debugLogger, + `aws: completed dataset ${definition.datasetKey} with 0 resources in ${formatElapsedMs(startedAtMs)}`, + ); - const matchingResources = definition.resourceTypes.flatMap( - (resourceType) => resourcesByType.get(resourceType) ?? [], - ); - const regionResourceGroups = groupResourcesByRegion(matchingResources); - const loadedResources: unknown[] = []; - const diagnostics: ScanDiagnostic[] = []; - - for (const [region, regionResources] of regionResourceGroups) { - const regionStartedAtMs = Date.now(); - - try { - emitDebugLog( - options?.debugLogger, - `aws: loading dataset ${definition.datasetKey} in ${region} from ${regionResources.length} resources`, - ); - const loadResult = normalizeDatasetLoadResult( - await definition.load(regionResources, { - loadDataset: async ( - nestedDatasetKey: T, - ): Promise => (await loadDataset(nestedDatasetKey)).dataset[1], - }), - ); - appendItems(loadedResources, loadResult.resources); - appendItems(diagnostics, loadResult.diagnostics); - emitDebugLog( - options?.debugLogger, - `aws: completed dataset ${definition.datasetKey} in ${region} with ${loadResult.resources.length} resources in ${formatElapsedMs(regionStartedAtMs)}`, - ); - } catch (err) { - if (!isAwsAccessDeniedError(err)) { - emitDebugLog( - options?.debugLogger, - `aws: dataset ${definition.datasetKey} failed in ${region} after ${formatElapsedMs(regionStartedAtMs)}: ${err instanceof Error ? err.message : String(err)}`, - ); - throw err; - } - - diagnostics.push({ - code: getAwsErrorCode(err), - details: err instanceof Error ? err.message : String(err), - message: buildAccessDeniedDiagnosticMessage(definition.service, region, err), - provider: 'aws', - region, - service: definition.service, - source: 'discovery', - status: 'access_denied', - }); - emitDebugLog( - options?.debugLogger, - `aws: completed dataset ${definition.datasetKey} in ${region} with 0 resources in ${formatElapsedMs(regionStartedAtMs)}`, - ); - } + return { + dataset: [definition.datasetKey, [] as DiscoveryDatasetMap[K]] as [K, DiscoveryDatasetMap[K]], + diagnostics: [buildDatasetFailureDiagnostic(definition.service, catalog.searchRegion, err)], + unavailable: true, + }; } + } + + const matchingResources = definition.resourceTypes.flatMap( + (resourceType) => resourcesByType.get(resourceType) ?? [], + ); + const regionResourceGroups = groupResourcesByRegion(matchingResources); + const loadedResources: unknown[] = []; + const diagnostics: ScanDiagnostic[] = []; + let unavailable = false; - if (regionResourceGroups.size > 1) { + for (const [region, regionResources] of regionResourceGroups) { + const regionStartedAtMs = Date.now(); + + try { + emitDebugLog( + options?.debugLogger, + `aws: loading dataset ${definition.datasetKey} in ${region} from ${regionResources.length} resources`, + ); + const loadResult = normalizeDatasetLoadResult( + await definition.load(regionResources, { + loadDataset: async ( + nestedDatasetKey: T, + ): Promise => (await loadDataset(nestedDatasetKey)).dataset[1], + }), + ); + appendItems(loadedResources, loadResult.resources); + appendItems(diagnostics, loadResult.diagnostics); + emitDebugLog( + options?.debugLogger, + `aws: completed dataset ${definition.datasetKey} in ${region} with ${loadResult.resources.length} resources in ${formatElapsedMs(regionStartedAtMs)}`, + ); + } catch (err) { + emitDebugLog( + options?.debugLogger, + `aws: dataset ${definition.datasetKey} failed in ${region} after ${formatElapsedMs(regionStartedAtMs)}: ${err instanceof Error ? err.message : String(err)}`, + ); + unavailable = true; + diagnostics.push( + isAwsAccessDeniedError(err) + ? { + code: getAwsErrorCode(err), + details: err instanceof Error ? err.message : String(err), + message: buildAccessDeniedDiagnosticMessage(definition.service, region, err), + provider: 'aws', + region, + service: definition.service, + source: 'discovery', + status: 'access_denied', + } + : buildDatasetFailureDiagnostic(definition.service, region, err), + ); emitDebugLog( options?.debugLogger, - `aws: completed dataset ${definition.datasetKey} with ${loadedResources.length} resources in ${formatElapsedMs(startedAtMs)}`, + `aws: completed dataset ${definition.datasetKey} in ${region} with 0 resources in ${formatElapsedMs(regionStartedAtMs)}`, ); } + } - return { - dataset: [definition.datasetKey, loadedResources as DiscoveryDatasetMap[K]] as [K, DiscoveryDatasetMap[K]], - diagnostics, - }; - } catch (err) { + if (regionResourceGroups.size > 1 || unavailable) { emitDebugLog( options?.debugLogger, - `aws: dataset ${definition.datasetKey} failed after ${formatElapsedMs(startedAtMs)}: ${err instanceof Error ? err.message : String(err)}`, + `aws: completed dataset ${definition.datasetKey} with ${loadedResources.length} resources in ${formatElapsedMs(startedAtMs)}`, ); - throw err; } + + return { + dataset: [definition.datasetKey, loadedResources as DiscoveryDatasetMap[K]] as [K, DiscoveryDatasetMap[K]], + diagnostics, + unavailable, + }; })(); datasetLoadPromises.set( @@ -418,21 +448,32 @@ export const discoverAwsResources = async ( loadPromise as Promise<{ dataset: [DiscoveryDatasetKey, DiscoveryDatasetMap[DiscoveryDatasetKey]]; diagnostics: ScanDiagnostic[]; + unavailable: boolean; }>, ); - return loadPromise; + return loadPromise as Promise<{ + dataset: [K, DiscoveryDatasetMap[K]]; + diagnostics: ScanDiagnostic[]; + unavailable: boolean; + }>; }; const datasetLoads = await Promise.all(datasetKeys.map((datasetKey) => loadDataset(datasetKey))); const allDatasetLoads = await Promise.all(datasetLoadPromises.values()); const resources = new LiveResourceBag( Object.fromEntries(datasetLoads.map((loadResult) => loadResult.dataset)) as Partial, ); + const unavailableDatasets = new Map( + allDatasetLoads + .filter((loadResult) => loadResult.unavailable) + .map((loadResult) => [loadResult.dataset[0], loadResult.diagnostics] as const), + ); return { catalog, diagnostics: allDatasetLoads.flatMap((loadResult) => loadResult.diagnostics), resources, + unavailableDatasets, }; }; diff --git a/packages/sdk/src/providers/aws/resources/utils.ts b/packages/sdk/src/providers/aws/resources/utils.ts index 4c0e630..9ff2da1 100644 --- a/packages/sdk/src/providers/aws/resources/utils.ts +++ b/packages/sdk/src/providers/aws/resources/utils.ts @@ -12,6 +12,12 @@ const sleep = async (delayMs: number): Promise => setTimeout(resolve, delayMs); }); +const calculateThrottleDelayMs = (initialDelayMs: number, attempt: number): number => { + const baseDelayMs = initialDelayMs * 2 ** (attempt - 1); + + return Math.round(baseDelayMs * (1 + Math.random())); +}; + /** * Splits an array into fixed-size chunks for batched AWS API calls. * @@ -79,8 +85,8 @@ export const withAwsServiceErrorContext = async ( execute: () => Promise, options: AwsServiceErrorContextOptions = {}, ): Promise => { - const maxAttempts = options.maxAttempts ?? 4; - const initialDelayMs = options.initialDelayMs ?? 200; + const maxAttempts = options.maxAttempts ?? 6; + const initialDelayMs = options.initialDelayMs ?? 500; for (let attempt = 1; attempt <= maxAttempts; attempt += 1) { try { @@ -91,7 +97,7 @@ export const withAwsServiceErrorContext = async ( } if (attempt < maxAttempts && isAwsThrottlingError(err)) { - const delayMs = initialDelayMs * 2 ** (attempt - 1); + const delayMs = calculateThrottleDelayMs(initialDelayMs, attempt); options.onRetry?.({ attempt, delayMs, error: err, maxAttempts }); await sleep(delayMs); continue; diff --git a/packages/sdk/src/types.ts b/packages/sdk/src/types.ts index 40b931a..0623f16 100644 --- a/packages/sdk/src/types.ts +++ b/packages/sdk/src/types.ts @@ -166,11 +166,12 @@ export type ScanDiagnostic = { provider: CloudProvider; service: string; source: Source; - status: 'access_denied'; + status: 'access_denied' | 'error' | 'skipped' | 'throttled'; message: string; code?: string; details?: string; region?: string; + ruleId?: string; }; /** Result of a scan execution containing provider-grouped lean rule findings. */ diff --git a/packages/sdk/test/providers/aws-discovery.test.ts b/packages/sdk/test/providers/aws-discovery.test.ts index f4bf3cf..04b2e0b 100644 --- a/packages/sdk/test/providers/aws-discovery.test.ts +++ b/packages/sdk/test/providers/aws-discovery.test.ts @@ -2495,6 +2495,54 @@ describe('discoverAwsResources', () => { ]); }); + it('records a non-fatal throttling diagnostic instead of aborting the run when a dataset exhausts retries', async () => { + mockedBuildAwsDiscoveryCatalog.mockResolvedValue({ + indexType: 'LOCAL', + resources: [catalog.resources[7]], + searchRegion: 'eu-central-1', + }); + mockedHydrateAwsCloudWatchLogMetricFilterCoverage.mockRejectedValue( + Object.assign( + new Error( + 'Amazon CloudWatch Logs DescribeMetricFilters failed in eu-central-1 with ThrottlingException: Rate exceeded Request ID: req-metric-filters.', + ), + { + code: 'ThrottlingException', + name: 'ThrottlingException', + $metadata: { + httpStatusCode: 400, + requestId: 'req-metric-filters', + }, + }, + ), + ); + + const result = await discoverAwsResources( + [ + createRule({ + discoveryDependencies: ['aws-cloudwatch-log-metric-filter-coverage'], + service: 'cloudwatch', + }), + ], + { mode: 'regions', regions: ['eu-central-1'] }, + ); + + expect(result.resources.get('aws-cloudwatch-log-metric-filter-coverage')).toEqual([]); + expect(result.diagnostics).toEqual([ + { + code: 'ThrottlingException', + details: + 'Amazon CloudWatch Logs DescribeMetricFilters failed in eu-central-1 with ThrottlingException: Rate exceeded Request ID: req-metric-filters.', + message: 'Skipped cloudwatch discovery in us-east-1 because AWS throttled the required dataset after retrying.', + provider: 'aws', + region: 'us-east-1', + service: 'cloudwatch', + source: 'discovery', + status: 'throttled', + }, + ]); + }); + it('merges very large dataset loads without overflowing the call stack', async () => { mockedBuildAwsDiscoveryCatalog.mockResolvedValue({ indexType: 'LOCAL', @@ -2644,7 +2692,7 @@ describe('discoverAwsResources', () => { ).toBe(true); }); - it('emits dataset failure timing before surfacing non-access-denied errors', async () => { + it('emits dataset failure timing when a dataset is downgraded into a non-fatal diagnostic', async () => { mockedBuildAwsDiscoveryCatalog.mockResolvedValue({ indexType: 'LOCAL', resources: [catalog.resources[1]], @@ -2653,28 +2701,38 @@ describe('discoverAwsResources', () => { mockedHydrateAwsEc2Instances.mockRejectedValue(new Error('boom')); const debugLines: string[] = []; - await expect( - discoverAwsResources( - [ - createRule({ - discoveryDependencies: ['aws-ec2-instances'], - }), - ], - { mode: 'regions', regions: ['us-east-1'] }, - { - debugLogger: (message) => debugLines.push(message), - }, - ), - ).rejects.toThrow('boom'); + const result = await discoverAwsResources( + [ + createRule({ + discoveryDependencies: ['aws-ec2-instances'], + }), + ], + { mode: 'regions', regions: ['us-east-1'] }, + { + debugLogger: (message) => debugLines.push(message), + }, + ); expect(debugLines).toContain('aws: loading dataset aws-ec2-instances'); expect(debugLines).toContain('aws: loading dataset aws-ec2-instances in us-east-1 from 1 resources'); expect( debugLines.some((line) => /^aws: dataset aws-ec2-instances failed in us-east-1 after \d+ms: boom$/.test(line)), ).toBe(true); - expect(debugLines.some((line) => /^aws: dataset aws-ec2-instances failed after \d+ms: boom$/.test(line))).toBe( - true, - ); + expect( + debugLines.some((line) => /^aws: completed dataset aws-ec2-instances with 0 resources in \d+ms$/.test(line)), + ).toBe(true); + expect(result.resources.get('aws-ec2-instances')).toEqual([]); + expect(result.diagnostics).toEqual([ + { + details: 'boom', + message: 'Skipped ec2 discovery in us-east-1 because a required dataset failed to load.', + provider: 'aws', + region: 'us-east-1', + service: 'ec2', + source: 'discovery', + status: 'error', + }, + ]); }); it('fails fast when a discovery rule has an evaluator but no discoveryDependencies metadata', async () => { diff --git a/packages/sdk/test/providers/aws-ebs-resource.test.ts b/packages/sdk/test/providers/aws-ebs-resource.test.ts index 530bb8f..9772664 100644 --- a/packages/sdk/test/providers/aws-ebs-resource.test.ts +++ b/packages/sdk/test/providers/aws-ebs-resource.test.ts @@ -79,20 +79,22 @@ describe('hydrateAwsEbsVolumes', () => { }); it('preserves EC2 API context when volume hydration is throttled', async () => { - mockedCreateEc2Client.mockReturnValue({ - send: vi.fn().mockRejectedValue( - Object.assign(new Error('Rate exceeded'), { - name: 'RequestLimitExceeded', - $metadata: { - httpStatusCode: 503, - requestId: 'request-456', - }, - }), - ), - } as never); - - await expect( - hydrateAwsEbsVolumes([ + vi.useFakeTimers(); + + try { + mockedCreateEc2Client.mockReturnValue({ + send: vi.fn().mockRejectedValue( + Object.assign(new Error('Rate exceeded'), { + name: 'RequestLimitExceeded', + $metadata: { + httpStatusCode: 503, + requestId: 'request-456', + }, + }), + ), + } as never); + + const resultPromise = hydrateAwsEbsVolumes([ { accountId: '123456789012', arn: 'arn:aws:ec2:eu-central-1:123456789012:volume/vol-123', @@ -101,10 +103,16 @@ describe('hydrateAwsEbsVolumes', () => { resourceType: 'ec2:volume', service: 'ec2', }, - ]), - ).rejects.toThrow( - 'Amazon EC2 DescribeVolumes failed in eu-central-1 with RequestLimitExceeded: Rate exceeded Request ID: request-456.', - ); + ]); + + const assertion = expect(resultPromise).rejects.toThrow( + 'Amazon EC2 DescribeVolumes failed in eu-central-1 with RequestLimitExceeded: Rate exceeded Request ID: request-456.', + ); + await vi.runAllTimersAsync(); + await assertion; + } finally { + vi.useRealTimers(); + } }); it('preserves EC2 error identity when volume hydration is access denied', async () => { diff --git a/packages/sdk/test/providers/aws-resource-explorer.test.ts b/packages/sdk/test/providers/aws-resource-explorer.test.ts index 2da350d..a38349e 100644 --- a/packages/sdk/test/providers/aws-resource-explorer.test.ts +++ b/packages/sdk/test/providers/aws-resource-explorer.test.ts @@ -178,84 +178,94 @@ describe('resource explorer discovery', () => { }); it('preserves Resource Explorer operation context when list resources is throttled', async () => { - vi.spyOn(clientModule, 'resolveCurrentAwsRegion').mockResolvedValue('eu-central-1'); - vi.spyOn(clientModule, 'createResourceExplorerClient').mockReturnValue({ - send: vi - .fn() - .mockImplementationOnce(async (command) => { - expect(command.input.Regions).toEqual(['eu-central-1']); - - return { - Indexes: [ - { - Region: 'eu-central-1', - Type: 'AGGREGATOR', - }, - ], - }; - }) - .mockResolvedValueOnce({ - ViewArn: 'arn:aws:resource-explorer-2:eu-central-1:123456789012:view/default', - }) - .mockResolvedValueOnce({ - View: { - Filters: { - FilterString: '', - }, + vi.useFakeTimers(); + + try { + vi.spyOn(clientModule, 'resolveCurrentAwsRegion').mockResolvedValue('eu-central-1'); + vi.spyOn(clientModule, 'createResourceExplorerClient').mockReturnValue({ + send: vi + .fn() + .mockImplementationOnce(async (command) => { + expect(command.input.Regions).toEqual(['eu-central-1']); + + return { + Indexes: [ + { + Region: 'eu-central-1', + Type: 'AGGREGATOR', + }, + ], + }; + }) + .mockResolvedValueOnce({ ViewArn: 'arn:aws:resource-explorer-2:eu-central-1:123456789012:view/default', - }, - }) - .mockRejectedValueOnce( - Object.assign(new Error('Rate exceeded'), { - name: 'ThrottlingException', - $metadata: { - httpStatusCode: 429, - requestId: 'request-123', - }, - }), - ) - .mockRejectedValueOnce( - Object.assign(new Error('Rate exceeded'), { - name: 'ThrottlingException', - $metadata: { - httpStatusCode: 429, - requestId: 'request-123', - }, - }), - ) - .mockRejectedValueOnce( - Object.assign(new Error('Rate exceeded'), { - name: 'ThrottlingException', - $metadata: { - httpStatusCode: 429, - requestId: 'request-123', - }, - }), - ) - .mockRejectedValueOnce( - Object.assign(new Error('Rate exceeded'), { - name: 'ThrottlingException', - $metadata: { - httpStatusCode: 429, - requestId: 'request-123', - }, - }), - ) - .mockRejectedValueOnce( - Object.assign(new Error('Rate exceeded'), { - name: 'ThrottlingException', - $metadata: { - httpStatusCode: 429, - requestId: 'request-123', + }) + .mockResolvedValueOnce({ + View: { + Filters: { + FilterString: '', + }, + ViewArn: 'arn:aws:resource-explorer-2:eu-central-1:123456789012:view/default', }, - }), - ), - } as never); + }) + .mockRejectedValueOnce( + Object.assign(new Error('Rate exceeded'), { + name: 'ThrottlingException', + $metadata: { + httpStatusCode: 429, + requestId: 'request-123', + }, + }), + ) + .mockRejectedValueOnce( + Object.assign(new Error('Rate exceeded'), { + name: 'ThrottlingException', + $metadata: { + httpStatusCode: 429, + requestId: 'request-123', + }, + }), + ) + .mockRejectedValueOnce( + Object.assign(new Error('Rate exceeded'), { + name: 'ThrottlingException', + $metadata: { + httpStatusCode: 429, + requestId: 'request-123', + }, + }), + ) + .mockRejectedValueOnce( + Object.assign(new Error('Rate exceeded'), { + name: 'ThrottlingException', + $metadata: { + httpStatusCode: 429, + requestId: 'request-123', + }, + }), + ) + .mockRejectedValueOnce( + Object.assign(new Error('Rate exceeded'), { + name: 'ThrottlingException', + $metadata: { + httpStatusCode: 429, + requestId: 'request-123', + }, + }), + ), + } as never); - await expect(buildAwsDiscoveryCatalog({ mode: 'current' }, ['ec2:volume'])).rejects.toMatchObject({ - message: - 'AWS Resource Explorer ListResources failed in eu-central-1 with ThrottlingException: Rate exceeded Request ID: request-123.', - }); + const resultPromise = buildAwsDiscoveryCatalog({ mode: 'current' }, ['ec2:volume']); + + const assertion = expect(resultPromise).rejects.toMatchObject({ + message: + 'AWS Resource Explorer ListResources failed in eu-central-1 with ThrottlingException: Rate exceeded Request ID: request-123.', + }); + await vi.runAllTimersAsync(); + await assertion; + } finally { + vi.useRealTimers(); + } }); it('retries throttled list resources calls before succeeding', async () => { diff --git a/packages/sdk/test/providers/aws-resource-utils.test.ts b/packages/sdk/test/providers/aws-resource-utils.test.ts new file mode 100644 index 0000000..d75349a --- /dev/null +++ b/packages/sdk/test/providers/aws-resource-utils.test.ts @@ -0,0 +1,63 @@ +import { describe, expect, it, vi } from 'vitest'; +import { withAwsServiceErrorContext } from '../../src/providers/aws/resources/utils.js'; + +const createThrottlingError = (): Error => + Object.assign(new Error('Rate exceeded'), { + $metadata: { + httpStatusCode: 400, + requestId: 'req-throttle', + }, + code: 'ThrottlingException', + name: 'ThrottlingException', + }); + +describe('withAwsServiceErrorContext', () => { + it('retries throttled AWS calls with exponential backoff and jitter before succeeding', async () => { + vi.useFakeTimers(); + vi.spyOn(Math, 'random').mockReturnValue(0.5); + + const execute = vi + .fn<() => Promise>() + .mockRejectedValueOnce(createThrottlingError()) + .mockRejectedValueOnce(createThrottlingError()) + .mockResolvedValue('ok'); + const onRetry = vi.fn(); + + const resultPromise = withAwsServiceErrorContext( + 'Amazon CloudWatch Logs', + 'DescribeMetricFilters', + 'eu-central-1', + execute, + { + initialDelayMs: 200, + maxAttempts: 5, + onRetry, + }, + ); + + await vi.advanceTimersByTimeAsync(300); + await Promise.resolve(); + expect(execute).toHaveBeenCalledTimes(2); + + await vi.advanceTimersByTimeAsync(600); + await expect(resultPromise).resolves.toBe('ok'); + + expect(execute).toHaveBeenCalledTimes(3); + expect(onRetry).toHaveBeenNthCalledWith( + 1, + expect.objectContaining({ + attempt: 1, + delayMs: 300, + maxAttempts: 5, + }), + ); + expect(onRetry).toHaveBeenNthCalledWith( + 2, + expect.objectContaining({ + attempt: 2, + delayMs: 600, + maxAttempts: 5, + }), + ); + }); +}); diff --git a/packages/sdk/test/scanner.test.ts b/packages/sdk/test/scanner.test.ts index 9eb772a..e2a0d04 100644 --- a/packages/sdk/test/scanner.test.ts +++ b/packages/sdk/test/scanner.test.ts @@ -249,6 +249,101 @@ describe('CloudBurnClient', () => { }); }); + it('skips rules whose required discovery datasets were unavailable', async () => { + mockedDiscoverAwsResources.mockResolvedValue({ + catalog: discoveryCatalog, + diagnostics: [ + { + code: 'ThrottlingException', + details: + 'Amazon CloudWatch Logs DescribeMetricFilters failed in us-east-1 with ThrottlingException: Rate exceeded Request ID: req-metric-filters.', + message: + 'Skipped cloudwatch discovery in us-east-1 because AWS throttled the required dataset after retrying.', + provider: 'aws', + region: 'us-east-1', + service: 'cloudwatch', + source: 'discovery', + status: 'throttled' as const, + }, + ], + resources: new LiveResourceBag({ + 'aws-cloudwatch-log-groups': [ + { + accountId: '123456789012', + logGroupArn: 'arn:aws:logs:us-east-1:123456789012:log-group:/aws/lambda/app', + logGroupClass: 'STANDARD', + logGroupName: '/aws/lambda/app', + region: 'us-east-1', + storedBytes: 1_073_741_824, + }, + ], + } as never), + unavailableDatasets: new Map([ + [ + 'aws-cloudwatch-log-metric-filter-coverage', + [ + { + code: 'ThrottlingException', + details: + 'Amazon CloudWatch Logs DescribeMetricFilters failed in us-east-1 with ThrottlingException: Rate exceeded Request ID: req-metric-filters.', + message: + 'Skipped cloudwatch discovery in us-east-1 because AWS throttled the required dataset after retrying.', + provider: 'aws' as const, + region: 'us-east-1', + service: 'cloudwatch', + source: 'discovery' as const, + status: 'throttled' as const, + }, + ], + ], + ]), + }); + + const scanner = new CloudBurnClient(); + + const result = await scanner.discover({ + config: { + discovery: { + enabledRules: ['CLDBRN-AWS-CLOUDWATCH-3'], + }, + iac: {}, + }, + target: { + mode: 'regions', + regions: ['us-east-1'], + }, + }); + + expect(result).toEqual({ + diagnostics: [ + { + code: 'ThrottlingException', + details: + 'Amazon CloudWatch Logs DescribeMetricFilters failed in us-east-1 with ThrottlingException: Rate exceeded Request ID: req-metric-filters.', + message: + 'Skipped cloudwatch discovery in us-east-1 because AWS throttled the required dataset after retrying.', + provider: 'aws', + region: 'us-east-1', + service: 'cloudwatch', + source: 'discovery', + status: 'throttled', + }, + { + details: + 'Amazon CloudWatch Logs DescribeMetricFilters failed in us-east-1 with ThrottlingException: Rate exceeded Request ID: req-metric-filters.', + message: + 'Skipped rule CLDBRN-AWS-CLOUDWATCH-3 because required discovery datasets were unavailable: aws-cloudwatch-log-metric-filter-coverage.', + provider: 'aws', + ruleId: 'CLDBRN-AWS-CLOUDWATCH-3', + service: 'cloudwatch', + source: 'discovery', + status: 'skipped', + }, + ], + providers: [], + }); + }); + it('defaults discover to the current region target when none is provided', async () => { mockedDiscoverAwsResources.mockResolvedValue({ catalog: discoveryCatalog, From 0efcc6000081d1526317c6df3cfa29b6fc81fdc0 Mon Sep 17 00:00:00 2001 From: Danny Steenman Date: Thu, 2 Apr 2026 11:14:21 +0200 Subject: [PATCH 2/2] fix(sdk): address PR review findings --- packages/sdk/src/providers/aws/discovery.ts | 9 +- .../sdk/test/providers/aws-discovery.test.ts | 141 +++++++++++++++++- .../test/providers/aws-resource-utils.test.ts | 7 +- 3 files changed, 150 insertions(+), 7 deletions(-) diff --git a/packages/sdk/src/providers/aws/discovery.ts b/packages/sdk/src/providers/aws/discovery.ts index c72013f..0c55d45 100644 --- a/packages/sdk/src/providers/aws/discovery.ts +++ b/packages/sdk/src/providers/aws/discovery.ts @@ -367,7 +367,7 @@ export const discoverAwsResources = async ( return { dataset: [definition.datasetKey, [] as DiscoveryDatasetMap[K]] as [K, DiscoveryDatasetMap[K]], - diagnostics: [buildDatasetFailureDiagnostic(definition.service, catalog.searchRegion, err)], + diagnostics: [buildDatasetFailureDiagnostic(definition.service, undefined, err)], unavailable: true, }; } @@ -407,9 +407,12 @@ export const discoverAwsResources = async ( options?.debugLogger, `aws: dataset ${definition.datasetKey} failed in ${region} after ${formatElapsedMs(regionStartedAtMs)}: ${err instanceof Error ? err.message : String(err)}`, ); - unavailable = true; + const isAccessDenied = isAwsAccessDeniedError(err); + if (!isAccessDenied) { + unavailable = true; + } diagnostics.push( - isAwsAccessDeniedError(err) + isAccessDenied ? { code: getAwsErrorCode(err), details: err instanceof Error ? err.message : String(err), diff --git a/packages/sdk/test/providers/aws-discovery.test.ts b/packages/sdk/test/providers/aws-discovery.test.ts index 04b2e0b..e20738c 100644 --- a/packages/sdk/test/providers/aws-discovery.test.ts +++ b/packages/sdk/test/providers/aws-discovery.test.ts @@ -2495,10 +2495,107 @@ describe('discoverAwsResources', () => { ]); }); + it('continues loading accessible regions when one region is access denied during dataset hydration', async () => { + mockedBuildAwsDiscoveryCatalog.mockResolvedValue({ + indexType: 'LOCAL', + resources: [ + { + accountId: '123456789012', + arn: 'arn:aws:logs:us-east-1:123456789012:log-group:/aws/lambda/app-us-east-1', + properties: [], + region: 'us-east-1', + resourceType: 'logs:log-group', + service: 'logs', + }, + { + accountId: '123456789012', + arn: 'arn:aws:logs:eu-west-1:123456789012:log-group:/aws/lambda/app-eu-west-1', + properties: [], + region: 'eu-west-1', + resourceType: 'logs:log-group', + service: 'logs', + }, + ], + searchRegion: 'us-east-1', + }); + mockedHydrateAwsCloudWatchLogMetricFilterCoverage.mockImplementation(async (resources) => { + const region = resources[0]?.region; + if (region === 'eu-west-1') { + const accessDeniedCause = Object.assign(new Error('Access denied by SCP.'), { + name: 'AccessDeniedException', + $metadata: { + httpStatusCode: 403, + requestId: 'req-log-groups', + }, + }); + + throw new Error( + 'Amazon CloudWatch Logs DescribeMetricFilters failed in eu-west-1 with AccessDeniedException: Access denied by SCP. Request ID: req-log-groups.', + { + cause: accessDeniedCause, + }, + ); + } + + return [ + { + accountId: '123456789012', + hasMetricFilters: true, + logGroupArn: 'arn:aws:logs:us-east-1:123456789012:log-group:/aws/lambda/app-us-east-1', + logGroupName: '/aws/lambda/app-us-east-1', + region: 'us-east-1', + }, + ]; + }); + + const result = await discoverAwsResources( + [ + createRule({ + discoveryDependencies: ['aws-cloudwatch-log-metric-filter-coverage'], + service: 'cloudwatch', + }), + ], + { mode: 'regions', regions: ['us-east-1', 'eu-west-1'] }, + ); + + expect(result.resources.get('aws-cloudwatch-log-metric-filter-coverage')).toEqual([ + { + accountId: '123456789012', + hasMetricFilters: true, + logGroupArn: 'arn:aws:logs:us-east-1:123456789012:log-group:/aws/lambda/app-us-east-1', + logGroupName: '/aws/lambda/app-us-east-1', + region: 'us-east-1', + }, + ]); + expect(result.diagnostics).toEqual([ + { + code: 'AccessDeniedException', + details: + 'Amazon CloudWatch Logs DescribeMetricFilters failed in eu-west-1 with AccessDeniedException: Access denied by SCP. Request ID: req-log-groups.', + message: + 'Skipped cloudwatch discovery in eu-west-1 because access is denied by a service control policy (SCP).', + provider: 'aws', + region: 'eu-west-1', + service: 'cloudwatch', + source: 'discovery', + status: 'access_denied', + }, + ]); + }); + it('records a non-fatal throttling diagnostic instead of aborting the run when a dataset exhausts retries', async () => { mockedBuildAwsDiscoveryCatalog.mockResolvedValue({ indexType: 'LOCAL', - resources: [catalog.resources[7]], + resources: [ + { + accountId: '123456789012', + arn: 'arn:aws:logs:eu-central-1:123456789012:log-group:/aws/lambda/app', + properties: [], + region: 'eu-central-1', + resourceType: 'logs:log-group', + service: 'logs', + }, + ], searchRegion: 'eu-central-1', }); mockedHydrateAwsCloudWatchLogMetricFilterCoverage.mockRejectedValue( @@ -2533,9 +2630,10 @@ describe('discoverAwsResources', () => { code: 'ThrottlingException', details: 'Amazon CloudWatch Logs DescribeMetricFilters failed in eu-central-1 with ThrottlingException: Rate exceeded Request ID: req-metric-filters.', - message: 'Skipped cloudwatch discovery in us-east-1 because AWS throttled the required dataset after retrying.', + message: + 'Skipped cloudwatch discovery in eu-central-1 because AWS throttled the required dataset after retrying.', provider: 'aws', - region: 'us-east-1', + region: 'eu-central-1', service: 'cloudwatch', source: 'discovery', status: 'throttled', @@ -2543,6 +2641,43 @@ describe('discoverAwsResources', () => { ]); }); + it('omits the region from account-scoped dataset diagnostics when the loader fails', async () => { + mockedResolveCurrentAwsRegion.mockResolvedValue('eu-west-1'); + mockedHydrateAwsCostUsage.mockRejectedValue( + Object.assign(new Error('AWS Cost Explorer GetCostAndUsage failed with ThrottlingException: Rate exceeded.'), { + code: 'ThrottlingException', + name: 'ThrottlingException', + $metadata: { + httpStatusCode: 400, + requestId: 'req-cost', + }, + }), + ); + + const result = await discoverAwsResources( + [ + createRule({ + service: 'costexplorer', + discoveryDependencies: ['aws-cost-usage'], + }), + ], + { mode: 'regions', regions: ['eu-west-1'] }, + ); + + expect(result.resources.get('aws-cost-usage')).toEqual([]); + expect(result.diagnostics).toEqual([ + { + code: 'ThrottlingException', + details: 'AWS Cost Explorer GetCostAndUsage failed with ThrottlingException: Rate exceeded.', + message: 'Skipped costexplorer discovery because AWS throttled the required dataset after retrying.', + provider: 'aws', + service: 'costexplorer', + source: 'discovery', + status: 'throttled', + }, + ]); + }); + it('merges very large dataset loads without overflowing the call stack', async () => { mockedBuildAwsDiscoveryCatalog.mockResolvedValue({ indexType: 'LOCAL', diff --git a/packages/sdk/test/providers/aws-resource-utils.test.ts b/packages/sdk/test/providers/aws-resource-utils.test.ts index d75349a..703de27 100644 --- a/packages/sdk/test/providers/aws-resource-utils.test.ts +++ b/packages/sdk/test/providers/aws-resource-utils.test.ts @@ -1,4 +1,4 @@ -import { describe, expect, it, vi } from 'vitest'; +import { afterEach, describe, expect, it, vi } from 'vitest'; import { withAwsServiceErrorContext } from '../../src/providers/aws/resources/utils.js'; const createThrottlingError = (): Error => @@ -12,6 +12,11 @@ const createThrottlingError = (): Error => }); describe('withAwsServiceErrorContext', () => { + afterEach(() => { + vi.useRealTimers(); + vi.restoreAllMocks(); + }); + it('retries throttled AWS calls with exponential backoff and jitter before succeeding', async () => { vi.useFakeTimers(); vi.spyOn(Math, 'random').mockReturnValue(0.5);