From ba3ad251174fa694ae52d079a49801de84db2bc1 Mon Sep 17 00:00:00 2001 From: Automation-Dude <144459672+Automation-Dude@users.noreply.github.com> Date: Tue, 24 Mar 2026 13:39:25 -0600 Subject: [PATCH] feat(scanner): add inline suppression annotations Add two comment-based suppression directives to CodeScanner that let developers mark intentional AI SDK usage, preventing false positives without having to exclude entire directories via .ai-bomignore. New annotations --------------- # ai-bom: ignore place at end of any line; suppresses SDK component detection on that line only. # ai-bom: ignore-file place anywhere in the first 5 lines of a file; suppresses SDK component detection for the entire file. Scope of suppression (deliberate design decision) -------------------------------------------------- Suppression applies ONLY to SDK/component detection (shadow AI, import patterns, usage patterns). Hardcoded API key detection is unconditional and fires regardless of any annotation. Why this boundary? Our first instinct was to suppress everything on an annotated line -- if a developer says 'ignore this', ignore everything. We challenged that thinking: a hardcoded credential leak is a security finding, not a false positive. A developer tagging an import as intentional should not inadvertently silence a leaked key sitting on the same line. The two concerns are categorically different: - SDK detection: 'Is this library present?' Can be intentional and already reviewed. False positives here are routine. - Hardcoded key: 'Is there a live credential in source?' Cannot be intentional in any safe codebase. False positives here are rare and the cost of a missed true positive is severe. Suppressing shadow AI detection does not break the security intent of the tool. Suppressing credential detection would. 
This mirrors how comparable tools handle the same tension: - bandit's #nosec suppresses rule violations but the project README explicitly warns against suppressing credential rules - detect-secrets has a separate allow-list mechanism specifically to prevent inline suppression of secrets - semgrep's nosemgrep follows the same pattern Idiomatic precedent ------------------- The annotation pattern follows established Python ecosystem conventions: flake8 / ruff -> # noqa mypy -> # type: ignore bandit -> # nosec Use cases --------- - Tool self-scans: pattern definition files contain SDK names as string literals (e.g. ai-bom's own detectors/ directory). These are the detection engine, not live SDK usage. - Test harnesses that import AI SDKs to test detection behavior. - Compatibility shim code that references SDK names for feature detection without loading them at runtime. - Documentation generators that embed SDK names as examples. Tests added (8 new cases in TestInlineSuppression) --------------------------------------------------- - Line-level suppression suppresses SDK detection - Line-level suppression does NOT suppress hardcoded API key - File-level suppression does NOT suppress hardcoded API key - Suppressed lines do not affect other lines in the same file - File-level suppression skips all SDK detection in the file - File-level annotation honoured on lines 2-5, not only line 1 - File-level annotation after line 5 has no effect - Files without any annotation are unaffected --- src/ai_bom/scanners/code_scanner.py | 23 ++++- tests/test_scanners/test_code_scanner.py | 104 +++++++++++++++++++++++ 2 files changed, 126 insertions(+), 1 deletion(-) diff --git a/src/ai_bom/scanners/code_scanner.py b/src/ai_bom/scanners/code_scanner.py index f7c1625..958a22e 100644 --- a/src/ai_bom/scanners/code_scanner.py +++ b/src/ai_bom/scanners/code_scanner.py @@ -208,8 +208,14 @@ def _scan_single_source_file( except Exception: return components lines = content.splitlines() + + # 
File-level suppression: any of the first 5 lines contains "# ai-bom: ignore-file". + # Only SDK component detection is suppressed -- API key detection always runs. + suppress_sdk_file = any("# ai-bom: ignore-file" in ln for ln in lines[:5]) + file_seen_sdks: set[str] = set() for line_num, line in enumerate(lines, start=1): + # API key detection always runs; it cannot be suppressed by inline annotations. api_key_results = detect_api_key(line) for _, provider, _ in api_key_results: component = AIComponent( @@ -226,6 +232,12 @@ def _scan_single_source_file( source="code", ) components.append(component) + + # Inline suppression: "# ai-bom: ignore" skips SDK detection on this line only. + # File-level "# ai-bom: ignore-file" skips SDK detection for the entire file. + if suppress_sdk_file or "# ai-bom: ignore" in line: + continue + for pat in LLM_PATTERNS: import_matched = any(re.search(ip, line) for ip in pat.import_patterns) usage_matched = any(re.search(up, line) for up in pat.usage_patterns) @@ -429,12 +441,16 @@ def _scan_source_files( lines = content.splitlines() + # File-level suppression: "# ai-bom: ignore-file" in first 5 lines. + # Only SDK component detection is suppressed -- API key detection always runs. + suppress_sdk_file = any("# ai-bom: ignore-file" in ln for ln in lines[:5]) + # Track seen SDKs in this file for deduplication file_seen_sdks: set[str] = set() # Scan file line by line for line_num, line in enumerate(lines, start=1): - # Check for API keys + # API key detection always runs; it cannot be suppressed by inline annotations. api_key_results = detect_api_key(line) for _, provider, _ in api_key_results: component = AIComponent( @@ -452,6 +468,11 @@ def _scan_source_files( ) components.append(component) + # Inline suppression: "# ai-bom: ignore" skips SDK detection on this line only. + # File-level "# ai-bom: ignore-file" skips SDK detection for the entire file. 
+ if suppress_sdk_file or "# ai-bom: ignore" in line: + continue + # Check each LLM pattern for llm_pat in LLM_PATTERNS: # Check import patterns diff --git a/tests/test_scanners/test_code_scanner.py b/tests/test_scanners/test_code_scanner.py index 3fe9d2c..b9c4841 100644 --- a/tests/test_scanners/test_code_scanner.py +++ b/tests/test_scanners/test_code_scanner.py @@ -293,6 +293,110 @@ def test_determine_component_type_llm_default(self, scanner): assert comp_type == ComponentType.llm_provider +class TestInlineSuppression: + """Tests for # ai-bom: ignore and # ai-bom: ignore-file suppression annotations. + + These annotations let developers mark intentional AI usage so SDK detection + skips those lines or files, eliminating false positives without excluding + entire directory subtrees via .ai-bomignore. + + Syntax: + - ``# ai-bom: ignore`` -- place at end of any line; no SDK detection on that line + - ``# ai-bom: ignore-file`` -- place in first 5 lines; no SDK detection in the file + """ + + def test_inline_ignore_suppresses_sdk_detection(self, scanner, tmp_path): + """A line tagged with # ai-bom: ignore should not produce an SDK component.""" + f = tmp_path / "app.py" + f.write_text("import openai # ai-bom: ignore\n") + components = scanner.scan(tmp_path) + assert not any("openai" in c.name.lower() for c in components) + + def test_inline_ignore_only_suppresses_tagged_line(self, scanner, tmp_path): + """Untagged lines in the same file are still detected normally.""" + f = tmp_path / "app.py" + f.write_text( + "import openai # ai-bom: ignore\n" + "import anthropic\n" + ) + components = scanner.scan(tmp_path) + names_lower = [c.name.lower() for c in components] + assert not any("openai" in n for n in names_lower), "suppressed openai should not appear" + assert any("anthropic" in n for n in names_lower), "unsuppressed anthropic should appear" + + def test_inline_ignore_does_not_suppress_hardcoded_api_key(self, scanner, tmp_path): + """# ai-bom: ignore suppresses SDK detection but NEVER
suppresses API key findings. + + Security findings (hardcoded_api_key) are unconditional -- they fire regardless + of any suppression annotation. A developer annotating an import as intentional + must not inadvertently silence a credential leak on the same line. + """ + f = tmp_path / "app.py" + f.write_text( + 'API_KEY = "sk-test1234567890abcdefghijklmnopqrstuvwxyz" # ai-bom: ignore\n' + ) + components = scanner.scan(tmp_path) + # The hardcoded key MUST still be reported even though the line is annotated + assert any("hardcoded_api_key" in c.flags for c in components) + + def test_ignore_file_does_not_suppress_hardcoded_api_key(self, scanner, tmp_path): + """# ai-bom: ignore-file suppresses SDK detection but NEVER suppresses API key findings.""" + f = tmp_path / "app.py" + f.write_text( + "# ai-bom: ignore-file\n" + 'API_KEY = "sk-test1234567890abcdefghijklmnopqrstuvwxyz"\n' + ) + components = scanner.scan(tmp_path) + assert any("hardcoded_api_key" in c.flags for c in components) + + def test_ignore_file_annotation_suppresses_entire_file(self, scanner, tmp_path): + """# ai-bom: ignore-file in the first 5 lines suppresses SDK detection file-wide.""" + f = tmp_path / "app.py" + f.write_text( + "# ai-bom: ignore-file\n" + "import openai\n" + "import anthropic\n" + "from langchain import LangChain\n" + ) + components = scanner.scan(tmp_path) + assert components == [], "file-level suppression should produce zero components" + + def test_ignore_file_works_within_first_five_lines(self, scanner, tmp_path): + """# ai-bom: ignore-file is honoured when placed on lines 2-5, not just line 1.""" + f = tmp_path / "app.py" + f.write_text( + '"""Module docstring."""\n' + "# ai-bom: ignore-file\n" + "import openai\n" + ) + components = scanner.scan(tmp_path) + assert components == [] + + def test_ignore_file_after_line_five_is_not_honoured(self, scanner, tmp_path): + """# ai-bom: ignore-file placed after line 5 should NOT suppress the file.""" + f = tmp_path / "app.py" +
f.write_text( + "# line 1\n" + "# line 2\n" + "# line 3\n" + "# line 4\n" + "# line 5\n" + "# ai-bom: ignore-file\n" # line 6 -- too late + "import openai\n" + ) + components = scanner.scan(tmp_path) + assert any("openai" in c.name.lower() for c in components) + + def test_files_without_annotation_unaffected(self, scanner, tmp_path): + """Normal files without any annotation continue to be scanned as before.""" + f = tmp_path / "app.py" + f.write_text("import openai\nimport anthropic\n") + components = scanner.scan(tmp_path) + names_lower = [c.name.lower() for c in components] + assert any("openai" in n for n in names_lower) + assert any("anthropic" in n for n in names_lower) + + class TestIsModelPinned: def test_is_model_pinned_with_date(self, scanner): assert scanner._is_model_pinned("gpt-4-0314")