From 3289ce4a14e8bd6fe66e208e53b4f0d243a435a1 Mon Sep 17 00:00:00 2001 From: GrosQuildu Date: Mon, 23 Feb 2026 10:31:09 +0100 Subject: [PATCH 1/8] static-analysis selects all or important queries --- .claude-plugin/marketplace.json | 2 +- .../.claude-plugin/plugin.json | 2 +- .../static-analysis/agents/semgrep-scanner.md | 2 +- .../static-analysis/agents/semgrep-triager.md | 2 +- .../static-analysis/skills/codeql/SKILL.md | 14 +- .../codeql/references/important-only-suite.md | 129 ++++++++++++++ .../skills/codeql/workflows/run-analysis.md | 158 ++++++++++++++---- .../skills/sarif-parsing/SKILL.md | 4 +- .../static-analysis/skills/semgrep/SKILL.md | 98 +++++++++-- .../skills/semgrep/references/scan-modes.md | 108 ++++++++++++ .../semgrep/references/scanner-task-prompt.md | 9 +- 11 files changed, 462 insertions(+), 66 deletions(-) create mode 100644 plugins/static-analysis/skills/codeql/references/important-only-suite.md create mode 100644 plugins/static-analysis/skills/semgrep/references/scan-modes.md diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 2a0970e..421b3c8 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -187,7 +187,7 @@ }, { "name": "static-analysis", - "version": "1.1.0", + "version": "1.2.0", "description": "Static analysis toolkit with CodeQL, Semgrep, and SARIF parsing for security vulnerability detection", "author": { "name": "Axel Mierczuk & Paweł Płatek" diff --git a/plugins/static-analysis/.claude-plugin/plugin.json b/plugins/static-analysis/.claude-plugin/plugin.json index 1e69cf1..b768b1d 100644 --- a/plugins/static-analysis/.claude-plugin/plugin.json +++ b/plugins/static-analysis/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "static-analysis", - "version": "1.1.0", + "version": "1.2.0", "description": "Static analysis toolkit with CodeQL, Semgrep, and SARIF parsing for security vulnerability detection", "author": { "name": "Axel Mierczuk & Paweł Płatek" diff --git 
a/plugins/static-analysis/agents/semgrep-scanner.md b/plugins/static-analysis/agents/semgrep-scanner.md index 4c97c43..d01bd7e 100644 --- a/plugins/static-analysis/agents/semgrep-scanner.md +++ b/plugins/static-analysis/agents/semgrep-scanner.md @@ -68,4 +68,4 @@ After all scans complete, report: For the complete scanner task prompt template with variable substitutions and examples, see: -`{baseDir}/skills/semgrep/references/scanner-task-prompt.md` +`skills/semgrep/references/scanner-task-prompt.md` diff --git a/plugins/static-analysis/agents/semgrep-triager.md b/plugins/static-analysis/agents/semgrep-triager.md index 0b31480..c6798cd 100644 --- a/plugins/static-analysis/agents/semgrep-triager.md +++ b/plugins/static-analysis/agents/semgrep-triager.md @@ -104,4 +104,4 @@ After triage, provide a summary: For the complete triage task prompt template with variable substitutions and examples, see: -`{baseDir}/skills/semgrep/references/triage-task-prompt.md` +`skills/semgrep/references/triage-task-prompt.md` diff --git a/plugins/static-analysis/skills/codeql/SKILL.md b/plugins/static-analysis/skills/codeql/SKILL.md index 7d87d2a..8d88699 100644 --- a/plugins/static-analysis/skills/codeql/SKILL.md +++ b/plugins/static-analysis/skills/codeql/SKILL.md @@ -2,11 +2,13 @@ name: codeql description: >- Runs CodeQL static analysis for security vulnerability detection - using interprocedural data flow and taint tracking. Applicable when - finding vulnerabilities, running a security scan, performing a security - audit, running CodeQL, building a CodeQL database, selecting query - rulesets, creating data extension models, or processing CodeQL SARIF - output. NOT for writing custom QL queries or CI/CD pipeline setup. + using interprocedural data flow and taint tracking. Supports two + scan modes - "run all" (all queries from all packs, unfiltered) and + "important only" (security vulnerabilities filtered by precision and severity). 
Applicable + when finding vulnerabilities, running a security scan, performing a + security audit, running CodeQL, building a CodeQL database, selecting + query rulesets, creating data extension models, or processing CodeQL + SARIF output. NOT for writing custom QL queries or CI/CD pipeline setup. allowed-tools: - Bash - Read @@ -24,7 +26,7 @@ allowed-tools: Supported languages: Python, JavaScript/TypeScript, Go, Java/Kotlin, C/C++, C#, Ruby, Swift. -**Skill resources:** Reference files and templates are located at `{baseDir}/references/` and `{baseDir}/workflows/`. Use `{baseDir}` to resolve paths to these files at runtime. +**Skill resources:** Reference files and templates are located at `references/` and `workflows/` (relative to this skill directory). ## Quick Start diff --git a/plugins/static-analysis/skills/codeql/references/important-only-suite.md b/plugins/static-analysis/skills/codeql/references/important-only-suite.md new file mode 100644 index 0000000..cd6af79 --- /dev/null +++ b/plugins/static-analysis/skills/codeql/references/important-only-suite.md @@ -0,0 +1,129 @@ +# Important-Only Query Suite + +In important-only mode, generate a custom `.qls` query suite file at runtime. This applies the same precision/severity filtering to **all** packs (official + third-party). + +## Why a Custom Suite + +The built-in `security-extended` suite only applies to the official `codeql/-queries` pack. Third-party packs (Trail of Bits, Community Packs) run unfiltered when passed directly to `codeql database analyze`. A custom `.qls` suite loads queries from all packs and applies a single set of `include`/`exclude` filters uniformly. 
+ +## Metadata Criteria + +Queries are included if they match **any** of these blocks (OR logic across blocks, AND logic within): + +| Block | kind | precision | problem.severity | tags | +|-------|------|-----------|-----------------|------| +| 1 | `problem`, `path-problem` | `high`, `very-high` | *(any)* | must contain `security` | +| 2 | `problem`, `path-problem` | `medium` | `error` only | must contain `security` | + +Excluded: deprecated queries, model editor/generator queries. Experimental queries are **included**. + +**Key difference from `security-extended`:** Medium-precision queries require `error` severity (not `warning`). This tightens the filter to only include medium-precision findings that indicate likely incorrect behavior. + +## Suite Template + +Generate this file as `important-only.qls` in the results directory before running analysis: + +```yaml +- description: Important-only — security vulnerabilities, medium-high confidence +# Official queries +- queries: . + from: codeql/-queries +# Third-party packs (include only if installed, one entry per pack) +# - queries: . +# from: trailofbits/-queries +# - queries: . +# from: GitHubSecurityLab/CodeQL-Community-Packs- +# Filtering: security only, high/very-high precision (any severity), +# medium precision (error only). Experimental queries included. 
+- include: + kind: + - problem + - path-problem + precision: + - high + - very-high + tags contain: + - security +- include: + kind: + - problem + - path-problem + precision: + - medium + problem.severity: + - error + tags contain: + - security +- exclude: + deprecated: // +- exclude: + tags contain: + - modeleditor + - modelgenerator +``` + +## Generation Script + +The agent should generate the suite file dynamically based on installed packs: + +```bash +RESULTS_DIR="${DB_NAME%.db}-results" +SUITE_FILE="$RESULTS_DIR/important-only.qls" + +cat > "$SUITE_FILE" << 'HEADER' +- description: Important-only — security vulnerabilities, medium-high confidence +HEADER + +# Always include official pack +echo "- queries: . + from: codeql/${LANG}-queries" >> "$SUITE_FILE" + +# Add each installed third-party pack +for PACK in $INSTALLED_THIRD_PARTY_PACKS; do + echo "- queries: . + from: ${PACK}" >> "$SUITE_FILE" +done + +# Append the filtering rules +cat >> "$SUITE_FILE" << 'FILTERS' +- include: + kind: + - problem + - path-problem + precision: + - high + - very-high + tags contain: + - security +- include: + kind: + - problem + - path-problem + precision: + - medium + problem.severity: + - error + tags contain: + - security +- exclude: + deprecated: // +- exclude: + tags contain: + - modeleditor + - modelgenerator +FILTERS + +# Verify the suite resolves correctly +codeql resolve queries "$SUITE_FILE" | head -20 +echo "Suite generated: $SUITE_FILE" +``` + +## How Filtering Works on Third-Party Queries + +CodeQL query suite filters match on query metadata (`@precision`, `@problem.severity`, `@tags`). Third-party queries that: + +- **Have proper metadata**: Filtered normally (kept if they match the include criteria) +- **Lack `@precision`**: Excluded by `include` blocks (they require precision to match). This is correct — if a query doesn't declare its precision, we cannot assess its confidence. +- **Lack `@tags security`**: Excluded. 
Non-security queries are not relevant to important-only mode. + +This is a stricter-than-necessary filter for third-party packs, but it ensures only well-annotated, high-confidence security queries run in important-only mode. diff --git a/plugins/static-analysis/skills/codeql/workflows/run-analysis.md b/plugins/static-analysis/skills/codeql/workflows/run-analysis.md index f2c7735..14ff9c2 100644 --- a/plugins/static-analysis/skills/codeql/workflows/run-analysis.md +++ b/plugins/static-analysis/skills/codeql/workflows/run-analysis.md @@ -2,13 +2,39 @@ Execute CodeQL security queries on an existing database with ruleset selection and result formatting. +## Scan Modes + +Two modes control analysis scope. Select mode in Step 2 (before pack selection). + +| Mode | Packs | Filtering | +|------|-------|-----------| +| **Run all** | All installed packs (official + Trail of Bits + Community) | None — all queries run | +| **Important only** | All installed packs (official + Trail of Bits + Community) | Custom suite: security-only, medium-high precision, error severity for medium precision | + +**Run all** passes all installed query packs directly to `codeql database analyze` without suite filtering. Every query in every pack runs. + +**Important only** generates a custom `.qls` query suite at runtime that loads all installed packs and applies uniform filtering. See [important-only-suite.md](../references/important-only-suite.md) for the suite template and generation script. 
+ +| Metadata | Important-only criteria | +|---|---| +| `@tags` | Must contain `security` (excludes correctness, maintainability, readability) | +| `@precision` high/very-high | Included at any `@problem.severity` | +| `@precision` medium | Included only with `@problem.severity: error` (not `warning`) | +| `@precision` low | Excluded | +| Experimental | Included (both modes run experimental queries) | +| Diagnostic / metric | Excluded (both modes skip non-alert queries) | + +Third-party queries without `@precision` or `@tags security` metadata are excluded — if a query doesn't declare its confidence, we cannot assess it for important-only mode. + +--- + ## Task System Create these tasks on workflow start: ``` TaskCreate: "Select database and detect language" (Step 1) -TaskCreate: "Check additional query packs and detect model packs" (Step 2) - blockedBy: Step 1 +TaskCreate: "Select scan mode, check additional packs" (Step 2) - blockedBy: Step 1 TaskCreate: "Select query packs, model packs, and threat models" (Step 3) - blockedBy: Step 2 TaskCreate: "Execute analysis" (Step 4) - blockedBy: Step 3 TaskCreate: "Process and report results" (Step 5) - blockedBy: Step 4 @@ -18,7 +44,7 @@ TaskCreate: "Process and report results" (Step 5) - blockedBy: Step 4 | Task | Gate Type | Cannot Proceed Until | |------|-----------|---------------------| -| Step 2 | **SOFT GATE** | User confirms installed/ignored for each missing pack | +| Step 2 | **SOFT GATE** | User selects mode; confirms installed/ignored for each missing pack | | Step 3 | **HARD GATE** | User approves query packs, model packs, and threat model selection | --- @@ -83,11 +109,28 @@ fi --- -### Step 2: Check Additional Query Packs and Detect Model Packs +### Step 2: Select Scan Mode, Check Additional Packs + +#### 2a: Select Scan Mode + +Use `AskUserQuestion`: + +``` +header: "Scan Mode" +question: "Which scan mode should be used?" 
+multiSelect: false +options: + - label: "Run all (Recommended)" + description: "Maximum coverage — all queries from all installed packs, unfiltered" + - label: "Important only" + description: "Security vulnerabilities only — all packs filtered by custom suite (medium-high precision, error severity)" +``` + +Record the selected mode. It affects Steps 3 and 4. -Check if recommended third-party query packs are installed and detect available model packs. For each missing pack, prompt user to install or ignore. +In both modes, check and install third-party packs below. Both modes use all installed packs — the difference is whether filtering is applied. -#### 2a: Query Packs +#### 2b: Query Packs **Available packs by language** (see [ruleset-catalog.md](../references/ruleset-catalog.md)): @@ -127,7 +170,7 @@ codeql pack download **On "Ignore":** Mark pack as skipped, continue to next pack. -#### 2b: Detect Model Packs +#### 2c: Detect Model Packs Model packs contain data extensions (custom sources, sinks, flow summaries) that improve CodeQL's data flow analysis for project-specific or framework-specific APIs. To create new extensions, run the [create-data-extensions](create-data-extensions.md) workflow first. @@ -159,7 +202,7 @@ rg -l '^extensions:' --glob '*.yml' --glob '!codeql_*.db/**' | head -20 codeql resolve qlpacks 2>/dev/null | grep -iE 'model|extension' ``` -**Record all detected model packs for presentation in Step 3.** If no model packs are found, note this and proceed — model packs are optional. +**Record all detected model packs for presentation in Step 3.** If no model packs are found, note this and proceed — model packs are optional. Model packs are included in both scan modes since they improve data flow analysis quality without adding noise. --- @@ -169,26 +212,40 @@ codeql resolve qlpacks 2>/dev/null | grep -iE 'model|extension' > > Present all available packs as checklists. Query packs first, then model packs. 
-#### 3a: Select Query Packs +#### 3a: Confirm Query Packs -Use `AskUserQuestion` tool with `multiSelect: true`: +**If scan mode is "Important only":** All installed packs will be included with metadata filtering via a custom query suite. Inform the user: + +``` +**Scan mode: Important only** +All installed packs included, filtered by custom query suite: +- Official: codeql/-queries (security queries, medium-high precision) +- Trail of Bits: trailofbits/-queries [if installed] +- Community: GitHubSecurityLab/CodeQL-Community-Packs- [if installed] + +Filtering: security tag required, high/very-high precision (any severity), +medium precision (error severity only). Experimental queries included. +Third-party queries without @precision or @tags metadata are excluded. +``` + +See [important-only-suite.md](../references/important-only-suite.md) for the suite template. + +Proceed directly to 3b (model packs). + +**If scan mode is "Run all":** All installed packs run without query suite filtering. Use `AskUserQuestion` to confirm: ``` header: "Query Packs" -question: "Select query packs to run:" +question: "All installed query packs will run unfiltered. Confirm or select individually:" multiSelect: false options: - label: "Use all (Recommended)" - description: "Run all installed query packs for maximum coverage" - - label: "security-extended" - description: "codeql/-queries - Core security queries, low false positives" - - label: "security-and-quality" - description: "Includes code quality checks - more findings, more noise" + description: "Run all queries from all installed packs — maximum coverage" - label: "Select individually" description: "Choose specific packs from the full list" ``` -**If "Use all":** Include all installed query packs: `security-extended` + Trail of Bits + Community Packs (whichever are installed). +**If "Use all":** Include all installed packs: official `codeql/-queries` + Trail of Bits + Community Packs. No suite filtering — every query runs. 
**If "Select individually":** Follow up with a `multiSelect: true` question listing all installed packs: @@ -197,27 +254,23 @@ header: "Query Packs" question: "Select query packs to run:" multiSelect: true options: - - label: "security-extended" - description: "codeql/-queries - Core security queries, low false positives" - - label: "security-and-quality" - description: "Includes code quality checks - more findings, more noise" - - label: "security-experimental" - description: "Bleeding-edge queries - may have higher false positives" + - label: "codeql/-queries" + description: "Official CodeQL queries (all queries, no suite filtering)" - label: "Trail of Bits" description: "trailofbits/-queries - Memory safety, domain expertise" - label: "Community Packs" description: "GitHubSecurityLab/CodeQL-Community-Packs- - Additional security queries" ``` -**Only show built-in and third-party packs that are installed (from Step 2a)** +**Only show packs that are installed (from Step 2b)** **⛔ STOP: Await user selection** #### 3b: Select Model Packs (if any detected) -**Skip this sub-step if no model packs were detected in Step 2b.** +**Skip this sub-step if no model packs were detected in Step 2c.** -Present detected model packs from Step 2b. Categorize by source: +Present detected model packs from Step 2c. Categorize by source: Use `AskUserQuestion` tool: @@ -234,7 +287,7 @@ options: description: "Run without model packs" ``` -**If "Use all":** Include all model packs and data extensions detected in Step 2b. +**If "Use all":** Include all model packs and data extensions detected in Step 2c. 
**If "Select individually":** Follow up with a `multiSelect: true` question: @@ -243,13 +296,13 @@ header: "Model Packs" question: "Select model packs to include:" multiSelect: true options: - # For each in-repo model pack found in 2b: + # For each in-repo model pack found in 2c: - label: "" description: "In-repo model pack at - custom data flow models" - # For each standalone data extension found in 2b: + # For each standalone data extension found in 2c: - label: "In-repo extensions" description: " data extension files found in codebase (auto-discovered)" - # For each installed model pack found in 2b: + # For each installed model pack found in 2c: - label: "" description: "Installed model pack - " ``` @@ -313,15 +366,45 @@ THREAT_MODEL_FLAG="" # or "--threat-models=remote,local" etc. ### Step 4: Execute Analysis -Run analysis with **only** the packs selected by user in Step 3. +Run analysis using the approach determined by scan mode. + +#### Important-only mode: Generate custom suite + +Generate the custom `.qls` suite file that includes all installed packs with filtering. See [important-only-suite.md](../references/important-only-suite.md) for the full template and generation script. 
+ +```bash +RESULTS_DIR="${DB_NAME%.db}-results" +mkdir -p "$RESULTS_DIR" +SUITE_FILE="$RESULTS_DIR/important-only.qls" + +# Generate suite — see important-only-suite.md for complete script +# The suite loads all installed packs and applies security+precision filtering + +# Verify suite resolves correctly before running +codeql resolve queries "$SUITE_FILE" | wc -l +``` + +Then run analysis with the generated suite: + +```bash +codeql database analyze $DB_NAME \ + --format=sarif-latest \ + --output="$RESULTS_DIR/results.sarif" \ + --threads=0 \ + $THREAT_MODEL_FLAG \ + $MODEL_PACK_FLAGS \ + $ADDITIONAL_PACK_FLAGS \ + -- "$SUITE_FILE" +``` + +#### Run-all mode: Pass packs directly ```bash -# Results directory matches database name RESULTS_DIR="${DB_NAME%.db}-results" mkdir -p "$RESULTS_DIR" -# Build pack list from user selections in Step 3a -PACKS="" +# Build pack list — all installed packs, no suite filtering +PACKS="" # Build model pack flags from user selections in Step 3b # --model-packs for installed model packs @@ -350,7 +433,7 @@ codeql database analyze $DB_NAME \ | In-repo model packs (with `qlpack.yml`) | `--additional-packs` | `--additional-packs=./lib/codeql-models` | | In-repo standalone extensions (`.yml`) | `--additional-packs` | `--additional-packs=.` | -**Example (C++ with query packs and model packs):** +**Example (C++ run-all mode):** ```bash codeql database analyze codeql_1.db \ @@ -358,12 +441,12 @@ codeql database analyze codeql_1.db \ --output=codeql_1-results/results.sarif \ --threads=0 \ --additional-packs=./codeql-models \ - -- codeql/cpp-queries:codeql-suites/cpp-security-extended.qls \ + -- codeql/cpp-queries \ trailofbits/cpp-queries \ GitHubSecurityLab/CodeQL-Community-Packs-CPP ``` -**Example (Python with installed model pack):** +**Example (Python important-only mode with custom suite):** ```bash codeql database analyze codeql_1.db \ @@ -371,7 +454,7 @@ codeql database analyze codeql_1.db \ --output=codeql_1-results/results.sarif \ 
--threads=0 \ --model-packs=myorg/python-models \ - -- codeql/python-queries:codeql-suites/python-security-extended.qls + -- codeql_1-results/important-only.qls ``` ### Performance Flags @@ -421,6 +504,7 @@ Report to user: **Database:** $DB_NAME **Language:** +**Scan mode:** Run all | Important only **Query packs:** **Model packs:** **Threat models:** diff --git a/plugins/static-analysis/skills/sarif-parsing/SKILL.md b/plugins/static-analysis/skills/sarif-parsing/SKILL.md index 577c6dc..e3d7a5e 100644 --- a/plugins/static-analysis/skills/sarif-parsing/SKILL.md +++ b/plugins/static-analysis/skills/sarif-parsing/SKILL.md @@ -459,11 +459,11 @@ def check_for_regressions(baseline: str, current: str) -> int: ## Skill Resources -For ready-to-use query templates, see [{baseDir}/resources/jq-queries.md]({baseDir}/resources/jq-queries.md): +For ready-to-use query templates, see [resources/jq-queries.md](resources/jq-queries.md): - 40+ jq queries for common SARIF operations - Severity filtering, rule extraction, aggregation patterns -For Python utilities, see [{baseDir}/resources/sarif_helpers.py]({baseDir}/resources/sarif_helpers.py): +For Python utilities, see [resources/sarif_helpers.py](resources/sarif_helpers.py): - `normalize_path()` - Handle tool-specific path formats - `compute_fingerprint()` - Stable fingerprinting ignoring paths - `deduplicate_results()` - Remove duplicates across runs diff --git a/plugins/static-analysis/skills/semgrep/SKILL.md b/plugins/static-analysis/skills/semgrep/SKILL.md index f91fd8c..144de44 100644 --- a/plugins/static-analysis/skills/semgrep/SKILL.md +++ b/plugins/static-analysis/skills/semgrep/SKILL.md @@ -1,9 +1,11 @@ --- name: semgrep -description: Run Semgrep static analysis scan on a codebase using parallel subagents. Automatically - detects and uses Semgrep Pro for cross-file analysis when available. Use when asked to scan - code for vulnerabilities, run a security audit with Semgrep, find bugs, or perform - static analysis. 
Spawns parallel workers for multi-language codebases and triage. +description: Run Semgrep static analysis scan on a codebase using parallel subagents. Supports + two scan modes - "run all" (full coverage) and "important only" (high-confidence security + vulnerabilities). Automatically detects and uses Semgrep Pro for cross-file analysis when + available. Use when asked to scan code for vulnerabilities, run a security audit with Semgrep, + find bugs, or perform static analysis. Spawns parallel workers for multi-language codebases + and triage. allowed-tools: - Bash - Read @@ -61,6 +63,23 @@ Pro enables: cross-file taint tracking, inter-procedural analysis, and additiona --- +## Scan Modes + +Two modes control scan scope and result filtering. Select mode early in the workflow (Step 2). + +| Mode | Coverage | Findings Reported | +|------|----------|-------------------| +| **Run all** | All rulesets, all severity levels | Everything (triaged for true/false positives) | +| **Important only** | All rulesets, but pre-filtered and post-filtered | Security vulnerabilities only, medium-high confidence and impact | + +**Important only** applies two layers of filtering: +1. **Pre-filter**: `--severity MEDIUM --severity HIGH --severity CRITICAL` (CLI flag, excludes LOW/INFO at scan time) +2. **Post-filter**: JSON metadata filtering — keeps only findings where `category=security`, `confidence∈{MEDIUM,HIGH}`, `impact∈{MEDIUM,HIGH}` + +See [scan-modes.md](references/scan-modes.md) for detailed metadata criteria and jq filter commands. + +--- + ## Orchestration Architecture This skill uses **parallel Task subagents** for maximum efficiency: @@ -69,11 +88,11 @@ This skill uses **parallel Task subagents** for maximum efficiency: ┌─────────────────────────────────────────────────────────────────┐ │ MAIN AGENT │ │ 1. Detect languages + check Pro availability │ -│ 2. Select rulesets based on detection (ref: rulesets.md) │ +│ 2. 
Select scan mode + rulesets (ref: rulesets.md, scan-modes.md)│ │ 3. Present plan + rulesets, get approval [⛔ HARD GATE] │ -│ 4. Spawn parallel scan Tasks (with approved rulesets) │ +│ 4. Spawn parallel scan Tasks (with approved rulesets + mode) │ │ 5. Spawn parallel triage Tasks │ -│ 6. Collect and report results │ +│ 6. Collect and report results (mode-dependent filtering) │ └─────────────────────────────────────────────────────────────────┘ │ Step 4 │ Step 5 ▼ ▼ @@ -96,11 +115,11 @@ This skill uses the **Task system** to enforce workflow compliance. On invocatio ``` TaskCreate: "Detect languages and Pro availability" (Step 1) -TaskCreate: "Select rulesets based on detection" (Step 2) - blockedBy: Step 1 +TaskCreate: "Select scan mode and rulesets" (Step 2) - blockedBy: Step 1 TaskCreate: "Present plan with rulesets, get approval" (Step 3) - blockedBy: Step 2 -TaskCreate: "Execute scans with approved rulesets" (Step 4) - blockedBy: Step 3 +TaskCreate: "Execute scans with approved rulesets and mode" (Step 4) - blockedBy: Step 3 TaskCreate: "Triage findings" (Step 5) - blockedBy: Step 4 -TaskCreate: "Report results" (Step 6) - blockedBy: Step 5 +TaskCreate: "Report results (with mode-dependent filtering)" (Step 6) - blockedBy: Step 5 ``` ### Mandatory Gates @@ -168,9 +187,24 @@ Map findings to categories: | `.tf` | Terraform | | k8s manifests | Kubernetes | -### Step 2: Select Rulesets Based on Detection +### Step 2: Select Scan Mode and Rulesets + +**First, select scan mode** using `AskUserQuestion`: -Using the detected languages and frameworks from Step 1, select rulesets by following the **Ruleset Selection Algorithm** in [rulesets.md]({baseDir}/references/rulesets.md). +``` +header: "Scan Mode" +question: "Which scan mode should be used?" 
+multiSelect: false +options: + - label: "Run all (Recommended)" + description: "Full coverage — all rulesets, all severity levels, triaged for true/false positives" + - label: "Important only" + description: "Security vulnerabilities only — medium-high confidence and impact, no code quality" +``` + +Record the selected mode. It affects Steps 4 and 6. + +**Then, select rulesets.** Using the detected languages and frameworks from Step 1, select rulesets by following the **Ruleset Selection Algorithm** in [rulesets.md](references/rulesets.md). The algorithm covers: 1. Security baseline (always included) @@ -207,6 +241,7 @@ Present plan to user with **explicit ruleset listing**: **Target:** /path/to/codebase **Output directory:** ./semgrep-results-001/ **Engine:** Semgrep Pro (cross-file analysis) | Semgrep OSS (single-file) +**Scan mode:** Run all | Important only (security vulns, medium-high confidence/impact) ### Detected Languages/Technologies: - Python (1,234 files) - Django framework detected @@ -301,7 +336,11 @@ echo "Output directory: $OUTPUT_DIR" **Spawn N Tasks in a SINGLE message** (one per language category) using `subagent_type: static-analysis:semgrep-scanner`. -Use the scanner task prompt template from [scanner-task-prompt.md]({baseDir}/references/scanner-task-prompt.md). +Use the scanner task prompt template from [scanner-task-prompt.md](references/scanner-task-prompt.md). + +**Mode-dependent scanner flags:** +- **Run all**: No additional flags +- **Important only**: Add `--severity MEDIUM --severity HIGH --severity CRITICAL` to every `semgrep` command (set `[SEVERITY_FLAGS]` in the template) **Example - 3 Language Scan (with approved rulesets):** @@ -323,16 +362,43 @@ Spawn these 3 Tasks in a SINGLE message: After scan Tasks complete, spawn triage Tasks using `subagent_type: static-analysis:semgrep-triager` (triage requires reading code context, not just running commands). 
-Use the triage task prompt template from [triage-task-prompt.md]({baseDir}/references/triage-task-prompt.md). +Use the triage task prompt template from [triage-task-prompt.md](references/triage-task-prompt.md). ### Step 6: Collect Results (Main Agent) -After all Tasks complete, generate merged SARIF and report: +After all Tasks complete, apply mode-dependent filtering (if applicable), then generate merged SARIF and report. + +**Important-only mode: Post-filter before triage/merge** + +In important-only mode, filter each scan result JSON to remove non-security and low-confidence findings before triage. See [scan-modes.md](references/scan-modes.md) for the complete jq filter. + +```bash +# Apply important-only filter to all scan result JSON files +for f in "$OUTPUT_DIR"/*-*.json; do + [[ "$f" == *-triage.json || "$f" == *-important.json ]] && continue + jq '{ + results: [.results[] | + ((.extra.metadata.category // "security") | ascii_downcase) as $cat | + ((.extra.metadata.confidence // "HIGH") | ascii_upcase) as $conf | + ((.extra.metadata.impact // "HIGH") | ascii_upcase) as $imp | + select( + ($cat == "security") and + ($conf == "MEDIUM" or $conf == "HIGH") and + ($imp == "MEDIUM" or $imp == "HIGH") + ) + ], + errors: .errors, + paths: .paths + }' "$f" > "${f%.json}-important.json" +done +``` + +Then use the `-important.json` files as input for triage instead of the raw scan files. **Generate merged SARIF with only triaged true positives:** ```bash -uv run {baseDir}/scripts/merge_triaged_sarif.py [OUTPUT_DIR] +uv run scripts/merge_triaged_sarif.py [OUTPUT_DIR] ``` This script: diff --git a/plugins/static-analysis/skills/semgrep/references/scan-modes.md b/plugins/static-analysis/skills/semgrep/references/scan-modes.md new file mode 100644 index 0000000..5a2b171 --- /dev/null +++ b/plugins/static-analysis/skills/semgrep/references/scan-modes.md @@ -0,0 +1,108 @@ +# Scan Modes Reference + +## Mode: Run All + +Full scan with all rulesets and severity levels. 
Current default behavior. No filtering applied — all findings are reported and triaged. + +## Mode: Important Only + +Focused on high-confidence security vulnerabilities. Excludes code quality, best practices, and low-confidence audit findings. + +### Pre-Filter: CLI Severity Flag + +Add these flags to every `semgrep` command: + +```bash +--severity MEDIUM --severity HIGH --severity CRITICAL +``` + +This excludes LOW/INFO severity findings at scan time, reducing output volume before post-filtering. + +### Post-Filter: Metadata Criteria + +After scanning, filter each JSON result file to keep only findings matching ALL of: + +| Metadata Field | Accepted Values | Rationale | +|---|---|---| +| `extra.metadata.category` | `"security"` | Excludes correctness, best-practice, maintainability, performance | +| `extra.metadata.confidence` | `"MEDIUM"`, `"HIGH"` | Excludes low-precision rules (high false positive rate) | +| `extra.metadata.impact` | `"MEDIUM"`, `"HIGH"` | Excludes low-impact informational findings | + +**Third-party rules** (Trail of Bits, 0xdea, Decurity, etc.) may not have `confidence`/`impact`/`category` metadata. Findings **without** these metadata fields are **kept** — we cannot filter what is not annotated, and third-party rules are typically security-focused. 
+ +### Semgrep Metadata Background + +Semgrep security rules have these metadata fields (required for `category: security` in the official registry): + +| Field | Purpose | Values | +|---|---|---| +| `severity` (top-level) | Overall rule severity, derived from likelihood × impact | `LOW`, `MEDIUM`, `HIGH`, `CRITICAL` | +| `category` | Rule category | `security`, `correctness`, `best-practice`, `maintainability`, `performance` | +| `confidence` | True positive rate of the rule (precision) | `LOW`, `MEDIUM`, `HIGH` | +| `impact` | Potential damage if vulnerability is exploited | `LOW`, `MEDIUM`, `HIGH` | +| `likelihood` | How likely the vulnerability is exploitable | `LOW`, `MEDIUM`, `HIGH` | +| `subcategory` | Finding type | `vuln`, `audit`, `secure default` | + +Key relationship: `severity = f(likelihood, impact)` while `confidence` is independent (describes rule quality, not vulnerability severity). + +### Post-Filter jq Command + +Apply to each JSON result file after scanning: + +```bash +# Filter a single result file +jq '{ + results: [.results[] | + ((.extra.metadata.category // "security") | ascii_downcase) as $cat | + ((.extra.metadata.confidence // "HIGH") | ascii_upcase) as $conf | + ((.extra.metadata.impact // "HIGH") | ascii_upcase) as $imp | + select( + ($cat == "security") and + ($conf == "MEDIUM" or $conf == "HIGH") and + ($imp == "MEDIUM" or $imp == "HIGH") + ) + ], + errors: .errors, + paths: .paths +}' "$f" > "${f%.json}-important.json" +``` + +Default values (`// "security"`, `// "HIGH"`) handle third-party rules without metadata — they pass all filters by default. 
+ +### Filter All Result Files in a Directory + +```bash +# Apply important-only filter to all scan result JSON files +for f in "$OUTPUT_DIR"/*-*.json; do + [[ "$f" == *-triage.json || "$f" == *-important.json ]] && continue + jq '{ + results: [.results[] | + ((.extra.metadata.category // "security") | ascii_downcase) as $cat | + ((.extra.metadata.confidence // "HIGH") | ascii_upcase) as $conf | + ((.extra.metadata.impact // "HIGH") | ascii_upcase) as $imp | + select( + ($cat == "security") and + ($conf == "MEDIUM" or $conf == "HIGH") and + ($imp == "MEDIUM" or $imp == "HIGH") + ) + ], + errors: .errors, + paths: .paths + }' "$f" > "${f%.json}-important.json" + BEFORE=$(jq '.results | length' "$f") + AFTER=$(jq '.results | length' "${f%.json}-important.json") + echo "$f: $BEFORE → $AFTER findings (filtered $(( BEFORE - AFTER )))" +done +``` + +### Scanner Task Modifications + +In important-only mode, add `[SEVERITY_FLAGS]` to the scanner template: + +```bash +semgrep [--pro if available] --metrics=off [SEVERITY_FLAGS] --config [RULESET] --json -o [OUTPUT_DIR]/[lang]-[ruleset].json --sarif-output=[OUTPUT_DIR]/[lang]-[ruleset].sarif [TARGET] & +``` + +Where `[SEVERITY_FLAGS]` is: +- **Run all**: *(empty)* +- **Important only**: `--severity MEDIUM --severity HIGH --severity CRITICAL` diff --git a/plugins/static-analysis/skills/semgrep/references/scanner-task-prompt.md b/plugins/static-analysis/skills/semgrep/references/scanner-task-prompt.md index c8029c9..ddcaaa7 100644 --- a/plugins/static-analysis/skills/semgrep/references/scanner-task-prompt.md +++ b/plugins/static-analysis/skills/semgrep/references/scanner-task-prompt.md @@ -12,6 +12,8 @@ Run Semgrep scans for [LANGUAGE] files and save results to [OUTPUT_DIR]. 
## Pro Engine Status: [PRO_AVAILABLE: true/false] +## Scan Mode: [SCAN_MODE: run-all/important-only] + ## APPROVED RULESETS (from user-confirmed plan) [LIST EXACT RULESETS USER APPROVED - DO NOT SUBSTITUTE] @@ -26,7 +28,7 @@ Example: ### Generate commands for EACH approved ruleset: ```bash -semgrep [--pro if available] --metrics=off --config [RULESET] --json -o [OUTPUT_DIR]/[lang]-[ruleset].json --sarif-output=[OUTPUT_DIR]/[lang]-[ruleset].sarif [TARGET] & +semgrep [--pro if available] --metrics=off [SEVERITY_FLAGS] --config [RULESET] --json -o [OUTPUT_DIR]/[lang]-[ruleset].json --sarif-output=[OUTPUT_DIR]/[lang]-[ruleset].sarif [TARGET] & ``` Wait for all to complete: @@ -38,6 +40,8 @@ wait - Use ONLY the rulesets listed above - do not add or remove any - Always use --metrics=off (prevents sending telemetry to Semgrep servers) - Use --pro when Pro is available (enables cross-file taint tracking) +- If scan mode is **important-only**, add `--severity MEDIUM --severity HIGH --severity CRITICAL` to every command +- If scan mode is **run-all**, do NOT add severity flags - Run all rulesets in parallel with & and wait - For GitHub URLs, clone the repo first if not cached locally @@ -57,6 +61,7 @@ Report: | `[LANGUAGE]` | Specific language | Python, TypeScript, Go | | `[OUTPUT_DIR]` | Results directory with run number | semgrep-results-001 | | `[PRO_AVAILABLE]` | Whether Pro engine is available | true, false | +| `[SEVERITY_FLAGS]` | Severity pre-filter flags | *(empty)* for run-all, `--severity MEDIUM --severity HIGH --severity CRITICAL` for important-only | | `[RULESET]` | Semgrep ruleset identifier | p/python, https://github.com/... | | `[TARGET]` | Directory to scan | . (current dir) | @@ -70,6 +75,8 @@ Run Semgrep scans for Python files and save results to semgrep-results-001. 
## Pro Engine Status: true +## Scan Mode: run-all + ## APPROVED RULESETS (from user-confirmed plan) - p/python - p/django From 5e709026372111303436b1ac7e20850bf4eb3e5d Mon Sep 17 00:00:00 2001 From: GrosQuildu Date: Mon, 23 Feb 2026 11:38:14 +0100 Subject: [PATCH 2/8] make semgrep/codeql better --- .../static-analysis/agents/semgrep-scanner.md | 25 +- .../static-analysis/agents/semgrep-triager.md | 107 ------- .../static-analysis/skills/codeql/SKILL.md | 12 +- .../codeql/references/important-only-suite.md | 51 +++- .../skills/codeql/references/run-all-suite.md | 87 ++++++ .../skills/codeql/references/threat-models.md | 27 +- .../skills/codeql/workflows/build-database.md | 281 +++++++++++++++++- .../skills/codeql/workflows/run-analysis.md | 128 ++++++-- .../static-analysis/skills/semgrep/SKILL.md | 93 +++--- .../semgrep/references/scanner-task-prompt.md | 51 +++- .../semgrep/references/triage-task-prompt.md | 122 -------- .../semgrep/scripts/merge_triaged_sarif.py | 137 ++------- 12 files changed, 646 insertions(+), 475 deletions(-) delete mode 100644 plugins/static-analysis/agents/semgrep-triager.md create mode 100644 plugins/static-analysis/skills/codeql/references/run-all-suite.md delete mode 100644 plugins/static-analysis/skills/semgrep/references/triage-task-prompt.md diff --git a/plugins/static-analysis/agents/semgrep-scanner.md b/plugins/static-analysis/agents/semgrep-scanner.md index d01bd7e..1b91bd4 100644 --- a/plugins/static-analysis/agents/semgrep-scanner.md +++ b/plugins/static-analysis/agents/semgrep-scanner.md @@ -40,13 +40,32 @@ After launching all rulesets: wait ``` +## Language Scoping + +For language-specific rulesets (e.g., `p/python`, `p/java`), +add `--include` to restrict parsing to relevant files: + +```bash +--include="*.java" --include="*.jsp" # for Java +--include="*.py" # for Python +--include="*.js" --include="*.jsx" # for JavaScript +``` + +Do NOT add `--include` to cross-language rulesets like +`p/security-audit`, `p/secrets`, or 
third-party repos that +contain rules for multiple languages. + ## GitHub URL Rulesets For rulesets specified as GitHub URLs (e.g., `https://github.com/trailofbits/semgrep-rules`): -- Clone the repository first if not already cached locally -- Use the local path as the `--config` value, or pass the - URL directly to semgrep (it handles GitHub URLs natively) +- Clone into `[OUTPUT_DIR]/repos/[repo-name]` so cloned + repos stay inside the results directory +- Use the local path as the `--config` value (do NOT pass + the URL directly — semgrep's URL handling is unreliable + for repos with non-standard YAML) +- After all scans complete, delete the cloned repos: + `rm -rf [OUTPUT_DIR]/repos` ## Output Requirements diff --git a/plugins/static-analysis/agents/semgrep-triager.md b/plugins/static-analysis/agents/semgrep-triager.md deleted file mode 100644 index c6798cd..0000000 --- a/plugins/static-analysis/agents/semgrep-triager.md +++ /dev/null @@ -1,107 +0,0 @@ ---- -name: semgrep-triager -description: "Classifies semgrep scan findings as true or false positives by reading source context. Use when triaging static analysis results to separate real vulnerabilities from noise." -tools: Read, Grep, Glob, Write ---- - -# Semgrep Triage Agent - -You are a security finding triager responsible for classifying -Semgrep scan results as true or false positives by reading -source code context. - -## Task - -For each finding in the provided JSON result files: - -1. Read the JSON finding (rule ID, file, line number) -2. Read source code context (at least 5 lines before/after) -3. Classify as `TRUE_POSITIVE` or `FALSE_POSITIVE` -4. Write a brief reason for the classification - -## Decision Tree - -Apply these checks in order. The first match determines -the classification: - -``` -Finding - |-- In a test file? - | -> FALSE_POSITIVE (note: add to .semgrepignore) - |-- In example/documentation code? - | -> FALSE_POSITIVE - |-- Has nosemgrep comment? 
- | -> FALSE_POSITIVE (already acknowledged) - |-- Input sanitized/validated upstream? - | Check 10-20 lines before for validation - | -> FALSE_POSITIVE if validated - |-- Code path reachable? - | Check if function is called/exported - | -> FALSE_POSITIVE if dead code - |-- None of the above - -> TRUE_POSITIVE -``` - -## Classification Guidelines - -**TRUE_POSITIVE indicators:** -- User input flows to sensitive sink without sanitization -- Hardcoded credentials or API keys in source (not test) code -- Known-vulnerable function usage in production paths -- Missing security controls (no CSRF, no auth check) - -**FALSE_POSITIVE indicators:** -- Test files with mock/fixture data -- Input is validated before reaching the flagged line -- Code is behind a feature flag or compile-time guard -- Dead code (unreachable function, commented-out caller) -- Documentation or example snippets - -## Output Format - -Write a triage file to `[OUTPUT_DIR]/[lang]-triage.json`: - -```json -{ - "file": "[lang]-[ruleset].json", - "total": 45, - "true_positives": [ - { - "rule": "rule.id.here", - "file": "path/to/file.py", - "line": 42, - "reason": "User input in raw SQL without parameterization" - } - ], - "false_positives": [ - { - "rule": "rule.id.here", - "file": "tests/test_file.py", - "line": 15, - "reason": "Test file with mock data" - } - ] -} -``` - -## Report - -After triage, provide a summary: -- Total findings examined -- True positives count -- False positives count with breakdown by reason category - (test files, sanitized inputs, dead code, etc.) - -## Important - -- Read actual source code for every finding. Never classify - based solely on the rule name or file path. -- When uncertain, classify as TRUE_POSITIVE. False negatives - are worse than false positives in security triage. -- Process all input JSON files for the language category. 
- -## Full Reference - -For the complete triage task prompt template with variable -substitutions and examples, see: -`skills/semgrep/references/triage-task-prompt.md` diff --git a/plugins/static-analysis/skills/codeql/SKILL.md b/plugins/static-analysis/skills/codeql/SKILL.md index 8d88699..612ec0b 100644 --- a/plugins/static-analysis/skills/codeql/SKILL.md +++ b/plugins/static-analysis/skills/codeql/SKILL.md @@ -3,12 +3,12 @@ name: codeql description: >- Runs CodeQL static analysis for security vulnerability detection using interprocedural data flow and taint tracking. Supports two - scan modes - "run all" (all queries from all packs, unfiltered) and - "important only" (security vulnerabilities filtered by precision and severity). Applicable + scan modes - "run all" (all queries from all packs via security-and-quality suite) and + "important only" (security vulnerabilities filtered by precision and security-severity score). Applicable when finding vulnerabilities, running a security scan, performing a security audit, running CodeQL, building a CodeQL database, selecting query rulesets, creating data extension models, or processing CodeQL - SARIF output. NOT for writing custom QL queries or CI/CD pipeline setup. + SARIF output. allowed-tools: - Bash - Read @@ -64,9 +64,11 @@ These shortcuts lead to missed findings. Do not accept them: - **"security-extended is enough"** - It is the baseline. Always check if Trail of Bits packs and Community Packs are available for the language. They catch categories `security-extended` misses entirely. - **"The database built, so it's good"** - A database that builds does not mean it extracted well. Always run Step 4 (quality assessment) and check file counts against expected source files. A cached build produces zero useful extraction. - **"Data extensions aren't needed for standard frameworks"** - Even Django/Spring apps have custom wrappers around ORM calls, request parsing, or shell execution that CodeQL does not model. 
Skipping the extensions workflow means missing vulnerabilities in project-specific code. -- **"build-mode=none is fine for compiled languages"** - It produces severely incomplete analysis. No interprocedural data flow through compiled code is traced. Only use as an absolute last resort and clearly flag the limitation. +- **"build-mode=none is fine for compiled languages"** - It produces severely incomplete analysis. No interprocedural data flow through compiled code is traced. Only use as an absolute last resort and clearly flag the limitation. On macOS Apple Silicon, try the arm64 toolchain workaround (Method 2m) or Rosetta before falling back to `build-mode=none`. +- **"The build fails on macOS, just use build-mode=none"** - On Apple Silicon Macs, exit code 137 during tracing is caused by `arm64e`/`arm64` architecture mismatch in `libtrace.dylib`, not a fundamental build failure. Try Homebrew arm64 tools (Method 2m-a), then Rosetta (Method 2m-b) before accepting `build-mode=none`. - **"No findings means the code is secure"** - Zero findings can indicate poor database quality, missing models, or wrong query packs. Investigate before reporting clean results. -- **"I'll just run the default suite"** - The default suite varies by how CodeQL is invoked. Always explicitly specify the suite (e.g., `security-extended`) so results are reproducible. +- **"I'll just run the default suite"** - The default suite varies by how CodeQL is invoked. Passing a pack name directly (e.g., `-- codeql/cpp-queries`) uses the pack's `defaultSuiteFile` from `qlpack.yml` (typically `code-scanning.qls`), which silently applies strict filters and can produce zero results. Always use an explicit suite reference or generate a custom `.qls` file. +- **"I'll just pass the pack names directly"** - Same issue as above. Each pack's `defaultSuiteFile` applies hidden filters. 
Always generate a custom suite that explicitly references the desired built-in suite (e.g., `security-and-quality`) or loads queries with known filtering. --- diff --git a/plugins/static-analysis/skills/codeql/references/important-only-suite.md b/plugins/static-analysis/skills/codeql/references/important-only-suite.md index cd6af79..35fca3f 100644 --- a/plugins/static-analysis/skills/codeql/references/important-only-suite.md +++ b/plugins/static-analysis/skills/codeql/references/important-only-suite.md @@ -8,16 +8,32 @@ The built-in `security-extended` suite only applies to the official `codeql/= 6.0 | **Keep** | +| medium | < 6.0 or missing | **Drop** | + +This ensures medium-precision queries with meaningful security impact (e.g., `cpp/path-injection` at 7.5, `cpp/world-writable-file-creation` at 7.8) are included, while noisy low-severity medium-precision findings are filtered out. Excluded: deprecated queries, model editor/generator queries. Experimental queries are **included**. -**Key difference from `security-extended`:** Medium-precision queries require `error` severity (not `warning`). This tightens the filter to only include medium-precision findings that indicate likely incorrect behavior. +**Key difference from `security-extended`:** The `security-extended` suite includes medium-precision queries at any severity. Important-only mode adds a security-severity threshold to reduce noise from medium-precision queries that flag low-impact issues. ## Suite Template @@ -34,7 +50,8 @@ Generate this file as `important-only.qls` in the results directory before runni # - queries: . # from: GitHubSecurityLab/CodeQL-Community-Packs- # Filtering: security only, high/very-high precision (any severity), -# medium precision (error only). Experimental queries included. +# medium precision (any severity — low-severity filtered post-analysis by security-severity score). +# Experimental queries included. 
- include: kind: - problem @@ -50,8 +67,6 @@ Generate this file as `important-only.qls` in the results directory before runni - path-problem precision: - medium - problem.severity: - - error tags contain: - security - exclude: @@ -62,6 +77,8 @@ Generate this file as `important-only.qls` in the results directory before runni - modelgenerator ``` +> **Post-analysis step required:** After running the analysis, apply the jq filter from [run-analysis.md](../workflows/run-analysis.md) Step 5 to remove medium-precision results with `security-severity` < 6.0. + ## Generation Script The agent should generate the suite file dynamically based on installed packs: @@ -70,21 +87,25 @@ The agent should generate the suite file dynamically based on installed packs: RESULTS_DIR="${DB_NAME%.db}-results" SUITE_FILE="$RESULTS_DIR/important-only.qls" -cat > "$SUITE_FILE" << 'HEADER' +# NOTE: LANG must be set before running this script (e.g., LANG=cpp) +# NOTE: INSTALLED_THIRD_PARTY_PACKS must be a space-separated list of pack names + +# Use a heredoc WITHOUT quotes so ${LANG} expands +cat > "$SUITE_FILE" << HEADER - description: Important-only — security vulnerabilities, medium-high confidence +- queries: . + from: codeql/${LANG}-queries HEADER -# Always include official pack -echo "- queries: . - from: codeql/${LANG}-queries" >> "$SUITE_FILE" - # Add each installed third-party pack for PACK in $INSTALLED_THIRD_PARTY_PACKS; do - echo "- queries: . - from: ${PACK}" >> "$SUITE_FILE" + cat >> "$SUITE_FILE" << PACK_ENTRY +- queries: . 
+ from: ${PACK} +PACK_ENTRY done -# Append the filtering rules +# Append the filtering rules (quoted heredoc — no variable expansion needed) cat >> "$SUITE_FILE" << 'FILTERS' - include: kind: @@ -101,8 +122,6 @@ cat >> "$SUITE_FILE" << 'FILTERS' - path-problem precision: - medium - problem.severity: - - error tags contain: - security - exclude: @@ -126,4 +145,4 @@ CodeQL query suite filters match on query metadata (`@precision`, `@problem.seve - **Lack `@precision`**: Excluded by `include` blocks (they require precision to match). This is correct — if a query doesn't declare its precision, we cannot assess its confidence. - **Lack `@tags security`**: Excluded. Non-security queries are not relevant to important-only mode. -This is a stricter-than-necessary filter for third-party packs, but it ensures only well-annotated, high-confidence security queries run in important-only mode. +This is a stricter-than-necessary filter for third-party packs, but it ensures only well-annotated security queries run in important-only mode. The post-analysis jq filter then further narrows medium-precision results to those with `security-severity` >= 6.0. diff --git a/plugins/static-analysis/skills/codeql/references/run-all-suite.md b/plugins/static-analysis/skills/codeql/references/run-all-suite.md new file mode 100644 index 0000000..feab430 --- /dev/null +++ b/plugins/static-analysis/skills/codeql/references/run-all-suite.md @@ -0,0 +1,87 @@ +# Run-All Query Suite + +In run-all mode, generate a custom `.qls` query suite file at runtime. This ensures all queries from all installed packs actually execute, avoiding the silent filtering caused by each pack's `defaultSuiteFile`. + +## Why a Custom Suite + +When you pass a pack name directly to `codeql database analyze` (e.g., `-- codeql/cpp-queries`), CodeQL uses the pack's `defaultSuiteFile` field from `qlpack.yml`. 
For official packs, this is typically `codeql-suites/<lang>-code-scanning.qls`, which applies strict precision and severity filters. This silently drops many queries and can produce zero results for small codebases.
+
+The run-all suite explicitly references the broadest built-in suite (`security-and-quality`) for official packs and loads third-party packs with minimal filtering.
+
+## Suite Template
+
+Generate this file as `run-all.qls` in the results directory before running analysis:
+
+```yaml
+- description: Run-all — all security and quality queries from all installed packs
+# Official queries: use security-and-quality suite (broadest built-in suite)
+- import: codeql-suites/<lang>-security-and-quality.qls
+  from: codeql/<lang>-queries
+# Third-party packs (include only if installed, one entry per pack)
+# - queries: .
+#   from: trailofbits/<lang>-queries
+# - queries: .
+#   from: GitHubSecurityLab/CodeQL-Community-Packs-<lang>
+# Minimal filtering — only select alert-type queries
+- include:
+    kind:
+      - problem
+      - path-problem
+- exclude:
+    deprecated: //
+- exclude:
+    tags contain:
+      - modeleditor
+      - modelgenerator
+```
+
+## Generation Script
+
+```bash
+RESULTS_DIR="${DB_NAME%.db}-results"
+SUITE_FILE="$RESULTS_DIR/run-all.qls"
+
+# NOTE: LANG must be set before running this script (e.g., LANG=cpp)
+# NOTE: INSTALLED_THIRD_PARTY_PACKS must be a space-separated list of pack names
+
+cat > "$SUITE_FILE" << HEADER
+- description: Run-all — all security and quality queries from all installed packs
+- import: codeql-suites/${LANG}-security-and-quality.qls
+  from: codeql/${LANG}-queries
+HEADER
+
+# Add each installed third-party pack
+for PACK in $INSTALLED_THIRD_PARTY_PACKS; do
+  cat >> "$SUITE_FILE" << PACK_ENTRY
+- queries: .
+ from: ${PACK} +PACK_ENTRY +done + +# Append minimal filtering rules (quoted heredoc — no expansion needed) +cat >> "$SUITE_FILE" << 'FILTERS' +- include: + kind: + - problem + - path-problem +- exclude: + deprecated: // +- exclude: + tags contain: + - modeleditor + - modelgenerator +FILTERS + +# Verify the suite resolves correctly +codeql resolve queries "$SUITE_FILE" | wc -l +echo "Suite generated: $SUITE_FILE" +``` + +## How This Differs From Important-Only + +| Aspect | Run all | Important only | +|--------|---------|----------------| +| Official pack suite | `security-and-quality` (all security + code quality) | All queries loaded, filtered by precision | +| Third-party packs | All `problem`/`path-problem` queries | Only `security`-tagged queries with precision metadata | +| Precision filter | None | high/very-high always; medium only if security-severity >= 6.0 | +| Post-analysis filter | None | Drops medium-precision results with security-severity < 6.0 | diff --git a/plugins/static-analysis/skills/codeql/references/threat-models.md b/plugins/static-analysis/skills/codeql/references/threat-models.md index 9795e81..88e3594 100644 --- a/plugins/static-analysis/skills/codeql/references/threat-models.md +++ b/plugins/static-analysis/skills/codeql/references/threat-models.md @@ -14,31 +14,38 @@ Control which source categories are active during CodeQL analysis. By default, o ## Default Behavior -With no `--threat-models` flag, CodeQL uses `remote` only. This is correct for most web applications and APIs. Expanding beyond `remote` is useful when the application's trust boundary extends to local inputs. +With no `--threat-model` flag, CodeQL uses `remote` only (the `default` group). This is correct for most web applications and APIs. Expanding beyond `remote` is useful when the application's trust boundary extends to local inputs. 
## Usage -Enable additional threat models with the `--threat-models` flag: +Enable additional threat models with the `--threat-model` flag (singular, NOT `--threat-models`): ```bash -# Web service (default — remote only) +# Web service (default — remote only, no flag needed) codeql database analyze codeql.db \ - -- codeql/python-queries + -- results/suite.qls # CLI tool — local users can provide malicious input codeql database analyze codeql.db \ - --threat-models=remote,local \ - -- codeql/python-queries + --threat-model local \ + -- results/suite.qls # Container app reading env vars from untrusted orchestrator codeql database analyze codeql.db \ - --threat-models=remote,environment \ - -- codeql/python-queries + --threat-model local --threat-model environment \ + -- results/suite.qls # Full coverage — audit mode for all input vectors codeql database analyze codeql.db \ - --threat-models=remote,local,environment,database,file \ - -- codeql/python-queries + --threat-model all \ + -- results/suite.qls + +# Enable all except database (to reduce noise) +codeql database analyze codeql.db \ + --threat-model all --threat-model '!database' \ + -- results/suite.qls ``` +The `--threat-model` flag can be repeated. Each invocation adds (or removes with `!` prefix) a threat model group. The `remote` group is always enabled by default — use `--threat-model '!default'` to disable it (rare). The `all` group enables everything, and `!` disables a specific model. + Multiple models can be combined. Each additional model expands the set of sources CodeQL considers tainted, increasing coverage but potentially increasing false positives. Start with the narrowest set that matches the application's actual threat model, then expand if needed. 
diff --git a/plugins/static-analysis/skills/codeql/workflows/build-database.md b/plugins/static-analysis/skills/codeql/workflows/build-database.md index f02d5fa..d0133c8 100644 --- a/plugins/static-analysis/skills/codeql/workflows/build-database.md +++ b/plugins/static-analysis/skills/codeql/workflows/build-database.md @@ -31,9 +31,12 @@ Database creation differs by language type: - Try build methods in order until one succeeds: 1. **Autobuild** - CodeQL auto-detects and runs the build 2. **Custom Command** - Explicit build command for the detected build system + 2m. **macOS arm64 Toolchain** - Homebrew compiler + multi-step tracing (Apple Silicon workaround, see Step 2a) 3. **Multi-step** - Fine-grained control with init → trace-command → finalize 4. **No-build fallback** - `--build-mode=none` (partial analysis, last resort) +> **macOS Apple Silicon:** On arm64 Macs, system tools (`/usr/bin/make`, `/usr/bin/clang`, `/usr/bin/ar`) are built for `arm64e` (pointer-authenticated ABI), but CodeQL's `libtrace.dylib` only has `arm64`. macOS kills any `arm64e` process with a non-`arm64e` injected dylib (SIGKILL, exit 137). Step 2a detects this and routes to Method 2m which uses Homebrew tools (plain `arm64`) or Rosetta (`x86_64`). + --- ## Database Naming @@ -220,10 +223,46 @@ fi ### For Compiled Languages (Java, C/C++, C#, Rust, Swift) +#### Step 2a: macOS arm64e Detection (C/C++ only) + +On macOS with Apple Silicon, CodeQL's build tracer (`preload_tracer`) injects `libtrace.dylib` into every spawned process via `DYLD_INSERT_LIBRARIES`. This dylib ships with `x86_64` + `arm64` slices, but Apple's system binaries (`/usr/bin/make`, `/usr/bin/clang`, `/usr/bin/ar`, `/bin/mkdir`, etc.) are built for `arm64e` (pointer-authenticated ABI). macOS kills any `arm64e` process that tries to load a non-`arm64e` injected dylib with **SIGKILL (signal 9, exit code 137)**. 
+ +**This affects C/C++ builds on macOS Apple Silicon when the build invokes any `arm64e` system tool under tracing.** Java, Swift, and other languages may also be affected if their build tools are `arm64e`. + +**Detection:** + +```bash +IS_MACOS_ARM64E=false +if [[ "$(uname -s)" == "Darwin" ]] && [[ "$(uname -m)" == "arm64" ]]; then + # Check if libtrace.dylib lacks arm64e + LIBTRACE=$(find "$(dirname "$(command -v codeql)")" -name libtrace.dylib 2>/dev/null | head -1) + if [ -n "$LIBTRACE" ]; then + LIBTRACE_ARCHS=$(lipo -archs "$LIBTRACE" 2>/dev/null) + if [[ "$LIBTRACE_ARCHS" != *"arm64e"* ]]; then + # Check if system tools are arm64e + MAKE_ARCHS=$(lipo -archs /usr/bin/make 2>/dev/null) + if [[ "$MAKE_ARCHS" == *"arm64e"* ]]; then + IS_MACOS_ARM64E=true + log_step "DETECTED: macOS arm64e tracer incompatibility" + log_result "libtrace.dylib archs: $LIBTRACE_ARCHS | /usr/bin/make archs: $MAKE_ARCHS" + fi + fi + fi +fi +``` + +**If `IS_MACOS_ARM64E=true`:** Skip Method 1 (autobuild) and Method 2 (custom command) — they will fail with exit code 137. Go directly to **Method 2m (macOS arm64 toolchain)**. + +**If `IS_MACOS_ARM64E=false`:** Proceed with Method 1, 2, 3 in normal order. + +--- + Try build methods in sequence until one succeeds: #### Method 1: Autobuild +> **Skip if `IS_MACOS_ARM64E=true`** — autobuild spawns system tools that will be killed. + ```bash log_step "METHOD 1: Autobuild" CMD="codeql database create $DB_NAME --language= --source-root=. --overwrite" @@ -240,6 +279,8 @@ fi #### Method 2: Custom Command +> **Skip if `IS_MACOS_ARM64E=true`** — custom command wraps the entire build in the tracer, which will inject `libtrace.dylib` into `arm64e` system tools called by make/cmake/etc. 
+ Detect build system and use explicit command: | Build System | Detection | Command | @@ -281,10 +322,192 @@ else fi ``` +#### Method 2m: macOS arm64 Toolchain (Apple Silicon workaround) + +> **Use this method when `IS_MACOS_ARM64E=true`.** It replaces Methods 1 and 2 on affected systems. + +The strategy is to use Homebrew-installed tools (which are plain `arm64`, not `arm64e`) so `libtrace.dylib` can be injected successfully. Try these sub-methods in order: + +##### Sub-method 2m-a: Homebrew clang/gcc with multi-step tracing + +Trace only the compiler invocations individually, avoiding system tools (`/usr/bin/ar`, `/bin/mkdir`) that would be killed. This requires a multi-step build: init → trace each compiler call → finalize. + +```bash +log_step "METHOD 2m-a: macOS arm64 — Homebrew compiler with multi-step tracing" + +# 1. Find Homebrew C/C++ compiler (arm64, not arm64e) +BREW_CC="" +# Prefer Homebrew clang +if [ -x "/opt/homebrew/opt/llvm/bin/clang" ]; then + BREW_CC="/opt/homebrew/opt/llvm/bin/clang" +# Try Homebrew GCC (e.g. gcc-14, gcc-13) +elif command -v gcc-14 >/dev/null 2>&1; then + BREW_CC="$(command -v gcc-14)" +elif command -v gcc-13 >/dev/null 2>&1; then + BREW_CC="$(command -v gcc-13)" +fi + +if [ -z "$BREW_CC" ]; then + log_result "No Homebrew C/C++ compiler found — skipping 2m-a" + # Fall through to 2m-b +else + # Verify it's arm64 (not arm64e) + BREW_CC_ARCH=$(lipo -archs "$BREW_CC" 2>/dev/null) + if [[ "$BREW_CC_ARCH" == *"arm64e"* ]]; then + log_result "Homebrew compiler is arm64e — skipping 2m-a" + else + log_step "Using Homebrew compiler: $BREW_CC (arch: $BREW_CC_ARCH)" + + # 2. Run the build normally (without tracing) to create build dirs and artifacts + # Use Homebrew make (gmake) if available, otherwise system make outside tracer + if command -v gmake >/dev/null 2>&1; then + MAKE_CMD="gmake" + else + MAKE_CMD="make" + fi + $MAKE_CMD clean 2>/dev/null || true + $MAKE_CMD CC="$BREW_CC" 2>&1 | tee -a "$LOG_FILE" + + # 3. 
Extract compiler commands from the Makefile / build system
+    #    Use make's dry-run mode to get the exact compiler invocations
+    $MAKE_CMD clean 2>/dev/null || true
+    COMPILE_CMDS=$($MAKE_CMD CC="$BREW_CC" --dry-run 2>/dev/null \
+      | grep -E "^[[:space:]]*$BREW_CC[[:space:]].*[[:space:]]-c([[:space:]]|$)" \
+      | sed 's/^[[:space:]]*//')
+
+    if [ -z "$COMPILE_CMDS" ]; then
+      log_result "Could not extract compile commands from dry-run — skipping 2m-a"
+    else
+      # 4. Init database
+      codeql database init $DB_NAME --language=cpp --source-root=. --overwrite 2>&1 \
+        | tee -a "$LOG_FILE"
+
+      # 5. Ensure build directories exist (outside tracer — avoids arm64e mkdir)
+      $MAKE_CMD clean 2>/dev/null || true
+      # Parse -o flags to find output dirs (awk, not grep -P — macOS BSD grep has no -P)
+      echo "$COMPILE_CMDS" | awk '{for (i = 1; i < NF; i++) if ($i == "-o") print $(i + 1)}' | xargs -I{} dirname {} \
+        | sort -u | xargs mkdir -p 2>/dev/null || true
+
+      # 6. Trace each compiler invocation individually
+      TRACE_OK=true
+      while IFS= read -r cmd; do
+        [ -z "$cmd" ] && continue
+        log_cmd "codeql database trace-command $DB_NAME -- $cmd"
+        if ! codeql database trace-command $DB_NAME -- $cmd 2>&1 | tee -a "$LOG_FILE"; then
+          log_result "FAILED on: $cmd"
+          TRACE_OK=false
+          break
+        fi
+      done <<< "$COMPILE_CMDS"
+
+      if $TRACE_OK; then
+        # 7. Finalize
+        codeql database finalize $DB_NAME 2>&1 | tee -a "$LOG_FILE"
+        if codeql resolve database -- "$DB_NAME" >/dev/null 2>&1; then
+          log_result "SUCCESS (macOS arm64 multi-step)"
+          # Done — skip to Step 4
+        else
+          log_result "FAILED (finalize failed)"
+        fi
+      fi
+    fi
+  fi
+fi
+```
+
+##### Sub-method 2m-b: Rosetta x86_64 emulation
+
+Force the entire CodeQL pipeline to run under Rosetta, which uses the `x86_64` slice of both `libtrace.dylib` and system tools — no `arm64e` mismatch.
+
+```bash
+log_step "METHOD 2m-b: macOS arm64 — Rosetta x86_64 emulation"
+
+# Check if Rosetta is available
+if ! arch -x86_64 /usr/bin/true 2>/dev/null; then
+  log_result "Rosetta not available — skipping 2m-b"
+else
+  BUILD_CMD=""  # e.g.
"make clean && make -j4" + CMD="arch -x86_64 codeql database create $DB_NAME --language= --source-root=. --command='$BUILD_CMD' --overwrite" + log_cmd "$CMD" + + arch -x86_64 codeql database create $DB_NAME --language= --source-root=. \ + --command="$BUILD_CMD" --overwrite 2>&1 | tee -a "$LOG_FILE" + + if codeql resolve database -- "$DB_NAME" >/dev/null 2>&1; then + log_result "SUCCESS (Rosetta x86_64)" + else + log_result "FAILED (Rosetta)" + fi +fi +``` + +##### Sub-method 2m-c: System compiler (direct attempt) + +As a verification step, try the standard autobuild with the system compiler. This will likely fail with exit code 137 on affected systems, but confirms the arm64e issue is the cause. + +> **This sub-method is optional.** Skip it if arm64e incompatibility was already confirmed in Step 2a. + +```bash +log_step "METHOD 2m-c: System compiler (expected to fail on arm64e)" +CMD="codeql database create $DB_NAME --language= --source-root=. --overwrite" +log_cmd "$CMD" + +$CMD 2>&1 | tee -a "$LOG_FILE" + +EXIT_CODE=$? +if [ $EXIT_CODE -eq 137 ] || [ $EXIT_CODE -eq 134 ]; then + log_result "FAILED: exit code $EXIT_CODE confirms arm64e/libtrace incompatibility" +elif codeql resolve database -- "$DB_NAME" >/dev/null 2>&1; then + log_result "SUCCESS (unexpected — system compiler worked)" +else + log_result "FAILED (exit code: $EXIT_CODE)" +fi +``` + +##### Sub-method 2m-d: Ask user + +If all macOS workarounds fail, present options: + +``` +AskUserQuestion: + header: "macOS Build" + question: "Build tracing failed due to macOS arm64e incompatibility. How to proceed?" + multiSelect: false + options: + - label: "Use build-mode=none (Recommended)" + description: "Source-level analysis only. Misses some interprocedural data flow but catches most C/C++ vulnerabilities (format strings, buffer overflows, unsafe functions)." 
+ - label: "Install arm64 tools and retry" + description: "Run: brew install llvm make — then retry with Homebrew toolchain" + - label: "Install Rosetta and retry" + description: "Run: softwareupdate --install-rosetta — then retry under x86_64 emulation" + - label: "Abort" + description: "Stop database creation" +``` + +**If "Use build-mode=none":** Proceed to Method 4. + +**If "Install arm64 tools and retry":** +```bash +log_step "Installing Homebrew arm64 toolchain" +brew install llvm make 2>&1 | tee -a "$LOG_FILE" +# Retry Method 2m-a +``` + +**If "Install Rosetta and retry":** +```bash +log_step "Installing Rosetta" +softwareupdate --install-rosetta --agree-to-license 2>&1 | tee -a "$LOG_FILE" +# Retry Method 2m-b +``` + +--- + #### Method 3: Multi-step Build For complex builds needing fine-grained control: +> **On macOS with `IS_MACOS_ARM64E=true`:** Only trace compiler commands (arm64 Homebrew binaries). Do NOT trace system tools like `make`, `ar`, `mkdir` — they are arm64e and will be killed. Run non-compiler build steps outside the tracer. + ```bash log_step "METHOD 3: Multi-step build" @@ -469,27 +692,40 @@ echo "Finalized: $FINALIZED" ### 4b. Compare Against Expected Source -Estimate the expected source file count from the working directory and compare: +Estimate the expected source file count from the working directory and compare. + +> **Compiled languages (C/C++, Java, C#):** The source archive (`src.zip`) includes system headers and SDK files alongside project source files. For C/C++, this can inflate the archive count 10-20x (e.g., 111 archive files for 5 project source files). Compare against **project-relative files only** by filtering the archive listing. ```bash # Count source files in the project (adjust extensions per language) -EXPECTED=$(fd -t f -e java -e kt --exclude 'codeql_*.db' \ - --exclude node_modules --exclude vendor --exclude .git . 
| wc -l) +# C/C++: -e c -e cpp -e h -e hpp +# Java: -e java -e kt +# Python: -e py +# JS/TS: -e js -e ts -e jsx -e tsx +EXPECTED=$(fd -t f -e c -e cpp -e h -e hpp -e java -e kt -e py -e js -e ts \ + --exclude 'codeql_*.db' --exclude node_modules --exclude vendor --exclude .git . \ + 2>/dev/null | wc -l) echo "Expected source files: $EXPECTED" -echo "Extracted source files: $SRC_FILE_COUNT" -# Baseline LOC from database metadata +# Count PROJECT files in source archive (exclude system/SDK paths) +# For compiled languages, src.zip contains system headers under SDK paths +PROJECT_SRC_COUNT=$(unzip -Z1 "$DB_NAME/src.zip" 2>/dev/null \ + | grep -v -E '^(Library/|usr/|System/|opt/|Applications/)' | wc -l) +echo "Project files in source archive: $PROJECT_SRC_COUNT" +echo "Total files in source archive: $SRC_FILE_COUNT (includes system headers for compiled langs)" + +# Baseline LOC from database metadata (most reliable single metric) DB_LOC=$(grep '^baselineLinesOfCode:' "$DB_NAME/codeql-database.yml" \ | awk '{print $2}') echo "Baseline LoC: $DB_LOC" -# Error ratio -if [ "$SRC_FILE_COUNT" -gt 0 ]; then - ERROR_RATIO=$(python3 -c "print(f'{$EXTRACTOR_ERRORS/$SRC_FILE_COUNT*100:.1f}%')") +# Error ratio — use project file count for compiled langs, total for interpreted +if [ "$PROJECT_SRC_COUNT" -gt 0 ]; then + ERROR_RATIO=$(python3 -c "print(f'{$EXTRACTOR_ERRORS/$PROJECT_SRC_COUNT*100:.1f}%')") else ERROR_RATIO="N/A (no files)" fi -echo "Error ratio: $ERROR_RATIO ($EXTRACTOR_ERRORS errors / $SRC_FILE_COUNT files)" +echo "Error ratio: $ERROR_RATIO ($EXTRACTOR_ERRORS errors / $PROJECT_SRC_COUNT project files)" ``` ### 4c. 
Log Assessment

@@ -497,12 +733,15 @@ echo "Error ratio: $ERROR_RATIO ($EXTRACTOR_ERRORS errors / $SRC_FILE_COUNT file
```bash
log_step "Quality assessment results"
log_result "Baseline LoC: $DB_LOC"
-log_result "Source archive files: $SRC_FILE_COUNT (expected: ~$EXPECTED)"
+log_result "Project source files: $PROJECT_SRC_COUNT (expected: ~$EXPECTED)"
+log_result "Total archive files: $SRC_FILE_COUNT (includes system headers for compiled langs)"
log_result "Extractor errors: $EXTRACTOR_ERRORS (ratio: $ERROR_RATIO)"
log_result "Finalized: $FINALIZED"
-# Sample extracted files
-unzip -Z1 "$DB_NAME/src.zip" 2>/dev/null | head -20 >> "$LOG_FILE"
+# Sample extracted project files (exclude system paths)
+unzip -Z1 "$DB_NAME/src.zip" 2>/dev/null \
+ | grep -v -E '^(Library/|usr/|System/|opt/|Applications/)' \
+ | head -20 >> "$LOG_FILE"
```

### Quality Criteria

@@ -510,12 +749,14 @@ unzip -Z1 "$DB_NAME/src.zip" 2>/dev/null | head -20 >> "$LOG_FILE"

| Metric | Source | Good | Poor |
|--------|--------|------|------|
| Baseline LoC | `print-baseline` / `baseline-info.json` | > 0, proportional to project size | 0 or far below expected |
-| Source archive files | `src.zip` | Close to expected source file count | 0 or < 50% of expected |
-| Extractor errors | `diagnostic/extractors/*.jsonl` | 0 or < 5% of files | > 5% of files |
+| Project source files | `src.zip` (filtered) | Close to expected source file count | 0 or < 50% of expected |
+| Extractor errors | `diagnostic/extractors/*.jsonl` | 0 or < 5% of project files | > 5% of project files |
| Finalized | `codeql-database.yml` | `true` | `false` (incomplete build) |
| Key directories | `src.zip` listing | Application code directories present | Missing `src/main`, `lib/`, `app/` etc. |
| "No source code seen" | build log | Absent | Present (cached build — compiled languages) |

+**Interpreting archive file counts for compiled languages:** C/C++ databases include system headers (e.g., `<stdio.h>`, SDK headers) in `src.zip`.
A project with 5 source files may have 100+ files in the archive. Always filter to project-relative paths when comparing against expected counts. Use `baselineLinesOfCode` as the primary quality indicator. + **Interpreting baseline LoC:** A small number of extractor errors is normal and does not significantly impact analysis. However, if `baselineLinesOfCode` is 0 or the source archive contains no files, the database is empty — likely a cached build (compiled languages) or wrong `--source-root`. --- @@ -666,4 +907,16 @@ codeql database create $DB_NAME --language=python --threads=0 \ | Rust | Cargo | `cargo clean && cargo build` | | C# | .NET | `dotnet clean && dotnet build` | +### macOS Apple Silicon (arm64e workaround) + +| Priority | Method | Command | +|----------|--------|---------| +| 1st | Homebrew clang + multi-step | `codeql database init` → `codeql database trace-command -- /opt/homebrew/opt/llvm/bin/clang -c file.c` (per file) → `codeql database finalize` | +| 2nd | Rosetta x86_64 | `arch -x86_64 codeql database create --command='make'` | +| 3rd | `build-mode=none` | `codeql database create --build-mode=none` (source-level only) | + +**Why:** CodeQL's `libtrace.dylib` has `x86_64`+`arm64` slices but Apple system tools are `arm64e`. macOS kills `arm64e` processes that load non-`arm64e` injected dylibs. + +**Key constraint:** Only trace `arm64` binaries (Homebrew tools). Never trace `arm64e` binaries (`/usr/bin/*`, `/bin/*`) — they will be killed with signal 9. + See [language-details.md](../references/language-details.md) for more. diff --git a/plugins/static-analysis/skills/codeql/workflows/run-analysis.md b/plugins/static-analysis/skills/codeql/workflows/run-analysis.md index 14ff9c2..a27226f 100644 --- a/plugins/static-analysis/skills/codeql/workflows/run-analysis.md +++ b/plugins/static-analysis/skills/codeql/workflows/run-analysis.md @@ -8,10 +8,12 @@ Two modes control analysis scope. Select mode in Step 2 (before pack selection). 
| Mode | Packs | Filtering | |------|-------|-----------| -| **Run all** | All installed packs (official + Trail of Bits + Community) | None — all queries run | -| **Important only** | All installed packs (official + Trail of Bits + Community) | Custom suite: security-only, medium-high precision, error severity for medium precision | +| **Run all** | All installed packs (official + Trail of Bits + Community) | Uses `security-and-quality` suite for official pack; third-party packs run via custom suite without precision filtering | +| **Important only** | All installed packs (official + Trail of Bits + Community) | Custom suite: security-only, medium-high precision, with security-severity threshold for medium precision | -**Run all** passes all installed query packs directly to `codeql database analyze` without suite filtering. Every query in every pack runs. +**Run all** generates a custom `.qls` suite that references the official `security-and-quality` suite and loads all third-party packs with only `kind: problem/path-problem` filtering (no precision or severity restrictions). See [run-all-suite.md](../references/run-all-suite.md) for the suite template. + +> **WARNING:** Do NOT pass pack names directly to `codeql database analyze` (e.g., `-- codeql/cpp-queries`). Each pack has a `defaultSuiteFile` in its `qlpack.yml` (typically `code-scanning.qls`) that applies strict filters — this silently drops queries and can produce zero results. Always use an explicit suite reference. **Important only** generates a custom `.qls` query suite at runtime that loads all installed packs and applies uniform filtering. See [important-only-suite.md](../references/important-only-suite.md) for the suite template and generation script. @@ -19,7 +21,7 @@ Two modes control analysis scope. Select mode in Step 2 (before pack selection). 
|---|---| | `@tags` | Must contain `security` (excludes correctness, maintainability, readability) | | `@precision` high/very-high | Included at any `@problem.severity` | -| `@precision` medium | Included only with `@problem.severity: error` (not `warning`) | +| `@precision` medium | Included only if `@security-severity` >= 6.0 (checked post-analysis; suite includes all medium-precision security queries, low-severity ones are filtered from results) | | `@precision` low | Excluded | | Experimental | Included (both modes run experimental queries) | | Diagnostic / metric | Excluded (both modes skip non-alert queries) | @@ -40,12 +42,14 @@ TaskCreate: "Execute analysis" (Step 4) - blockedBy: Step 3 TaskCreate: "Process and report results" (Step 5) - blockedBy: Step 4 ``` -### Mandatory Gates +### Gates | Task | Gate Type | Cannot Proceed Until | |------|-----------|---------------------| | Step 2 | **SOFT GATE** | User selects mode; confirms installed/ignored for each missing pack | -| Step 3 | **HARD GATE** | User approves query packs, model packs, and threat model selection | +| Step 3 | **SOFT GATE** | User approves query packs, model packs, and threat model selection | + +**Auto-skip rule:** If the user already specified a choice in the invocation arguments or conversation prompt, skip the corresponding `AskUserQuestion` and use the provided value directly. For example, if the user said "run important only mode", skip the scan mode selection in Step 2a. If the user said "use all packs" or "skip extensions", skip the corresponding gates in Step 3. Only prompt for information not already provided. --- @@ -113,7 +117,9 @@ fi #### 2a: Select Scan Mode -Use `AskUserQuestion`: +**Skip if the user already specified a scan mode** (e.g., "important only", "run all", "full scan") in the invocation arguments or prompt. Use the provided value directly. + +Otherwise, use `AskUserQuestion`: ``` header: "Scan Mode" @@ -121,9 +127,9 @@ question: "Which scan mode should be used?" 
multiSelect: false options: - label: "Run all (Recommended)" - description: "Maximum coverage — all queries from all installed packs, unfiltered" + description: "Maximum coverage — all queries from all installed packs via security-and-quality suite" - label: "Important only" - description: "Security vulnerabilities only — all packs filtered by custom suite (medium-high precision, error severity)" + description: "Security vulnerabilities only — all packs filtered by custom suite (medium-high precision, security-severity threshold)" ``` Record the selected mode. It affects Steps 3 and 4. @@ -206,11 +212,10 @@ codeql resolve qlpacks 2>/dev/null | grep -iE 'model|extension' --- -### Step 3: CRITICAL GATE - Select Query Packs and Model Packs +### Step 3: Select Query Packs and Model Packs -> **⛔ MANDATORY CHECKPOINT - DO NOT SKIP** -> -> Present all available packs as checklists. Query packs first, then model packs. +> **CHECKPOINT** — Present available packs to user for confirmation. +> **Skip if the user already specified pack preferences** in the invocation (e.g., "use all packs", "skip extensions"). Use the provided values directly. #### 3a: Confirm Query Packs @@ -357,9 +362,14 @@ options: **Build the threat model flag:** ```bash -# Only add --threat-models when non-default models are selected +# Only add --threat-model when non-default models are selected # Default (remote only) needs no flag -THREAT_MODEL_FLAG="" # or "--threat-models=remote,local" etc. 
+# NOTE: The flag is --threat-model (singular), NOT --threat-models +THREAT_MODEL_FLAG="" +# Examples: +# THREAT_MODEL_FLAG="--threat-model local" # adds local group +# THREAT_MODEL_FLAG="--threat-model local --threat-model file" # adds local + file +# THREAT_MODEL_FLAG="--threat-model all" # enables everything ``` --- @@ -397,14 +407,48 @@ codeql database analyze $DB_NAME \ -- "$SUITE_FILE" ``` -#### Run-all mode: Pass packs directly +#### Run-all mode: Generate custom suite with explicit suite references + +> **WARNING:** Do NOT pass pack names directly (e.g., `-- codeql/cpp-queries`). Each pack has a `defaultSuiteFile` (typically `code-scanning.qls`) that silently applies strict precision/severity filters, dropping many queries. Always use explicit suite references. + +Generate a custom `.qls` suite that references the official `security-and-quality` suite (which includes all security + code quality queries) and loads third-party packs with minimal filtering: ```bash RESULTS_DIR="${DB_NAME%.db}-results" mkdir -p "$RESULTS_DIR" +SUITE_FILE="$RESULTS_DIR/run-all.qls" + +# Generate the run-all suite +cat > "$SUITE_FILE" << HEADER +- description: Run-all — all security and quality queries from all installed packs +HEADER + +# Official pack: use security-and-quality suite (broadest built-in suite) +echo "- import: codeql-suites/${LANG}-security-and-quality.qls + from: codeql/${LANG}-queries" >> "$SUITE_FILE" + +# Third-party packs: include all problem/path-problem queries (no precision filter) +for PACK in $INSTALLED_THIRD_PARTY_PACKS; do + echo "- queries: . 
+ from: ${PACK}" >> "$SUITE_FILE" +done -# Build pack list — all installed packs, no suite filtering -PACKS="" +# Minimal filtering — only select alert-type queries and exclude deprecated +cat >> "$SUITE_FILE" << 'FILTERS' +- include: + kind: + - problem + - path-problem +- exclude: + deprecated: // +- exclude: + tags contain: + - modeleditor + - modelgenerator +FILTERS + +echo "Suite generated: $SUITE_FILE" +codeql resolve queries "$SUITE_FILE" | wc -l # Build model pack flags from user selections in Step 3b # --model-packs for installed model packs @@ -422,7 +466,7 @@ codeql database analyze $DB_NAME \ $THREAT_MODEL_FLAG \ $MODEL_PACK_FLAGS \ $ADDITIONAL_PACK_FLAGS \ - -- $PACKS + -- "$SUITE_FILE" ``` **Flag reference for model packs:** @@ -441,9 +485,7 @@ codeql database analyze codeql_1.db \ --output=codeql_1-results/results.sarif \ --threads=0 \ --additional-packs=./codeql-models \ - -- codeql/cpp-queries \ - trailofbits/cpp-queries \ - GitHubSecurityLab/CodeQL-Community-Packs-CPP + -- codeql_1-results/run-all.qls ``` **Example (Python important-only mode with custom suite):** @@ -463,6 +505,8 @@ If codebase is large then read [../references/performance-tuning.md](../referenc ### Step 5: Process and Report Results +> **SARIF structure note:** `security-severity` and `level` are stored on rule definitions (`.runs[].tool.driver.rules[]`), NOT on individual result objects. Results reference rules by `ruleIndex`. The jq commands below join results with their rule metadata. + **Count findings:** ```bash @@ -472,7 +516,12 @@ jq '.runs[].results | length' "$RESULTS_DIR/results.sarif" **Summary by SARIF level:** ```bash -jq -r '.runs[].results[] | .level' "$RESULTS_DIR/results.sarif" \ +jq -r ' + .runs[] | + . 
as $run | + .results[] | + ($run.tool.driver.rules[.ruleIndex].defaultConfiguration.level // "unknown") +' "$RESULTS_DIR/results.sarif" \ | sort | uniq -c | sort -rn ``` @@ -480,8 +529,13 @@ jq -r '.runs[].results[] | .level' "$RESULTS_DIR/results.sarif" \ ```bash jq -r ' - .runs[].results[] | - (.properties["security-severity"] // "none") + " " + + .runs[] | + . as $run | + .results[] | + ($run.tool.driver.rules[.ruleIndex].properties["security-severity"] // "none") + " | " + + .ruleId + " | " + + (.locations[0].physicalLocation.artifactLocation.uri // "?") + ":" + + ((.locations[0].physicalLocation.region.startLine // 0) | tostring) + " | " + (.message.text // "no message" | .[0:80]) ' "$RESULTS_DIR/results.sarif" | sort -rn | head -20 ``` @@ -493,6 +547,30 @@ jq -r '.runs[].results[] | .ruleId' "$RESULTS_DIR/results.sarif" \ | sort | uniq -c | sort -rn ``` +**Important-only post-filter:** If scan mode is "important only", filter out medium-precision results with `security-severity` < 6.0 from the report. The suite includes all medium-precision security queries to let CodeQL evaluate them, but low-severity medium-precision findings are noise: + +```bash +# Filter important-only results: drop medium-precision findings with security-severity < 6.0 +# Medium-precision queries without a security-severity score default to 0.0 (excluded). +# Non-medium queries are always kept regardless of security-severity. +jq ' + .runs[] |= ( + . 
as $run | + .results = [ + .results[] | + ($run.tool.driver.rules[.ruleIndex].properties.precision // "unknown") as $prec | + ($run.tool.driver.rules[.ruleIndex].properties["security-severity"] // null) as $raw_sev | + (if $prec == "medium" then ($raw_sev // "0" | tonumber) else 10 end) as $sev | + select( + ($prec == "high") or ($prec == "very-high") or ($prec == "unknown") or + ($prec == "medium" and $sev >= 6.0) + ) + ] + ) +' "$RESULTS_DIR/results.sarif" > "$RESULTS_DIR/results-filtered.sarif" +mv "$RESULTS_DIR/results-filtered.sarif" "$RESULTS_DIR/results.sarif" +``` + --- ## Final Output diff --git a/plugins/static-analysis/skills/semgrep/SKILL.md b/plugins/static-analysis/skills/semgrep/SKILL.md index 144de44..3cb93a5 100644 --- a/plugins/static-analysis/skills/semgrep/SKILL.md +++ b/plugins/static-analysis/skills/semgrep/SKILL.md @@ -4,8 +4,7 @@ description: Run Semgrep static analysis scan on a codebase using parallel subag two scan modes - "run all" (full coverage) and "important only" (high-confidence security vulnerabilities). Automatically detects and uses Semgrep Pro for cross-file analysis when available. Use when asked to scan code for vulnerabilities, run a security audit with Semgrep, - find bugs, or perform static analysis. Spawns parallel workers for multi-language codebases - and triage. + find bugs, or perform static analysis. Spawns parallel workers for multi-language codebases. allowed-tools: - Bash - Read @@ -22,7 +21,7 @@ allowed-tools: # Semgrep Security Scan -Run a complete Semgrep scan with automatic language detection, parallel execution via Task subagents, and parallel triage. Automatically uses Semgrep Pro for cross-file taint analysis when available. +Run a complete Semgrep scan with automatic language detection, parallel execution via Task subagents, and merged SARIF output. Automatically uses Semgrep Pro for cross-file taint analysis when available. 
## Prerequisites @@ -69,7 +68,7 @@ Two modes control scan scope and result filtering. Select mode early in the work | Mode | Coverage | Findings Reported | |------|----------|-------------------| -| **Run all** | All rulesets, all severity levels | Everything (triaged for true/false positives) | +| **Run all** | All rulesets, all severity levels | Everything | | **Important only** | All rulesets, but pre-filtered and post-filtered | Security vulnerabilities only, medium-high confidence and impact | **Important only** applies two layers of filtering: @@ -91,20 +90,19 @@ This skill uses **parallel Task subagents** for maximum efficiency: │ 2. Select scan mode + rulesets (ref: rulesets.md, scan-modes.md)│ │ 3. Present plan + rulesets, get approval [⛔ HARD GATE] │ │ 4. Spawn parallel scan Tasks (with approved rulesets + mode) │ -│ 5. Spawn parallel triage Tasks │ -│ 6. Collect and report results (mode-dependent filtering) │ +│ 5. Merge results and report │ └─────────────────────────────────────────────────────────────────┘ - │ Step 4 │ Step 5 - ▼ ▼ -┌─────────────────┐ ┌─────────────────┐ -│ Scan Tasks │ │ Triage Tasks │ -│ (parallel) │ │ (parallel) │ -├─────────────────┤ ├─────────────────┤ -│ Python scanner │ │ Python triager │ -│ JS/TS scanner │ │ JS/TS triager │ -│ Go scanner │ │ Go triager │ -│ Docker scanner │ │ Docker triager │ -└─────────────────┘ └─────────────────┘ + │ Step 4 + ▼ +┌─────────────────┐ +│ Scan Tasks │ +│ (parallel) │ +├─────────────────┤ +│ Python scanner │ +│ JS/TS scanner │ +│ Go scanner │ +│ Docker scanner │ +└─────────────────┘ ``` --- @@ -118,8 +116,7 @@ TaskCreate: "Detect languages and Pro availability" (Step 1) TaskCreate: "Select scan mode and rulesets" (Step 2) - blockedBy: Step 1 TaskCreate: "Present plan with rulesets, get approval" (Step 3) - blockedBy: Step 2 TaskCreate: "Execute scans with approved rulesets and mode" (Step 4) - blockedBy: Step 3 -TaskCreate: "Triage findings" (Step 5) - blockedBy: Step 4 -TaskCreate: "Report 
results (with mode-dependent filtering)" (Step 6) - blockedBy: Step 5 +TaskCreate: "Merge results and report" (Step 5) - blockedBy: Step 4 ``` ### Mandatory Gates @@ -127,14 +124,13 @@ TaskCreate: "Report results (with mode-dependent filtering)" (Step 6) - blockedB | Task | Gate Type | Cannot Proceed Until | |------|-----------|---------------------| | Step 3: Get approval | **HARD GATE** | User explicitly approves rulesets + plan | -| Step 5: Triage | **SOFT GATE** | All scan JSON files exist | **Step 3 is a HARD GATE**: Mark as `completed` ONLY after user says "yes", "proceed", "approved", or equivalent. ### Task Flow Example ``` -1. Create all 6 tasks with dependencies +1. Create all 5 tasks with dependencies 2. TaskUpdate Step 1 → in_progress, execute detection 3. TaskUpdate Step 1 → completed 4. TaskUpdate Step 2 → in_progress, select rulesets @@ -197,12 +193,12 @@ question: "Which scan mode should be used?" multiSelect: false options: - label: "Run all (Recommended)" - description: "Full coverage — all rulesets, all severity levels, triaged for true/false positives" + description: "Full coverage — all rulesets, all severity levels" - label: "Important only" description: "Security vulnerabilities only — medium-high confidence and impact, no code quality" ``` -Record the selected mode. It affects Steps 4 and 6. +Record the selected mode. It affects Steps 4 and 5. **Then, select rulesets.** Using the detected languages and frameworks from Step 1, select rulesets by following the **Ruleset Selection Algorithm** in [rulesets.md](references/rulesets.md). @@ -277,7 +273,6 @@ Present plan to user with **explicit ruleset listing**: - Total rulesets: 9 - [If Pro] Cross-file taint tracking enabled - Scan agent: `static-analysis:semgrep-scanner` -- Triage agent: `static-analysis:semgrep-triager` **Want to modify rulesets?** Tell me which to add or remove. **Ready to scan?** Say "proceed" or "yes". 
@@ -319,7 +314,7 @@ Before marking Step 3 complete, verify: - [ ] User given opportunity to modify rulesets - [ ] User explicitly approved (quote their confirmation) - [ ] **Final ruleset list captured for Step 4** -- [ ] Agent types listed: `static-analysis:semgrep-scanner` and `static-analysis:semgrep-triager` +- [ ] Agent type listed: `static-analysis:semgrep-scanner` ### Step 4: Spawn Parallel Scan Tasks @@ -358,24 +353,18 @@ Spawn these 3 Tasks in a SINGLE message: - Approved rulesets: p/dockerfile - Output: semgrep-results-001/docker-*.json -### Step 5: Spawn Parallel Triage Tasks +### Step 5: Merge Results and Report (Main Agent) -After scan Tasks complete, spawn triage Tasks using `subagent_type: static-analysis:semgrep-triager` (triage requires reading code context, not just running commands). +After all scan Tasks complete, apply mode-dependent filtering (if applicable), then generate merged SARIF and report. -Use the triage task prompt template from [triage-task-prompt.md](references/triage-task-prompt.md). +**Important-only mode: Post-filter before merge** -### Step 6: Collect Results (Main Agent) - -After all Tasks complete, apply mode-dependent filtering (if applicable), then generate merged SARIF and report. - -**Important-only mode: Post-filter before triage/merge** - -In important-only mode, filter each scan result JSON to remove non-security and low-confidence findings before triage. See [scan-modes.md](references/scan-modes.md) for the complete jq filter. +In important-only mode, filter each scan result JSON to remove non-security and low-confidence findings before merging. See [scan-modes.md](references/scan-modes.md) for the complete jq filter. 
```bash # Apply important-only filter to all scan result JSON files for f in "$OUTPUT_DIR"/*-*.json; do - [[ "$f" == *-triage.json || "$f" == *-important.json ]] && continue + [[ "$f" == *-important.json ]] && continue jq '{ results: [.results[] | ((.extra.metadata.category // "security") | ascii_downcase) as $cat | @@ -393,9 +382,7 @@ for f in "$OUTPUT_DIR"/*-*.json; do done ``` -Then use the `-important.json` files as input for triage instead of the raw scan files. - -**Generate merged SARIF with only triaged true positives:** +**Generate merged SARIF:** ```bash uv run scripts/merge_triaged_sarif.py [OUTPUT_DIR] @@ -404,9 +391,8 @@ uv run scripts/merge_triaged_sarif.py [OUTPUT_DIR] This script: 1. Attempts to use [SARIF Multitool](https://www.npmjs.com/package/@microsoft/sarif-multitool) for merging (if `npx` is available) 2. Falls back to pure Python merge if Multitool unavailable -3. Reads all `*-triage.json` files to extract true positive findings -4. Filters merged SARIF to include only triaged true positives -5. Writes output to `[OUTPUT_DIR]/findings-triaged.sarif` +3. Merges all `*.sarif` files into a single SARIF output +4. 
Writes output to `[OUTPUT_DIR]/findings.sarif` **Optional: Install SARIF Multitool for better merge quality:** @@ -421,8 +407,7 @@ npm install -g @microsoft/sarif-multitool **Scanned:** 1,804 files **Rulesets used:** 9 (including Trail of Bits) -**Total raw findings:** 156 -**After triage:** 32 true positives +**Total findings:** 156 ### By Severity: - ERROR: 5 @@ -437,9 +422,8 @@ npm install -g @microsoft/sarif-multitool - Code quality: 8 Results written to: -- semgrep-results-001/findings-triaged.sarif (SARIF, true positives only) -- semgrep-results-001/*-triage.json (triage details per language) -- semgrep-results-001/*.json (raw scan results) +- semgrep-results-001/findings.sarif (merged SARIF) +- semgrep-results-001/*.json (raw scan results per ruleset) - semgrep-results-001/*.sarif (raw SARIF per ruleset) ``` @@ -452,29 +436,28 @@ Results written to: | Running without `--metrics=off` | Always use `--metrics=off` to prevent telemetry | | Running rulesets sequentially | Run in parallel with `&` and `wait` | | Not scoping rulesets to languages | Use `--include="*.py"` for language-specific rules | -| Reporting raw findings without triage | Always triage to remove false positives | | Single-threaded for multi-lang | Spawn parallel Tasks per language | | Sequential Tasks | Spawn all Tasks in SINGLE message for parallelism | | Using OSS when Pro is available | Check login status; use `--pro` for deeper analysis | | Assuming Pro is unavailable | Always check with login detection before scanning | +| Passing GitHub URLs directly to `--config` | Clone repos into `[OUTPUT_DIR]/repos/` first; semgrep's URL handling fails on repos with non-standard YAML | +| Leaving cloned repos on disk after scan | Delete `[OUTPUT_DIR]/repos/` after all scans complete | +| Using `.` or relative path as `[TARGET]` | Always use an absolute path for `[TARGET]` to avoid ambiguity in subagents | ## Limitations 1. 
**OSS mode:** Cannot track data flow across files (login with `semgrep login` and run `semgrep install-semgrep-pro` to enable) 2. **Pro mode:** Cross-file analysis uses `-j 1` (single job) which is slower per ruleset, but parallel rulesets compensate -3. Triage requires reading code context - parallelized via Tasks -4. Some false positive patterns require human judgment ## Agents -This plugin provides two specialized agents for the scan and triage phases: +This plugin provides a specialized agent for the scan phase: | Agent | Tools | Purpose | |-------|-------|---------| | `static-analysis:semgrep-scanner` | Bash | Executes parallel semgrep scans for a language category | -| `static-analysis:semgrep-triager` | Read, Grep, Glob, Write | Classifies findings as true/false positives by reading source context | -Use `subagent_type: static-analysis:semgrep-scanner` in Step 4 and `subagent_type: static-analysis:semgrep-triager` in Step 5 when spawning Task subagents. +Use `subagent_type: static-analysis:semgrep-scanner` in Step 4 when spawning Task subagents. 
## Rationalizations to Reject @@ -487,11 +470,11 @@ Use `subagent_type: static-analysis:semgrep-scanner` in Step 4 and `subagent_typ | "Add extra rulesets without asking" | Modifying approved list without consent breaks trust | | "Skip showing ruleset list" | User can't make informed decision without seeing what will run | | "Third-party rulesets are optional" | Trail of Bits, 0xdea, Decurity rules catch vulnerabilities not in official registry - they are REQUIRED when language matches | -| "Skip triage, report everything" | Floods user with noise; true issues get lost | | "Run one ruleset at a time" | Wastes time; parallel execution is faster | | "Use --config auto" | Sends metrics; less control over rulesets | -| "Triage later" | Findings without context are harder to evaluate | | "One Task at a time" | Defeats parallelism; spawn all Tasks together | | "Pro is too slow, skip --pro" | Cross-file analysis catches 250% more true positives; worth the time | | "Don't bother checking for Pro" | Missing Pro = missing critical cross-file vulnerabilities | | "OSS is good enough" | OSS misses inter-file taint flows; always prefer Pro when available | +| "Semgrep handles GitHub URLs natively" | URL handling is unreliable for repos with non-standard YAML (floats as keys, etc.); always clone first | +| "Cleanup is optional" | Cloned repos left behind pollute the user's workspace and accumulate across runs | diff --git a/plugins/static-analysis/skills/semgrep/references/scanner-task-prompt.md b/plugins/static-analysis/skills/semgrep/references/scanner-task-prompt.md index ddcaaa7..ca88851 100644 --- a/plugins/static-analysis/skills/semgrep/references/scanner-task-prompt.md +++ b/plugins/static-analysis/skills/semgrep/references/scanner-task-prompt.md @@ -26,9 +26,16 @@ Example: ## Commands to Run (in parallel) +### Clone GitHub URL rulesets first: +```bash +mkdir -p [OUTPUT_DIR]/repos +# For each GitHub URL ruleset, clone into [OUTPUT_DIR]/repos/[name]: +git clone --depth 1 
https://github.com/org/repo [OUTPUT_DIR]/repos/repo-name +``` + ### Generate commands for EACH approved ruleset: ```bash -semgrep [--pro if available] --metrics=off [SEVERITY_FLAGS] --config [RULESET] --json -o [OUTPUT_DIR]/[lang]-[ruleset].json --sarif-output=[OUTPUT_DIR]/[lang]-[ruleset].sarif [TARGET] & +semgrep [--pro if available] --metrics=off [SEVERITY_FLAGS] [INCLUDE_FLAGS] --config [RULESET] --json -o [OUTPUT_DIR]/[lang]-[ruleset].json --sarif-output=[OUTPUT_DIR]/[lang]-[ruleset].sarif [TARGET] & ``` Wait for all to complete: @@ -36,6 +43,11 @@ Wait for all to complete: wait ``` +### Clean up cloned repos: +```bash +rm -rf [OUTPUT_DIR]/repos +``` + ## Critical Rules - Use ONLY the rulesets listed above - do not add or remove any - Always use --metrics=off (prevents sending telemetry to Semgrep servers) @@ -43,7 +55,9 @@ wait - If scan mode is **important-only**, add `--severity MEDIUM --severity HIGH --severity CRITICAL` to every command - If scan mode is **run-all**, do NOT add severity flags - Run all rulesets in parallel with & and wait -- For GitHub URLs, clone the repo first if not cached locally +- For GitHub URL rulesets, always clone into [OUTPUT_DIR]/repos/ and use the local path as --config (do NOT pass URLs directly to semgrep — its URL handling is unreliable for repos with non-standard YAML) +- Add `--include` flags for language-specific rulesets (e.g., `--include="*.py"` for p/python). 
Do NOT add `--include` to cross-language rulesets like p/security-audit, p/secrets, or third-party repos +- After all scans complete, delete [OUTPUT_DIR]/repos/ to avoid leaving cloned repos behind ## Output Report: @@ -62,8 +76,9 @@ Report: | `[OUTPUT_DIR]` | Results directory with run number | semgrep-results-001 | | `[PRO_AVAILABLE]` | Whether Pro engine is available | true, false | | `[SEVERITY_FLAGS]` | Severity pre-filter flags | *(empty)* for run-all, `--severity MEDIUM --severity HIGH --severity CRITICAL` for important-only | -| `[RULESET]` | Semgrep ruleset identifier | p/python, https://github.com/... | -| `[TARGET]` | Directory to scan | . (current dir) | +| `[INCLUDE_FLAGS]` | File extension filter for language-specific rulesets | `--include="*.py"` for Python rulesets, *(empty)* for cross-language rulesets like p/security-audit, p/secrets, or third-party repos | +| `[RULESET]` | Semgrep ruleset identifier or local clone path | p/python, [OUTPUT_DIR]/repos/semgrep-rules | +| `[TARGET]` | Absolute path to directory to scan | /path/to/codebase | ## Example: Python Scanner Task @@ -71,7 +86,7 @@ Report: You are a Semgrep scanner for Python. ## Task -Run Semgrep scans for Python files and save results to semgrep-results-001. +Run Semgrep scans for Python files and save results to /path/to/semgrep-results-001. ## Pro Engine Status: true @@ -85,20 +100,36 @@ Run Semgrep scans for Python files and save results to semgrep-results-001. - https://github.com/trailofbits/semgrep-rules ## Commands to Run (in parallel) + +### Clone GitHub URL rulesets first: ```bash -semgrep --pro --metrics=off --config p/python --json -o semgrep-results-001/python-python.json --sarif-output=semgrep-results-001/python-python.sarif . & -semgrep --pro --metrics=off --config p/django --json -o semgrep-results-001/python-django.json --sarif-output=semgrep-results-001/python-django.sarif . 
& -semgrep --pro --metrics=off --config p/security-audit --json -o semgrep-results-001/python-security-audit.json --sarif-output=semgrep-results-001/python-security-audit.sarif . & -semgrep --pro --metrics=off --config p/secrets --json -o semgrep-results-001/python-secrets.json --sarif-output=semgrep-results-001/python-secrets.sarif . & -semgrep --pro --metrics=off --config https://github.com/trailofbits/semgrep-rules --json -o semgrep-results-001/python-trailofbits.json --sarif-output=semgrep-results-001/python-trailofbits.sarif . & +mkdir -p /path/to/semgrep-results-001/repos +git clone --depth 1 https://github.com/trailofbits/semgrep-rules /path/to/semgrep-results-001/repos/trailofbits +``` + +### Run scans: +```bash +semgrep --pro --metrics=off --include="*.py" --config p/python --json -o /path/to/semgrep-results-001/python-python.json --sarif-output=/path/to/semgrep-results-001/python-python.sarif /path/to/codebase & +semgrep --pro --metrics=off --include="*.py" --config p/django --json -o /path/to/semgrep-results-001/python-django.json --sarif-output=/path/to/semgrep-results-001/python-django.sarif /path/to/codebase & +semgrep --pro --metrics=off --config p/security-audit --json -o /path/to/semgrep-results-001/python-security-audit.json --sarif-output=/path/to/semgrep-results-001/python-security-audit.sarif /path/to/codebase & +semgrep --pro --metrics=off --config p/secrets --json -o /path/to/semgrep-results-001/python-secrets.json --sarif-output=/path/to/semgrep-results-001/python-secrets.sarif /path/to/codebase & +semgrep --pro --metrics=off --config /path/to/semgrep-results-001/repos/trailofbits --json -o /path/to/semgrep-results-001/python-trailofbits.json --sarif-output=/path/to/semgrep-results-001/python-trailofbits.sarif /path/to/codebase & wait ``` +### Clean up cloned repos: +```bash +rm -rf /path/to/semgrep-results-001/repos +``` + ## Critical Rules - Use ONLY the rulesets listed above - do not add or remove any - Always use --metrics=off - Use 
--pro when Pro is available - Run all rulesets in parallel with & and wait +- Clone GitHub URL rulesets into the output dir repos/ subfolder, use local path as --config +- Add --include="*.py" to language-specific rulesets (p/python, p/django) but NOT to p/security-audit, p/secrets, or third-party repos +- Delete repos/ after scanning ## Output Report: diff --git a/plugins/static-analysis/skills/semgrep/references/triage-task-prompt.md b/plugins/static-analysis/skills/semgrep/references/triage-task-prompt.md deleted file mode 100644 index a476063..0000000 --- a/plugins/static-analysis/skills/semgrep/references/triage-task-prompt.md +++ /dev/null @@ -1,122 +0,0 @@ -# Triage Subagent Task Prompt - -Use this prompt template when spawning triage Tasks in Step 5. Use `subagent_type: static-analysis:semgrep-triager`. - -## Template - -``` -You are a security finding triager for [LANGUAGE_CATEGORY]. - -## Input Files -[LIST OF JSON FILES TO TRIAGE] - -## Output Directory -[OUTPUT_DIR] - -## Task -For each finding: -1. Read the JSON finding -2. Read source code context (5 lines before/after) -3. 
Classify as TRUE_POSITIVE or FALSE_POSITIVE - -## False Positive Criteria -- Test files (should add to .semgrepignore) -- Sanitized inputs (context shows validation) -- Dead code paths -- Example/documentation code -- Already has nosemgrep comment - -## Output Format -Create: [OUTPUT_DIR]/[lang]-triage.json - -```json -{ - "file": "[lang]-[ruleset].json", - "total": 45, - "true_positives": [ - {"rule": "...", "file": "...", "line": N, "reason": "..."} - ], - "false_positives": [ - {"rule": "...", "file": "...", "line": N, "reason": "..."} - ] -} -``` - -## Report -Return summary: -- Total findings: N -- True positives: N -- False positives: N (with breakdown by reason) -``` - -## Variable Substitutions - -| Variable | Description | Example | -|----------|-------------|---------| -| `[LANGUAGE_CATEGORY]` | Language group being triaged | Python, JavaScript, Docker | -| `[OUTPUT_DIR]` | Results directory with run number | semgrep-results-001 | - -## Example: Python Triage Task - -``` -You are a security finding triager for Python. - -## Input Files -- semgrep-results-001/python-python.json -- semgrep-results-001/python-django.json -- semgrep-results-001/python-security-audit.json -- semgrep-results-001/python-secrets.json -- semgrep-results-001/python-trailofbits.json - -## Output Directory -semgrep-results-001 - -## Task -For each finding: -1. Read the JSON finding -2. Read source code context (5 lines before/after) -3. 
Classify as TRUE_POSITIVE or FALSE_POSITIVE - -## False Positive Criteria -- Test files (should add to .semgrepignore) -- Sanitized inputs (context shows validation) -- Dead code paths -- Example/documentation code -- Already has nosemgrep comment - -## Output Format -Create: semgrep-results-001/python-triage.json - -```json -{ - "file": "python-django.json", - "total": 45, - "true_positives": [ - {"rule": "python.django.security.injection.sql-injection", "file": "views.py", "line": 42, "reason": "User input directly in raw SQL query"} - ], - "false_positives": [ - {"rule": "python.django.security.injection.sql-injection", "file": "tests/test_views.py", "line": 15, "reason": "Test file with mock data"} - ] -} -``` - -## Report -Return summary: -- Total findings: 45 -- True positives: 12 -- False positives: 33 (18 test files, 10 sanitized inputs, 5 dead code) -``` - -## Triage Decision Tree - -``` -Finding -├── Is it in a test file? → FALSE_POSITIVE (add to .semgrepignore) -├── Is it in example/docs? → FALSE_POSITIVE -├── Does it have nosemgrep comment? → FALSE_POSITIVE (already acknowledged) -├── Is the input sanitized/validated upstream? -│ └── Check 10-20 lines before for validation → FALSE_POSITIVE if validated -├── Is the code path reachable? -│ └── Check if function is called/exported → FALSE_POSITIVE if dead code -└── None of the above → TRUE_POSITIVE -``` diff --git a/plugins/static-analysis/skills/semgrep/scripts/merge_triaged_sarif.py b/plugins/static-analysis/skills/semgrep/scripts/merge_triaged_sarif.py index 5bb4622..10baca4 100644 --- a/plugins/static-analysis/skills/semgrep/scripts/merge_triaged_sarif.py +++ b/plugins/static-analysis/skills/semgrep/scripts/merge_triaged_sarif.py @@ -2,13 +2,13 @@ # requires-python = ">=3.11" # dependencies = [] # /// -"""Merge and filter SARIF files to include only triaged true positives. +"""Merge SARIF files into a single consolidated output. 
Usage: uv run merge_triaged_sarif.py OUTPUT_DIR -Reads *-triage.json and *.sarif files from OUTPUT_DIR, produces -OUTPUT_DIR/findings-triaged.sarif containing only true positives. +Reads *.sarif files from OUTPUT_DIR, produces +OUTPUT_DIR/findings.sarif containing all findings merged. Attempts to use SARIF Multitool for merging if available, falls back to pure Python implementation. @@ -24,53 +24,6 @@ from pathlib import Path -def load_true_positives(triage_dir: Path) -> set[tuple[str, str, int]]: - """Load true positives from all triage files as (rule_id, file, line) tuples.""" - true_positives: set[tuple[str, str, int]] = set() - - for triage_file in triage_dir.glob("*-triage.json"): - try: - data = json.loads(triage_file.read_text()) - except json.JSONDecodeError as e: - print(f"Warning: Failed to parse {triage_file}: {e}", file=sys.stderr) - continue - - for tp in data.get("true_positives", []): - rule = tp.get("rule", "") - file_path = tp.get("file", "") - line = tp.get("line", 0) - if rule and file_path and line: - true_positives.add((rule, file_path, line)) - - return true_positives - - -def extract_result_key(result: dict) -> tuple[str, str, int] | None: - """Extract (rule_id, file, line) from a SARIF result.""" - rule_id = result.get("ruleId", "") - locations = result.get("locations", []) - if not locations: - return None - - phys_loc = locations[0].get("physicalLocation", {}) - artifact_loc = phys_loc.get("artifactLocation", {}) - uri = artifact_loc.get("uri", "") - region = phys_loc.get("region", {}) - line = region.get("startLine", 0) - - if not (rule_id and uri and line): - return None - - return (rule_id, uri, line) - - -def normalize_file_path(uri: str) -> str: - """Normalize file path for matching (handle relative vs absolute).""" - if uri.startswith("file://"): - uri = uri[7:] - return uri.lstrip("./") - - def has_sarif_multitool() -> bool: """Check if SARIF Multitool is pre-installed via npx.""" if not shutil.which("npx"): @@ -129,9 +82,10 @@ def 
merge_sarif_pure_python(sarif_dir: Path) -> dict: seen_rules: dict[str, dict] = {} all_results: list[dict] = [] + seen_results: set[tuple[str, str, int]] = set() tool_info: dict | None = None - for sarif_file in sarif_dir.glob("*.sarif"): + for sarif_file in sorted(sarif_dir.glob("*.sarif")): try: data = json.loads(sarif_file.read_text()) except json.JSONDecodeError as e: @@ -148,7 +102,20 @@ def merge_sarif_pure_python(sarif_dir: Path) -> dict: if rule_id and rule_id not in seen_rules: seen_rules[rule_id] = rule - all_results.extend(run.get("results", [])) + for result in run.get("results", []): + rule_id = result.get("ruleId", "") + uri = "" + start_line = 0 + locations = result.get("locations", []) + if locations: + phys = locations[0].get("physicalLocation", {}) + uri = phys.get("artifactLocation", {}).get("uri", "") + start_line = phys.get("region", {}).get("startLine", 0) + dedup_key = (rule_id, uri, start_line) + if dedup_key in seen_results: + continue + seen_results.add(dedup_key) + all_results.append(result) if all_results: merged_run = { @@ -161,50 +128,6 @@ def merge_sarif_pure_python(sarif_dir: Path) -> dict: return merged -def filter_sarif_by_triage(sarif: dict, true_positives: set[tuple[str, str, int]]) -> dict: - """Filter SARIF results to include only triaged true positives.""" - normalized_tps: set[tuple[str, str, int]] = set() - for rule, file_path, line in true_positives: - normalized_tps.add((rule, normalize_file_path(file_path), line)) - - filtered = { - "version": sarif.get("version", "2.1.0"), - "$schema": sarif.get("$schema", "https://json.schemastore.org/sarif-2.1.0.json"), - "runs": [], - } - - for run in sarif.get("runs", []): - filtered_results = [] - for result in run.get("results", []): - key = extract_result_key(result) - if key is None: - continue - - rule_id, uri, line = key - normalized_key = (rule_id, normalize_file_path(uri), line) - - if normalized_key in normalized_tps: - filtered_results.append(result) - - if 
filtered_results: - result_rule_ids = {r.get("ruleId") for r in filtered_results} - driver = run.get("tool", {}).get("driver", {}) - filtered_rules = [r for r in driver.get("rules", []) if r.get("id") in result_rule_ids] - - filtered_run = { - "tool": { - "driver": { - **driver, - "rules": filtered_rules, - } - }, - "results": filtered_results, - } - filtered["runs"].append(filtered_run) - - return filtered - - def main() -> int: if len(sys.argv) != 2: print(f"Usage: {sys.argv[0]} OUTPUT_DIR", file=sys.stderr) @@ -215,12 +138,13 @@ def main() -> int: print(f"Error: {output_dir} is not a directory", file=sys.stderr) return 1 - # Load true positives from triage files - true_positives = load_true_positives(output_dir) - if not true_positives: - print("Warning: No true positives found in triage files", file=sys.stderr) + # Count SARIF files + sarif_files = list(output_dir.glob("*.sarif")) + print(f"Found {len(sarif_files)} SARIF files to merge") - print(f"Found {len(true_positives)} true positives from triage") + if not sarif_files: + print("No SARIF files found, nothing to merge", file=sys.stderr) + return 1 # Try SARIF Multitool first, fall back to pure Python merged: dict | None = None @@ -234,15 +158,12 @@ def main() -> int: print("Using pure Python merge (SARIF Multitool not available or failed)") merged = merge_sarif_pure_python(output_dir) - # Filter to true positives only - filtered = filter_sarif_by_triage(merged, true_positives) - - result_count = sum(len(run.get("results", [])) for run in filtered.get("runs", [])) - print(f"Filtered SARIF contains {result_count} true positives") + result_count = sum(len(run.get("results", [])) for run in merged.get("runs", [])) + print(f"Merged SARIF contains {result_count} findings") # Write output - output_file = output_dir / "findings-triaged.sarif" - output_file.write_text(json.dumps(filtered, indent=2)) + output_file = output_dir / "findings.sarif" + output_file.write_text(json.dumps(merged, indent=2)) print(f"Written 
to {output_file}") return 0 From fa3fd88c5b59d46e85ee45012945ebfbb89284e4 Mon Sep 17 00:00:00 2001 From: GrosQuildu Date: Mon, 23 Feb 2026 11:44:07 +0100 Subject: [PATCH 3/8] semgrep/codeql - descriptions --- plugins/static-analysis/agents/semgrep-scanner.md | 2 +- plugins/static-analysis/skills/codeql/SKILL.md | 13 +++++-------- .../static-analysis/skills/sarif-parsing/SKILL.md | 6 +++++- plugins/static-analysis/skills/semgrep/SKILL.md | 11 ++++++----- 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/plugins/static-analysis/agents/semgrep-scanner.md b/plugins/static-analysis/agents/semgrep-scanner.md index 1b91bd4..0b482ce 100644 --- a/plugins/static-analysis/agents/semgrep-scanner.md +++ b/plugins/static-analysis/agents/semgrep-scanner.md @@ -1,6 +1,6 @@ --- name: semgrep-scanner -description: "Executes semgrep CLI scans for a language category. Use when running automated static analysis scans with semgrep against a codebase." +description: "Executes Semgrep CLI scans for a specific language category and produces SARIF output. Spawned by the semgrep skill as a parallel worker — one agent per detected language. Reads source context to classify findings as true or false positives." tools: Bash(semgrep scan:*), Bash --- diff --git a/plugins/static-analysis/skills/codeql/SKILL.md b/plugins/static-analysis/skills/codeql/SKILL.md index 612ec0b..324807c 100644 --- a/plugins/static-analysis/skills/codeql/SKILL.md +++ b/plugins/static-analysis/skills/codeql/SKILL.md @@ -1,14 +1,11 @@ --- name: codeql description: >- - Runs CodeQL static analysis for security vulnerability detection - using interprocedural data flow and taint tracking. Supports two - scan modes - "run all" (all queries from all packs via security-and-quality suite) and - "important only" (security vulnerabilities filtered by precision and security-severity score). 
Applicable - when finding vulnerabilities, running a security scan, performing a - security audit, running CodeQL, building a CodeQL database, selecting - query rulesets, creating data extension models, or processing CodeQL - SARIF output. + Scans a codebase for security vulnerabilities using CodeQL's interprocedural data flow and + taint tracking analysis. Triggers on "run codeql", "codeql scan", "codeql analysis", "build + codeql database", or "find vulnerabilities with codeql". Supports "run all" (security-and-quality + suite) and "important only" (high-precision security findings) scan modes. Also handles + creating data extension models and processing CodeQL SARIF output. allowed-tools: - Bash - Read diff --git a/plugins/static-analysis/skills/sarif-parsing/SKILL.md b/plugins/static-analysis/skills/sarif-parsing/SKILL.md index e3d7a5e..72fcda1 100644 --- a/plugins/static-analysis/skills/sarif-parsing/SKILL.md +++ b/plugins/static-analysis/skills/sarif-parsing/SKILL.md @@ -1,6 +1,10 @@ --- name: sarif-parsing -description: Parse, analyze, and process SARIF (Static Analysis Results Interchange Format) files. Use when reading security scan results, aggregating findings from multiple tools, deduplicating alerts, extracting specific vulnerabilities, or integrating SARIF data into CI/CD pipelines. +description: >- + Parses and processes SARIF files from static analysis tools like CodeQL, Semgrep, or other + scanners. Triggers on "parse sarif", "read scan results", "aggregate findings", "deduplicate + alerts", or "process sarif output". Handles filtering, deduplication, format conversion, and + CI/CD integration of SARIF data. Does NOT run scans — use the Semgrep or CodeQL skills for that. 
allowed-tools: - Bash - Read diff --git a/plugins/static-analysis/skills/semgrep/SKILL.md b/plugins/static-analysis/skills/semgrep/SKILL.md index 3cb93a5..5e1ff79 100644 --- a/plugins/static-analysis/skills/semgrep/SKILL.md +++ b/plugins/static-analysis/skills/semgrep/SKILL.md @@ -1,10 +1,11 @@ --- name: semgrep -description: Run Semgrep static analysis scan on a codebase using parallel subagents. Supports - two scan modes - "run all" (full coverage) and "important only" (high-confidence security - vulnerabilities). Automatically detects and uses Semgrep Pro for cross-file analysis when - available. Use when asked to scan code for vulnerabilities, run a security audit with Semgrep, - find bugs, or perform static analysis. Spawns parallel workers for multi-language codebases. +description: >- + Scans a codebase for security vulnerabilities using Semgrep with automatic language detection + and parallel execution. Triggers on "run semgrep", "semgrep scan", "find vulnerabilities with + semgrep", "static analysis", or "security scan". Supports "run all" (full ruleset coverage) + and "important only" (high-confidence vulnerabilities) scan modes. Uses Semgrep Pro for + cross-file taint analysis when available. 
allowed-tools: - Bash - Read From c711186c6169331ef688cfd2da82a0ad0cc94e07 Mon Sep 17 00:00:00 2001 From: GrosQuildu Date: Mon, 23 Feb 2026 11:59:00 +0100 Subject: [PATCH 4/8] semgrep - workflow plugin improvements --- .../static-analysis/skills/semgrep/SKILL.md | 108 +++++++++--------- 1 file changed, 57 insertions(+), 51 deletions(-) diff --git a/plugins/static-analysis/skills/semgrep/SKILL.md b/plugins/static-analysis/skills/semgrep/SKILL.md index 5e1ff79..eaa5c44 100644 --- a/plugins/static-analysis/skills/semgrep/SKILL.md +++ b/plugins/static-analysis/skills/semgrep/SKILL.md @@ -10,14 +10,11 @@ allowed-tools: - Bash - Read - Glob - - Grep - - Write - Task - AskUserQuestion - TaskCreate - TaskList - TaskUpdate - - WebFetch --- # Semgrep Security Scan @@ -61,6 +58,14 @@ Pro enables: cross-file taint tracking, inter-procedural analysis, and additiona - Creating custom Semgrep rules → Use `semgrep-rule-creator` skill - Porting existing rules to other languages → Use `semgrep-rule-variant-creator` skill +## Essential Principles + +1. **Always use `--metrics=off`** — Semgrep sends telemetry by default; `--config auto` also phones home. Every `semgrep` command must include `--metrics=off` to prevent data leakage during security audits. +2. **User must approve the scan plan (Step 3 is a hard gate)** — The original "scan this codebase" request is NOT approval. Present exact rulesets, target, engine, and mode; wait for explicit "yes"/"proceed" before spawning scanners. +3. **Third-party rulesets are required, not optional** — Trail of Bits, 0xdea, and Decurity rules catch vulnerabilities absent from the official registry. Include them whenever the detected language matches. +4. **Spawn all scan Tasks in a single message** — Parallel execution is the core performance advantage. Never spawn Tasks sequentially; always emit all Task tool calls in one response. +5. 
**Always check for Semgrep Pro before scanning** — Pro enables cross-file taint tracking and catches ~250% more true positives. Skipping the check means silently missing critical inter-file vulnerabilities. + --- ## Scan Modes @@ -149,25 +154,21 @@ TaskCreate: "Merge results and report" (Step 5) - blockedBy: Step 4 ### Step 1: Detect Languages and Pro Availability (Main Agent) +> **Entry:** User has specified or confirmed the target directory. +> **Exit:** Language list with file counts produced; Pro availability determined. + +**Detect Pro availability** (requires Bash — semgrep CLI check): + ```bash -# Check if Semgrep Pro is available (non-destructive check) -SEMGREP_PRO=false -if semgrep --pro --validate --config p/default 2>/dev/null; then - SEMGREP_PRO=true - echo "Semgrep Pro: AVAILABLE (cross-file analysis enabled)" -else - echo "Semgrep Pro: NOT AVAILABLE (OSS mode, single-file analysis)" -fi - -# Find languages by file extension -fd -t f -e py -e js -e ts -e jsx -e tsx -e go -e rb -e java -e php -e c -e cpp -e rs | \ - sed 's/.*\.//' | sort | uniq -c | sort -rn - -# Check for frameworks/technologies -ls -la package.json pyproject.toml Gemfile go.mod Cargo.toml pom.xml 2>/dev/null -fd -t f "Dockerfile" "docker-compose" ".tf" "*.yaml" "*.yml" | head -20 +semgrep --pro --validate --config p/default 2>/dev/null && echo "Pro: AVAILABLE" || echo "Pro: NOT AVAILABLE" ``` +**Detect languages** using Glob (not Bash). Run these patterns against the target directory and count matches: + +`**/*.py`, `**/*.js`, `**/*.ts`, `**/*.tsx`, `**/*.jsx`, `**/*.go`, `**/*.rb`, `**/*.java`, `**/*.php`, `**/*.c`, `**/*.cpp`, `**/*.rs`, `**/Dockerfile`, `**/*.tf` + +Also check for framework markers: `package.json`, `pyproject.toml`, `Gemfile`, `go.mod`, `Cargo.toml`, `pom.xml`. 
+ Map findings to categories: | Detection | Category | @@ -186,6 +187,9 @@ Map findings to categories: ### Step 2: Select Scan Mode and Rulesets +> **Entry:** Step 1 complete — languages detected, Pro status known. +> **Exit:** Scan mode selected; structured rulesets JSON compiled for all detected languages. + **First, select scan mode** using `AskUserQuestion`: ``` @@ -225,6 +229,9 @@ The algorithm covers: ### Step 3: CRITICAL GATE - Present Plan and Get Approval +> **Entry:** Step 2 complete — scan mode and rulesets selected. +> **Exit:** User has explicitly approved the plan (quoted confirmation). + > **⛔ MANDATORY CHECKPOINT - DO NOT SKIP** > > This step requires explicit user approval before proceeding. @@ -266,15 +273,6 @@ Present plan to user with **explicit ruleset listing**: **Third-party (auto-included for detected languages):** - [x] Trail of Bits rules - https://github.com/trailofbits/semgrep-rules -**Available but not selected:** -- [ ] `p/owasp-top-ten` - OWASP Top 10 (overlaps with security-audit) - -### Execution Strategy: -- Spawn 3 parallel scan Tasks (Python, JavaScript, Docker) -- Total rulesets: 9 -- [If Pro] Cross-file taint tracking enabled -- Scan agent: `static-analysis:semgrep-scanner` - **Want to modify rulesets?** Tell me which to add or remove. **Ready to scan?** Say "proceed" or "yes". ``` @@ -319,6 +317,9 @@ Before marking Step 3 complete, verify: ### Step 4: Spawn Parallel Scan Tasks +> **Entry:** Step 3 approved — user explicitly confirmed the plan. +> **Exit:** All scan Tasks completed; result files exist in output directory. + Create output directory with run number to avoid collisions, then spawn Tasks with **approved rulesets from Step 3**: ```bash @@ -356,37 +357,19 @@ Spawn these 3 Tasks in a SINGLE message: ### Step 5: Merge Results and Report (Main Agent) +> **Entry:** Step 4 complete — all scan Tasks finished. +> **Exit:** `findings.sarif` exists in output directory and is valid JSON. 
+ After all scan Tasks complete, apply mode-dependent filtering (if applicable), then generate merged SARIF and report. **Important-only mode: Post-filter before merge** -In important-only mode, filter each scan result JSON to remove non-security and low-confidence findings before merging. See [scan-modes.md](references/scan-modes.md) for the complete jq filter. - -```bash -# Apply important-only filter to all scan result JSON files -for f in "$OUTPUT_DIR"/*-*.json; do - [[ "$f" == *-important.json ]] && continue - jq '{ - results: [.results[] | - ((.extra.metadata.category // "security") | ascii_downcase) as $cat | - ((.extra.metadata.confidence // "HIGH") | ascii_upcase) as $conf | - ((.extra.metadata.impact // "HIGH") | ascii_upcase) as $imp | - select( - ($cat == "security") and - ($conf == "MEDIUM" or $conf == "HIGH") and - ($imp == "MEDIUM" or $imp == "HIGH") - ) - ], - errors: .errors, - paths: .paths - }' "$f" > "${f%.json}-important.json" -done -``` +In important-only mode, apply the post-filter from [scan-modes.md](references/scan-modes.md) ("Filter All Result Files in a Directory" section) to each scan result JSON before merging. **Generate merged SARIF:** ```bash -uv run scripts/merge_triaged_sarif.py [OUTPUT_DIR] +uv run {baseDir}/scripts/merge_triaged_sarif.py [OUTPUT_DIR] ``` This script: @@ -428,6 +411,14 @@ Results written to: - semgrep-results-001/*.sarif (raw SARIF per ruleset) ``` +**Verify merged SARIF is valid** before reporting: + +```bash +python -c "import json; d=json.load(open('[OUTPUT_DIR]/findings.sarif')); print(f'{sum(len(r.get(\"results\",[]))for r in d.get(\"runs\",[]))} findings in merged SARIF')" +``` + +If this fails, the merge script produced invalid output — investigate before reporting results. 
+ --- ## Common Mistakes @@ -479,3 +470,18 @@ Use `subagent_type: static-analysis:semgrep-scanner` in Step 4 when spawning Tas | "OSS is good enough" | OSS misses inter-file taint flows; always prefer Pro when available | | "Semgrep handles GitHub URLs natively" | URL handling is unreliable for repos with non-standard YAML (floats as keys, etc.); always clone first | | "Cleanup is optional" | Cloned repos left behind pollute the user's workspace and accumulate across runs | + +## Success Criteria + +A scan is complete and correct when ALL of the following are true: + +- [ ] Languages detected with file counts; Pro status checked +- [ ] Scan mode selected by user (run all / important only) +- [ ] Rulesets include third-party rules for all detected languages +- [ ] User explicitly approved the scan plan (Step 3 gate passed) +- [ ] All scan Tasks spawned in a single message and completed +- [ ] Every `semgrep` command used `--metrics=off` +- [ ] `findings.sarif` exists in the output directory and is valid JSON +- [ ] Important-only mode: post-filter applied before merge +- [ ] Results summary reported with severity and category breakdown +- [ ] Cloned repos (if any) cleaned up from `[OUTPUT_DIR]/repos/` From 23cd3517454533dea72ac6acbbfd6fc8d9a7cfe8 Mon Sep 17 00:00:00 2001 From: GrosQuildu Date: Mon, 23 Feb 2026 12:11:07 +0100 Subject: [PATCH 5/8] semgrep - workflow plugin improvements 2 --- .../static-analysis/skills/semgrep/SKILL.md | 453 +++--------------- .../skills/semgrep/workflows/scan-workflow.md | 270 +++++++++++ 2 files changed, 337 insertions(+), 386 deletions(-) create mode 100644 plugins/static-analysis/skills/semgrep/workflows/scan-workflow.md diff --git a/plugins/static-analysis/skills/semgrep/SKILL.md b/plugins/static-analysis/skills/semgrep/SKILL.md index eaa5c44..5a3bd7a 100644 --- a/plugins/static-analysis/skills/semgrep/SKILL.md +++ b/plugins/static-analysis/skills/semgrep/SKILL.md @@ -1,11 +1,13 @@ --- name: semgrep description: >- - Scans a 
codebase for security vulnerabilities using Semgrep with automatic language detection - and parallel execution. Triggers on "run semgrep", "semgrep scan", "find vulnerabilities with - semgrep", "static analysis", or "security scan". Supports "run all" (full ruleset coverage) - and "important only" (high-confidence vulnerabilities) scan modes. Uses Semgrep Pro for - cross-file taint analysis when available. + Run Semgrep static analysis scan on a codebase using parallel subagents. + Supports two scan modes — "run all" (full ruleset coverage) and "important + only" (high-confidence security vulnerabilities). Automatically detects and + uses Semgrep Pro for cross-file taint analysis when available. Use when asked + to scan code for vulnerabilities, run a security audit with Semgrep, find + bugs, or perform static analysis. Spawns parallel workers for multi-language + codebases. allowed-tools: - Bash - Read @@ -19,29 +21,15 @@ allowed-tools: # Semgrep Security Scan -Run a complete Semgrep scan with automatic language detection, parallel execution via Task subagents, and merged SARIF output. Automatically uses Semgrep Pro for cross-file taint analysis when available. +Run a Semgrep scan with automatic language detection, parallel execution via Task subagents, and merged SARIF output. -## Prerequisites - -**Required:** Semgrep CLI - -```bash -semgrep --version -``` - -If not installed, see [Semgrep installation docs](https://semgrep.dev/docs/getting-started/). - -**Optional:** Semgrep Pro (for cross-file analysis and Pro languages) - -```bash -# Check if Semgrep Pro engine is installed -semgrep --pro --validate --config p/default 2>/dev/null && echo "Pro available" || echo "OSS only" - -# If logged in, install/update Pro Engine -semgrep install-semgrep-pro -``` +## Essential Principles -Pro enables: cross-file taint tracking, inter-procedural analysis, and additional languages (Apex, C#, Elixir). +1. 
**Always use `--metrics=off`** — Semgrep sends telemetry by default; `--config auto` also phones home. Every `semgrep` command must include `--metrics=off` to prevent data leakage during security audits. +2. **User must approve the scan plan (Step 3 is a hard gate)** — The original "scan this codebase" request is NOT approval. Present exact rulesets, target, engine, and mode; wait for explicit "yes"/"proceed" before spawning scanners. +3. **Third-party rulesets are required, not optional** — Trail of Bits, 0xdea, and Decurity rules catch vulnerabilities absent from the official registry. Include them whenever the detected language matches. +4. **Spawn all scan Tasks in a single message** — Parallel execution is the core performance advantage. Never spawn Tasks sequentially; always emit all Task tool calls in one response. +5. **Always check for Semgrep Pro before scanning** — Pro enables cross-file taint tracking and catches ~250% more true positives. Skipping the check means silently missing critical inter-file vulnerabilities. ## When to Use @@ -58,48 +46,46 @@ Pro enables: cross-file taint tracking, inter-procedural analysis, and additiona - Creating custom Semgrep rules → Use `semgrep-rule-creator` skill - Porting existing rules to other languages → Use `semgrep-rule-variant-creator` skill -## Essential Principles +## Prerequisites -1. **Always use `--metrics=off`** — Semgrep sends telemetry by default; `--config auto` also phones home. Every `semgrep` command must include `--metrics=off` to prevent data leakage during security audits. -2. **User must approve the scan plan (Step 3 is a hard gate)** — The original "scan this codebase" request is NOT approval. Present exact rulesets, target, engine, and mode; wait for explicit "yes"/"proceed" before spawning scanners. -3. **Third-party rulesets are required, not optional** — Trail of Bits, 0xdea, and Decurity rules catch vulnerabilities absent from the official registry. 
Include them whenever the detected language matches. -4. **Spawn all scan Tasks in a single message** — Parallel execution is the core performance advantage. Never spawn Tasks sequentially; always emit all Task tool calls in one response. -5. **Always check for Semgrep Pro before scanning** — Pro enables cross-file taint tracking and catches ~250% more true positives. Skipping the check means silently missing critical inter-file vulnerabilities. +**Required:** Semgrep CLI (`semgrep --version`). If not installed, see [Semgrep installation docs](https://semgrep.dev/docs/getting-started/). ---- +**Optional:** Semgrep Pro — enables cross-file taint tracking, inter-procedural analysis, and additional languages (Apex, C#, Elixir). Check with: + +```bash +semgrep --pro --validate --config p/default 2>/dev/null && echo "Pro available" || echo "OSS only" +``` + +**Limitations:** OSS mode cannot track data flow across files. Pro mode uses `-j 1` for cross-file analysis (slower per ruleset, but parallel rulesets compensate). ## Scan Modes -Two modes control scan scope and result filtering. Select mode early in the workflow (Step 2). +Select mode in Step 2 of the workflow. Mode affects both scanner flags and post-processing. | Mode | Coverage | Findings Reported | |------|----------|-------------------| | **Run all** | All rulesets, all severity levels | Everything | -| **Important only** | All rulesets, but pre-filtered and post-filtered | Security vulnerabilities only, medium-high confidence and impact | +| **Important only** | All rulesets, pre- and post-filtered | Security vulns only, medium-high confidence/impact | -**Important only** applies two layers of filtering: -1. **Pre-filter**: `--severity MEDIUM --severity HIGH --severity CRITICAL` (CLI flag, excludes LOW/INFO at scan time) -2. 
**Post-filter**: JSON metadata filtering — keeps only findings where `category=security`, `confidence∈{MEDIUM,HIGH}`, `impact∈{MEDIUM,HIGH}` +**Important only** applies two filter layers: +1. **Pre-filter**: `--severity MEDIUM --severity HIGH --severity CRITICAL` (CLI flag) +2. **Post-filter**: JSON metadata — keeps only `category=security`, `confidence∈{MEDIUM,HIGH}`, `impact∈{MEDIUM,HIGH}` -See [scan-modes.md](references/scan-modes.md) for detailed metadata criteria and jq filter commands. - ---- +See [scan-modes.md](references/scan-modes.md) for metadata criteria and jq filter commands. ## Orchestration Architecture -This skill uses **parallel Task subagents** for maximum efficiency: - ``` -┌─────────────────────────────────────────────────────────────────┐ -│ MAIN AGENT │ -│ 1. Detect languages + check Pro availability │ -│ 2. Select scan mode + rulesets (ref: rulesets.md, scan-modes.md)│ -│ 3. Present plan + rulesets, get approval [⛔ HARD GATE] │ -│ 4. Spawn parallel scan Tasks (with approved rulesets + mode) │ -│ 5. Merge results and report │ -└─────────────────────────────────────────────────────────────────┘ - │ Step 4 - ▼ +┌──────────────────────────────────────────────────────────────────┐ +│ MAIN AGENT (this skill) │ +│ Step 1: Detect languages + check Pro availability │ +│ Step 2: Select scan mode + rulesets (ref: rulesets.md) │ +│ Step 3: Present plan + rulesets, get approval [⛔ HARD GATE] │ +│ Step 4: Spawn parallel scan Tasks (approved rulesets + mode) │ +│ Step 5: Merge results and report │ +└──────────────────────────────────────────────────────────────────┘ + │ Step 4 + ▼ ┌─────────────────┐ │ Scan Tasks │ │ (parallel) │ @@ -111,340 +97,28 @@ This skill uses **parallel Task subagents** for maximum efficiency: └─────────────────┘ ``` ---- - -## Workflow Enforcement via Task System - -This skill uses the **Task system** to enforce workflow compliance. 
On invocation, create these tasks: - -``` -TaskCreate: "Detect languages and Pro availability" (Step 1) -TaskCreate: "Select scan mode and rulesets" (Step 2) - blockedBy: Step 1 -TaskCreate: "Present plan with rulesets, get approval" (Step 3) - blockedBy: Step 2 -TaskCreate: "Execute scans with approved rulesets and mode" (Step 4) - blockedBy: Step 3 -TaskCreate: "Merge results and report" (Step 5) - blockedBy: Step 4 -``` - -### Mandatory Gates - -| Task | Gate Type | Cannot Proceed Until | -|------|-----------|---------------------| -| Step 3: Get approval | **HARD GATE** | User explicitly approves rulesets + plan | - -**Step 3 is a HARD GATE**: Mark as `completed` ONLY after user says "yes", "proceed", "approved", or equivalent. - -### Task Flow Example - -``` -1. Create all 5 tasks with dependencies -2. TaskUpdate Step 1 → in_progress, execute detection -3. TaskUpdate Step 1 → completed -4. TaskUpdate Step 2 → in_progress, select rulesets -5. TaskUpdate Step 2 → completed -6. TaskUpdate Step 3 → in_progress, present plan with rulesets -7. STOP: Wait for user response (may modify rulesets) -8. User approves → TaskUpdate Step 3 → completed -9. TaskUpdate Step 4 → in_progress (now unblocked) -... continue workflow -``` - ---- - ## Workflow -### Step 1: Detect Languages and Pro Availability (Main Agent) - -> **Entry:** User has specified or confirmed the target directory. -> **Exit:** Language list with file counts produced; Pro availability determined. 
+**Follow the detailed workflow in [scan-workflow.md](workflows/scan-workflow.md).** Summary: -**Detect Pro availability** (requires Bash — semgrep CLI check): +| Step | Action | Gate | Key Reference | +|------|--------|------|---------------| +| 1 | Detect languages + Pro availability | — | Use Glob, not Bash | +| 2 | Select scan mode + rulesets | — | [rulesets.md](references/rulesets.md) | +| 3 | Present plan, get explicit approval | ⛔ HARD | AskUserQuestion | +| 4 | Spawn parallel scan Tasks | — | [scanner-task-prompt.md](references/scanner-task-prompt.md) | +| 5 | Merge results and report | — | Merge script (below) | -```bash -semgrep --pro --validate --config p/default 2>/dev/null && echo "Pro: AVAILABLE" || echo "Pro: NOT AVAILABLE" -``` - -**Detect languages** using Glob (not Bash). Run these patterns against the target directory and count matches: - -`**/*.py`, `**/*.js`, `**/*.ts`, `**/*.tsx`, `**/*.jsx`, `**/*.go`, `**/*.rb`, `**/*.java`, `**/*.php`, `**/*.c`, `**/*.cpp`, `**/*.rs`, `**/Dockerfile`, `**/*.tf` - -Also check for framework markers: `package.json`, `pyproject.toml`, `Gemfile`, `go.mod`, `Cargo.toml`, `pom.xml`. - -Map findings to categories: - -| Detection | Category | -|-----------|----------| -| `.py`, `pyproject.toml` | Python | -| `.js`, `.ts`, `package.json` | JavaScript/TypeScript | -| `.go`, `go.mod` | Go | -| `.rb`, `Gemfile` | Ruby | -| `.java`, `pom.xml` | Java | -| `.php` | PHP | -| `.c`, `.cpp` | C/C++ | -| `.rs`, `Cargo.toml` | Rust | -| `Dockerfile` | Docker | -| `.tf` | Terraform | -| k8s manifests | Kubernetes | - -### Step 2: Select Scan Mode and Rulesets - -> **Entry:** Step 1 complete — languages detected, Pro status known. -> **Exit:** Scan mode selected; structured rulesets JSON compiled for all detected languages. - -**First, select scan mode** using `AskUserQuestion`: - -``` -header: "Scan Mode" -question: "Which scan mode should be used?" 
-multiSelect: false -options: - - label: "Run all (Recommended)" - description: "Full coverage — all rulesets, all severity levels" - - label: "Important only" - description: "Security vulnerabilities only — medium-high confidence and impact, no code quality" -``` - -Record the selected mode. It affects Steps 4 and 5. - -**Then, select rulesets.** Using the detected languages and frameworks from Step 1, select rulesets by following the **Ruleset Selection Algorithm** in [rulesets.md](references/rulesets.md). - -The algorithm covers: -1. Security baseline (always included) -2. Language-specific rulesets -3. Framework rulesets (if detected) -4. Infrastructure rulesets -5. **Required** third-party rulesets (Trail of Bits, 0xdea, Decurity - NOT optional) -6. Registry verification - -**Output:** Structured JSON passed to Step 3 for user review: - -```json -{ - "baseline": ["p/security-audit", "p/secrets"], - "python": ["p/python", "p/django"], - "javascript": ["p/javascript", "p/react", "p/nodejs"], - "docker": ["p/dockerfile"], - "third_party": ["https://github.com/trailofbits/semgrep-rules"] -} -``` +**Task enforcement:** On invocation, create 5 tasks with blockedBy dependencies (each step blocks the previous). Step 3 is a HARD GATE — mark complete ONLY after user explicitly approves. -### Step 3: CRITICAL GATE - Present Plan and Get Approval - -> **Entry:** Step 2 complete — scan mode and rulesets selected. -> **Exit:** User has explicitly approved the plan (quoted confirmation). - -> **⛔ MANDATORY CHECKPOINT - DO NOT SKIP** -> -> This step requires explicit user approval before proceeding. -> User may modify rulesets before approving. 
- -Present plan to user with **explicit ruleset listing**: - -``` -## Semgrep Scan Plan - -**Target:** /path/to/codebase -**Output directory:** ./semgrep-results-001/ -**Engine:** Semgrep Pro (cross-file analysis) | Semgrep OSS (single-file) -**Scan mode:** Run all | Important only (security vulns, medium-high confidence/impact) - -### Detected Languages/Technologies: -- Python (1,234 files) - Django framework detected -- JavaScript (567 files) - React detected -- Dockerfile (3 files) - -### Rulesets to Run: - -**Security Baseline (always included):** -- [x] `p/security-audit` - Comprehensive security rules -- [x] `p/secrets` - Hardcoded credentials, API keys - -**Python (1,234 files):** -- [x] `p/python` - Python security patterns -- [x] `p/django` - Django-specific vulnerabilities - -**JavaScript (567 files):** -- [x] `p/javascript` - JavaScript security patterns -- [x] `p/react` - React-specific issues -- [x] `p/nodejs` - Node.js server-side patterns - -**Docker (3 files):** -- [x] `p/dockerfile` - Dockerfile best practices - -**Third-party (auto-included for detected languages):** -- [x] Trail of Bits rules - https://github.com/trailofbits/semgrep-rules - -**Want to modify rulesets?** Tell me which to add or remove. -**Ready to scan?** Say "proceed" or "yes". -``` - -**⛔ STOP: Await explicit user approval** - -After presenting the plan: - -1. **If user wants to modify rulesets:** - - Add requested rulesets to the appropriate category - - Remove requested rulesets - - Re-present the updated plan - - Return to waiting for approval - -2. **Use AskUserQuestion** if user hasn't responded: - ``` - "I've prepared the scan plan with 9 rulesets (including Trail of Bits). Proceed with scanning?" - Options: ["Yes, run scan", "Modify rulesets first"] - ``` - -3. **Valid approval responses:** - - "yes", "proceed", "approved", "go ahead", "looks good", "run it" - -4. **Mark task completed** only after approval with final rulesets confirmed - -5. 
**Do NOT treat as approval:** - - User's original request ("scan this codebase") - - Silence / no response - - Questions about the plan - -### Pre-Scan Checklist - -Before marking Step 3 complete, verify: -- [ ] Target directory shown to user -- [ ] Engine type (Pro/OSS) displayed -- [ ] Languages detected and listed -- [ ] **All rulesets explicitly listed with checkboxes** -- [ ] User given opportunity to modify rulesets -- [ ] User explicitly approved (quote their confirmation) -- [ ] **Final ruleset list captured for Step 4** -- [ ] Agent type listed: `static-analysis:semgrep-scanner` - -### Step 4: Spawn Parallel Scan Tasks - -> **Entry:** Step 3 approved — user explicitly confirmed the plan. -> **Exit:** All scan Tasks completed; result files exist in output directory. - -Create output directory with run number to avoid collisions, then spawn Tasks with **approved rulesets from Step 3**: - -```bash -# Find next available run number -LAST=$(ls -d semgrep-results-[0-9][0-9][0-9] 2>/dev/null | sort | tail -1 | grep -o '[0-9]*$' || true) -NEXT_NUM=$(printf "%03d" $(( ${LAST:-0} + 1 ))) -OUTPUT_DIR="semgrep-results-${NEXT_NUM}" -mkdir -p "$OUTPUT_DIR" -echo "Output directory: $OUTPUT_DIR" -``` - -**Spawn N Tasks in a SINGLE message** (one per language category) using `subagent_type: static-analysis:semgrep-scanner`. - -Use the scanner task prompt template from [scanner-task-prompt.md](references/scanner-task-prompt.md). - -**Mode-dependent scanner flags:** -- **Run all**: No additional flags -- **Important only**: Add `--severity MEDIUM --severity HIGH --severity CRITICAL` to every `semgrep` command (set `[SEVERITY_FLAGS]` in the template) - -**Example - 3 Language Scan (with approved rulesets):** - -Spawn these 3 Tasks in a SINGLE message: - -1. **Task: Python Scanner** - - Approved rulesets: p/python, p/django, p/security-audit, p/secrets, https://github.com/trailofbits/semgrep-rules - - Output: semgrep-results-001/python-*.json - -2. 
**Task: JavaScript Scanner** - - Approved rulesets: p/javascript, p/react, p/nodejs, p/security-audit, p/secrets, https://github.com/trailofbits/semgrep-rules - - Output: semgrep-results-001/js-*.json - -3. **Task: Docker Scanner** - - Approved rulesets: p/dockerfile - - Output: semgrep-results-001/docker-*.json - -### Step 5: Merge Results and Report (Main Agent) - -> **Entry:** Step 4 complete — all scan Tasks finished. -> **Exit:** `findings.sarif` exists in output directory and is valid JSON. - -After all scan Tasks complete, apply mode-dependent filtering (if applicable), then generate merged SARIF and report. - -**Important-only mode: Post-filter before merge** - -In important-only mode, apply the post-filter from [scan-modes.md](references/scan-modes.md) ("Filter All Result Files in a Directory" section) to each scan result JSON before merging. - -**Generate merged SARIF:** +**Merge command (Step 5):** ```bash uv run {baseDir}/scripts/merge_triaged_sarif.py [OUTPUT_DIR] ``` -This script: -1. Attempts to use [SARIF Multitool](https://www.npmjs.com/package/@microsoft/sarif-multitool) for merging (if `npx` is available) -2. Falls back to pure Python merge if Multitool unavailable -3. Merges all `*.sarif` files into a single SARIF output -4. 
Writes output to `[OUTPUT_DIR]/findings.sarif` - -**Optional: Install SARIF Multitool for better merge quality:** - -```bash -npm install -g @microsoft/sarif-multitool -``` - -**Report to user:** - -``` -## Semgrep Scan Complete - -**Scanned:** 1,804 files -**Rulesets used:** 9 (including Trail of Bits) -**Total findings:** 156 - -### By Severity: -- ERROR: 5 -- WARNING: 18 -- INFO: 9 - -### By Category: -- SQL Injection: 3 -- XSS: 7 -- Hardcoded secrets: 2 -- Insecure configuration: 12 -- Code quality: 8 - -Results written to: -- semgrep-results-001/findings.sarif (merged SARIF) -- semgrep-results-001/*.json (raw scan results per ruleset) -- semgrep-results-001/*.sarif (raw SARIF per ruleset) -``` - -**Verify merged SARIF is valid** before reporting: - -```bash -python -c "import json; d=json.load(open('[OUTPUT_DIR]/findings.sarif')); print(f'{sum(len(r.get(\"results\",[]))for r in d.get(\"runs\",[]))} findings in merged SARIF')" -``` - -If this fails, the merge script produced invalid output — investigate before reporting results. 
- ---- - -## Common Mistakes - -| Mistake | Correct Approach | -|---------|------------------| -| Running without `--metrics=off` | Always use `--metrics=off` to prevent telemetry | -| Running rulesets sequentially | Run in parallel with `&` and `wait` | -| Not scoping rulesets to languages | Use `--include="*.py"` for language-specific rules | -| Single-threaded for multi-lang | Spawn parallel Tasks per language | -| Sequential Tasks | Spawn all Tasks in SINGLE message for parallelism | -| Using OSS when Pro is available | Check login status; use `--pro` for deeper analysis | -| Assuming Pro is unavailable | Always check with login detection before scanning | -| Passing GitHub URLs directly to `--config` | Clone repos into `[OUTPUT_DIR]/repos/` first; semgrep's URL handling fails on repos with non-standard YAML | -| Leaving cloned repos on disk after scan | Delete `[OUTPUT_DIR]/repos/` after all scans complete | -| Using `.` or relative path as `[TARGET]` | Always use an absolute path for `[TARGET]` to avoid ambiguity in subagents | - -## Limitations - -1. **OSS mode:** Cannot track data flow across files (login with `semgrep login` and run `semgrep install-semgrep-pro` to enable) -2. **Pro mode:** Cross-file analysis uses `-j 1` (single job) which is slower per ruleset, but parallel rulesets compensate - ## Agents -This plugin provides a specialized agent for the scan phase: - | Agent | Tools | Purpose | |-------|-------|---------| | `static-analysis:semgrep-scanner` | Bash | Executes parallel semgrep scans for a language category | @@ -455,25 +129,32 @@ Use `subagent_type: static-analysis:semgrep-scanner` in Step 4 when spawning Tas | Shortcut | Why It's Wrong | |----------|----------------| -| "User asked for scan, that's approval" | Original request ≠ plan approval; user must confirm specific parameters. Present plan, use AskUserQuestion, await explicit "yes" | +| "User asked for scan, that's approval" | Original request ≠ plan approval. 
Present plan, use AskUserQuestion, await explicit "yes" | | "Step 3 task is blocking, just mark complete" | Lying about task status defeats enforcement. Only mark complete after real approval | -| "I already know what they want" | Assumptions cause scanning wrong directories/rulesets. Present plan with all parameters for verification | +| "I already know what they want" | Assumptions cause scanning wrong directories/rulesets. Present plan for verification | | "Just use default rulesets" | User must see and approve exact rulesets before scan | | "Add extra rulesets without asking" | Modifying approved list without consent breaks trust | -| "Skip showing ruleset list" | User can't make informed decision without seeing what will run | -| "Third-party rulesets are optional" | Trail of Bits, 0xdea, Decurity rules catch vulnerabilities not in official registry - they are REQUIRED when language matches | -| "Run one ruleset at a time" | Wastes time; parallel execution is faster | +| "Third-party rulesets are optional" | Trail of Bits, 0xdea, Decurity catch vulnerabilities not in official registry — REQUIRED | | "Use --config auto" | Sends metrics; less control over rulesets | | "One Task at a time" | Defeats parallelism; spawn all Tasks together | | "Pro is too slow, skip --pro" | Cross-file analysis catches 250% more true positives; worth the time | -| "Don't bother checking for Pro" | Missing Pro = missing critical cross-file vulnerabilities | -| "OSS is good enough" | OSS misses inter-file taint flows; always prefer Pro when available | -| "Semgrep handles GitHub URLs natively" | URL handling is unreliable for repos with non-standard YAML (floats as keys, etc.); always clone first | -| "Cleanup is optional" | Cloned repos left behind pollute the user's workspace and accumulate across runs | +| "Semgrep handles GitHub URLs natively" | URL handling fails on repos with non-standard YAML; always clone first | +| "Cleanup is optional" | Cloned repos pollute the user's 
workspace and accumulate across runs | +| "Use `.` or relative path as target" | Subagents need absolute paths to avoid ambiguity | -## Success Criteria +## Reference Index + +| File | Content | +|------|---------| +| [rulesets.md](references/rulesets.md) | Complete ruleset catalog and selection algorithm | +| [scan-modes.md](references/scan-modes.md) | Pre/post-filter criteria and jq commands | +| [scanner-task-prompt.md](references/scanner-task-prompt.md) | Template for spawning scanner subagents | -A scan is complete and correct when ALL of the following are true: +| Workflow | Purpose | +|----------|---------| +| [scan-workflow.md](workflows/scan-workflow.md) | Complete 5-step scan execution process | + +## Success Criteria - [ ] Languages detected with file counts; Pro status checked - [ ] Scan mode selected by user (run all / important only) diff --git a/plugins/static-analysis/skills/semgrep/workflows/scan-workflow.md b/plugins/static-analysis/skills/semgrep/workflows/scan-workflow.md new file mode 100644 index 0000000..3dff901 --- /dev/null +++ b/plugins/static-analysis/skills/semgrep/workflows/scan-workflow.md @@ -0,0 +1,270 @@ +# Semgrep Scan Workflow + +Complete 5-step scan execution process. Read from start to finish and follow each step in order. 
+ +## Task System Enforcement + +On invocation, create these tasks with dependencies: + +``` +TaskCreate: "Detect languages and Pro availability" (Step 1) +TaskCreate: "Select scan mode and rulesets" (Step 2) - blockedBy: Step 1 +TaskCreate: "Present plan with rulesets, get approval" (Step 3) - blockedBy: Step 2 +TaskCreate: "Execute scans with approved rulesets and mode" (Step 4) - blockedBy: Step 3 +TaskCreate: "Merge results and report" (Step 5) - blockedBy: Step 4 +``` + +### Mandatory Gate + +| Task | Gate Type | Cannot Proceed Until | +|------|-----------|---------------------| +| Step 3 | **HARD GATE** | User explicitly approves rulesets + plan | + +Mark Step 3 as `completed` ONLY after user says "yes", "proceed", "approved", or equivalent. + +--- + +## Step 1: Detect Languages and Pro Availability + +> **Entry:** User has specified or confirmed the target directory. +> **Exit:** Language list with file counts produced; Pro availability determined. + +**Detect Pro availability** (requires Bash): + +```bash +semgrep --pro --validate --config p/default 2>/dev/null && echo "Pro: AVAILABLE" || echo "Pro: NOT AVAILABLE" +``` + +**Detect languages** using Glob (not Bash). Run these patterns against the target directory and count matches: + +`**/*.py`, `**/*.js`, `**/*.ts`, `**/*.tsx`, `**/*.jsx`, `**/*.go`, `**/*.rb`, `**/*.java`, `**/*.php`, `**/*.c`, `**/*.cpp`, `**/*.rs`, `**/Dockerfile`, `**/*.tf` + +Also check for framework markers: `package.json`, `pyproject.toml`, `Gemfile`, `go.mod`, `Cargo.toml`, `pom.xml`. Use Read to inspect these files for framework dependencies (e.g., read `package.json` to detect React, Express, Next.js; read `pyproject.toml` for Django, Flask, FastAPI). 
+ +Map findings to categories: + +| Detection | Category | +|-----------|----------| +| `.py`, `pyproject.toml` | Python | +| `.js`, `.ts`, `package.json` | JavaScript/TypeScript | +| `.go`, `go.mod` | Go | +| `.rb`, `Gemfile` | Ruby | +| `.java`, `pom.xml` | Java | +| `.php` | PHP | +| `.c`, `.cpp` | C/C++ | +| `.rs`, `Cargo.toml` | Rust | +| `Dockerfile` | Docker | +| `.tf` | Terraform | +| k8s manifests | Kubernetes | + +--- + +## Step 2: Select Scan Mode and Rulesets + +> **Entry:** Step 1 complete — languages detected, Pro status known. +> **Exit:** Scan mode selected; structured rulesets JSON compiled for all detected languages. + +**First, select scan mode** using `AskUserQuestion`: + +``` +header: "Scan Mode" +question: "Which scan mode should be used?" +multiSelect: false +options: + - label: "Run all (Recommended)" + description: "Full coverage — all rulesets, all severity levels" + - label: "Important only" + description: "Security vulnerabilities only — medium-high confidence and impact, no code quality" +``` + +Record the selected mode. It affects Steps 4 and 5. + +**Then, select rulesets.** Using the detected languages and frameworks from Step 1, follow the **Ruleset Selection Algorithm** in [rulesets.md](../references/rulesets.md). + +The algorithm covers: +1. Security baseline (always included) +2. Language-specific rulesets +3. Framework rulesets (if detected) +4. Infrastructure rulesets +5. **Required** third-party rulesets (Trail of Bits, 0xdea, Decurity — NOT optional) +6. 
Registry verification + +**Output:** Structured JSON passed to Step 3 for user review: + +```json +{ + "baseline": ["p/security-audit", "p/secrets"], + "python": ["p/python", "p/django"], + "javascript": ["p/javascript", "p/react", "p/nodejs"], + "docker": ["p/dockerfile"], + "third_party": ["https://github.com/trailofbits/semgrep-rules"] +} +``` + +--- + +## Step 3: CRITICAL GATE — Present Plan and Get Approval + +> **Entry:** Step 2 complete — scan mode and rulesets selected. +> **Exit:** User has explicitly approved the plan (quoted confirmation). + +> **⛔ MANDATORY CHECKPOINT — DO NOT SKIP** +> +> This step requires explicit user approval before proceeding. +> User may modify rulesets before approving. + +Present plan to user with **explicit ruleset listing**: + +``` +## Semgrep Scan Plan + +**Target:** /path/to/codebase +**Output directory:** ./semgrep-results-001/ +**Engine:** Semgrep Pro (cross-file analysis) | Semgrep OSS (single-file) +**Scan mode:** Run all | Important only (security vulns, medium-high confidence/impact) + +### Detected Languages/Technologies: +- Python (1,234 files) - Django framework detected +- JavaScript (567 files) - React detected +- Dockerfile (3 files) + +### Rulesets to Run: + +**Security Baseline (always included):** +- [x] `p/security-audit` - Comprehensive security rules +- [x] `p/secrets` - Hardcoded credentials, API keys + +**Python (1,234 files):** +- [x] `p/python` - Python security patterns +- [x] `p/django` - Django-specific vulnerabilities + +**JavaScript (567 files):** +- [x] `p/javascript` - JavaScript security patterns +- [x] `p/react` - React-specific issues +- [x] `p/nodejs` - Node.js server-side patterns + +**Docker (3 files):** +- [x] `p/dockerfile` - Dockerfile best practices + +**Third-party (auto-included for detected languages):** +- [x] Trail of Bits rules - https://github.com/trailofbits/semgrep-rules + +**Want to modify rulesets?** Tell me which to add or remove. +**Ready to scan?** Say "proceed" or "yes". 
+``` + +**⛔ STOP: Await explicit user approval.** + +1. **If user wants to modify rulesets:** Add/remove as requested, re-present the updated plan, return to waiting. +2. **Use AskUserQuestion** if user hasn't responded: + ``` + "I've prepared the scan plan with N rulesets (including Trail of Bits). Proceed with scanning?" + Options: ["Yes, run scan", "Modify rulesets first"] + ``` +3. **Valid approval:** "yes", "proceed", "approved", "go ahead", "looks good", "run it" +4. **NOT approval:** User's original request ("scan this codebase"), silence, questions about the plan + +### Pre-Scan Checklist + +Before marking Step 3 complete: +- [ ] Target directory shown to user +- [ ] Engine type (Pro/OSS) displayed +- [ ] Languages detected and listed +- [ ] **All rulesets explicitly listed with checkboxes** +- [ ] User given opportunity to modify rulesets +- [ ] User explicitly approved (quote their confirmation) +- [ ] **Final ruleset list captured for Step 4** +- [ ] Agent type listed: `static-analysis:semgrep-scanner` + +--- + +## Step 4: Spawn Parallel Scan Tasks + +> **Entry:** Step 3 approved — user explicitly confirmed the plan. +> **Exit:** All scan Tasks completed; result files exist in output directory. + +**Create output directory** with run number to avoid collisions: + +```bash +LAST=$(ls -d semgrep-results-[0-9][0-9][0-9] 2>/dev/null | sort | tail -1 | grep -o '[0-9]*$' || true) +NEXT_NUM=$(printf "%03d" $(( ${LAST:-0} + 1 ))) +OUTPUT_DIR="semgrep-results-${NEXT_NUM}" +mkdir -p "$OUTPUT_DIR" +echo "Output directory: $OUTPUT_DIR" +``` + +**Spawn N Tasks in a SINGLE message** (one per language category) using `subagent_type: static-analysis:semgrep-scanner`. + +Use the scanner task prompt template from [scanner-task-prompt.md](../references/scanner-task-prompt.md). 
+ +**Mode-dependent scanner flags:** +- **Run all**: No additional flags +- **Important only**: Add `--severity MEDIUM --severity HIGH --severity CRITICAL` to every `semgrep` command + +**Example — 3 Language Scan (with approved rulesets):** + +Spawn these 3 Tasks in a SINGLE message: + +1. **Task: Python Scanner** — Rulesets: p/python, p/django, p/security-audit, p/secrets, trailofbits → `semgrep-results-001/python-*.json` +2. **Task: JavaScript Scanner** — Rulesets: p/javascript, p/react, p/nodejs, p/security-audit, p/secrets, trailofbits → `semgrep-results-001/js-*.json` +3. **Task: Docker Scanner** — Rulesets: p/dockerfile → `semgrep-results-001/docker-*.json` + +### Operational Notes + +- Always use **absolute paths** for `[TARGET]` — subagents can't resolve relative paths +- Clone GitHub URL rulesets into `[OUTPUT_DIR]/repos/` — never pass URLs directly to `--config` (semgrep's URL handling fails on repos with non-standard YAML) +- Delete `[OUTPUT_DIR]/repos/` after all scans complete +- Run rulesets in parallel with `&` and `wait`, not sequentially +- Use `--include="*.py"` for language-specific rulesets, but NOT for cross-language rulesets (p/security-audit, p/secrets, third-party repos) + +--- + +## Step 5: Merge Results and Report + +> **Entry:** Step 4 complete — all scan Tasks finished. +> **Exit:** `findings.sarif` exists in output directory and is valid JSON. + +**Important-only mode: Post-filter before merge.** Apply the filter from [scan-modes.md](../references/scan-modes.md) ("Filter All Result Files in a Directory" section) to each result JSON. + +**Generate merged SARIF** using the merge script. 
The resolved path is in SKILL.md's "Merge command" section — use that exact path: + +```bash +uv run {baseDir}/scripts/merge_triaged_sarif.py [OUTPUT_DIR] +``` + +**Verify merged SARIF is valid:** + +```bash +python -c "import json; d=json.load(open('[OUTPUT_DIR]/findings.sarif')); print(f'{sum(len(r.get(\"results\",[]))for r in d.get(\"runs\",[]))} findings in merged SARIF')" +``` + +If verification fails, the merge script produced invalid output — investigate before reporting. + +**Report to user:** + +``` +## Semgrep Scan Complete + +**Scanned:** 1,804 files +**Rulesets used:** 9 (including Trail of Bits) +**Total findings:** 156 + +### By Severity: +- ERROR: 5 +- WARNING: 18 +- INFO: 9 + +### By Category: +- SQL Injection: 3 +- XSS: 7 +- Hardcoded secrets: 2 +- Insecure configuration: 12 +- Code quality: 8 + +Results written to: +- semgrep-results-001/findings.sarif (merged SARIF) +- semgrep-results-001/*.json (raw scan results per ruleset) +- semgrep-results-001/*.sarif (raw SARIF per ruleset) +``` + +**Verify** before reporting: confirm `findings.sarif` exists and is valid JSON. 
From 96cd3b60a0d8f614cbd20739edc4246018e95e21 Mon Sep 17 00:00:00 2001 From: GrosQuildu Date: Mon, 23 Feb 2026 12:11:21 +0100 Subject: [PATCH 6/8] codeql - workflow plugin improvements --- .../static-analysis/skills/codeql/SKILL.md | 76 +- .../skills/codeql/references/build-fixes.md | 90 +++ .../references/diagnostic-query-templates.md | 2 +- .../references/extension-yaml-format.md | 199 +++++ .../codeql/references/important-only-suite.md | 2 +- .../references/macos-arm64e-workaround.md | 179 +++++ .../codeql/references/performance-tuning.md | 2 +- .../codeql/references/quality-assessment.md | 172 +++++ .../codeql/references/sarif-processing.md | 71 ++ .../skills/codeql/workflows/build-database.md | 731 +----------------- .../workflows/create-data-extensions.md | 372 +-------- .../skills/codeql/workflows/run-analysis.md | 483 ++---------- 12 files changed, 901 insertions(+), 1478 deletions(-) create mode 100644 plugins/static-analysis/skills/codeql/references/build-fixes.md create mode 100644 plugins/static-analysis/skills/codeql/references/extension-yaml-format.md create mode 100644 plugins/static-analysis/skills/codeql/references/macos-arm64e-workaround.md create mode 100644 plugins/static-analysis/skills/codeql/references/quality-assessment.md create mode 100644 plugins/static-analysis/skills/codeql/references/sarif-processing.md diff --git a/plugins/static-analysis/skills/codeql/SKILL.md b/plugins/static-analysis/skills/codeql/SKILL.md index 324807c..80a2d82 100644 --- a/plugins/static-analysis/skills/codeql/SKILL.md +++ b/plugins/static-analysis/skills/codeql/SKILL.md @@ -13,17 +13,31 @@ allowed-tools: - Glob - Grep - AskUserQuestion - - Task - TaskCreate - TaskList - TaskUpdate + - TaskGet --- # CodeQL Analysis Supported languages: Python, JavaScript/TypeScript, Go, Java/Kotlin, C/C++, C#, Ruby, Swift. -**Skill resources:** Reference files and templates are located at `references/` and `workflows/` (relative to this skill directory). 
+**Skill resources:** Reference files and templates are located at `{baseDir}/references/` and `{baseDir}/workflows/`. + +## Essential Principles + +1. **Database quality is non-negotiable.** A database that builds is not automatically good. Always run quality assessment (file counts, baseline LoC, extractor errors) and compare against expected source files. A cached build produces zero useful extraction. + +2. **Data extensions catch what CodeQL misses.** Even projects using standard frameworks (Django, Spring, Express) have custom wrappers around database calls, request parsing, or shell execution. Skipping the create-data-extensions workflow means missing vulnerabilities in project-specific code paths. + +3. **Explicit suite references prevent silent query dropping.** Never pass pack names directly to `codeql database analyze` — each pack's `defaultSuiteFile` applies hidden filters that can produce zero results. Always generate a custom `.qls` suite file. + +4. **Zero findings needs investigation, not celebration.** Zero results can indicate poor database quality, missing models, wrong query packs, or silent suite filtering. Investigate before reporting clean. + +5. **macOS Apple Silicon requires workarounds for compiled languages.** Exit code 137 is `arm64e`/`arm64` mismatch, not a build failure. Try Homebrew arm64 tools or Rosetta before falling back to `build-mode=none`. + +6. **Follow workflows step by step.** Once a workflow is selected, execute it step by step without skipping phases. Each phase gates the next — skipping quality assessment or data extensions leads to incomplete analysis. ## Quick Start @@ -59,35 +73,32 @@ Then execute the full pipeline: **build database → create data extensions → These shortcuts lead to missed findings. Do not accept them: - **"security-extended is enough"** - It is the baseline. Always check if Trail of Bits packs and Community Packs are available for the language. 
They catch categories `security-extended` misses entirely. -- **"The database built, so it's good"** - A database that builds does not mean it extracted well. Always run Step 4 (quality assessment) and check file counts against expected source files. A cached build produces zero useful extraction. -- **"Data extensions aren't needed for standard frameworks"** - Even Django/Spring apps have custom wrappers around ORM calls, request parsing, or shell execution that CodeQL does not model. Skipping the extensions workflow means missing vulnerabilities in project-specific code. -- **"build-mode=none is fine for compiled languages"** - It produces severely incomplete analysis. No interprocedural data flow through compiled code is traced. Only use as an absolute last resort and clearly flag the limitation. On macOS Apple Silicon, try the arm64 toolchain workaround (Method 2m) or Rosetta before falling back to `build-mode=none`. -- **"The build fails on macOS, just use build-mode=none"** - On Apple Silicon Macs, exit code 137 during tracing is caused by `arm64e`/`arm64` architecture mismatch in `libtrace.dylib`, not a fundamental build failure. Try Homebrew arm64 tools (Method 2m-a), then Rosetta (Method 2m-b) before accepting `build-mode=none`. +- **"The database built, so it's good"** - A database that builds does not mean it extracted well. Always run quality assessment and check file counts against expected source files. +- **"Data extensions aren't needed for standard frameworks"** - Even Django/Spring apps have custom wrappers that CodeQL does not model. Skipping extensions means missing vulnerabilities. +- **"build-mode=none is fine for compiled languages"** - It produces severely incomplete analysis. Only use as an absolute last resort. On macOS, try the arm64 toolchain workaround or Rosetta first. +- **"The build fails on macOS, just use build-mode=none"** - Exit code 137 is caused by `arm64e`/`arm64` mismatch, not a fundamental build failure. 
See [macos-arm64e-workaround.md](references/macos-arm64e-workaround.md). - **"No findings means the code is secure"** - Zero findings can indicate poor database quality, missing models, or wrong query packs. Investigate before reporting clean results. -- **"I'll just run the default suite"** - The default suite varies by how CodeQL is invoked. Passing a pack name directly (e.g., `-- codeql/cpp-queries`) uses the pack's `defaultSuiteFile` from `qlpack.yml` (typically `code-scanning.qls`), which silently applies strict filters and can produce zero results. Always use an explicit suite reference or generate a custom `.qls` file. -- **"I'll just pass the pack names directly"** - Same issue as above. Each pack's `defaultSuiteFile` applies hidden filters. Always generate a custom suite that explicitly references the desired built-in suite (e.g., `security-and-quality`) or loads queries with known filtering. +- **"I'll just run the default suite"** / **"I'll just pass the pack names directly"** - Each pack's `defaultSuiteFile` applies hidden filters and can produce zero results. Always use an explicit suite reference. --- ## Workflow Selection -This skill has three workflows: +This skill has three workflows. **Once a workflow is selected, execute it step by step without skipping phases.** | Workflow | Purpose | |----------|---------| -| [build-database](workflows/build-database.md) | Create CodeQL database using 3 build methods in sequence | +| [build-database](workflows/build-database.md) | Create CodeQL database using build methods in sequence | | [create-data-extensions](workflows/create-data-extensions.md) | Detect or generate data extension models for project APIs | | [run-analysis](workflows/run-analysis.md) | Select rulesets, execute queries, process results | - ### Auto-Detection Logic **If user explicitly specifies** what to do (e.g., "build a database", "run analysis"), execute that workflow. 
-**Default pipeline for "test", "scan", "analyze", or similar:** Execute all three workflows sequentially: build → extensions → analysis. The create-data-extensions step is critical for finding vulnerabilities in projects with custom frameworks or annotations that CodeQL doesn't model by default. +**Default pipeline for "test", "scan", "analyze", or similar:** Execute all three workflows sequentially: build → extensions → analysis. ```bash -# Check if database exists DB=$(ls -dt codeql_*.db 2>/dev/null | head -1) if [ -n "$DB" ] && codeql resolve database -- "$DB" >/dev/null 2>&1; then echo "DATABASE EXISTS ($DB) - can run analysis" @@ -103,7 +114,6 @@ fi | Database exists, extensions exist | Ask user: run analysis on existing DB, or rebuild? | | User says "just run analysis" or "skip extensions" | Run analysis only | - ### Decision Prompt If unclear, ask user: @@ -118,3 +128,41 @@ I can help with CodeQL analysis. What would you like to do? [If database exists: "I found an existing database at "] ``` + +--- + +## Reference Index + +| File | Content | +|------|---------| +| **Workflows** | | +| [workflows/build-database.md](workflows/build-database.md) | Database creation with build method sequence | +| [workflows/create-data-extensions.md](workflows/create-data-extensions.md) | Data extension generation pipeline | +| [workflows/run-analysis.md](workflows/run-analysis.md) | Query execution and result processing | +| **References** | | +| [references/macos-arm64e-workaround.md](references/macos-arm64e-workaround.md) | Apple Silicon build tracing workarounds | +| [references/build-fixes.md](references/build-fixes.md) | Build failure fix catalog | +| [references/quality-assessment.md](references/quality-assessment.md) | Database quality metrics and improvements | +| [references/extension-yaml-format.md](references/extension-yaml-format.md) | Data extension YAML column definitions and examples | +| [references/sarif-processing.md](references/sarif-processing.md) | jq 
commands for SARIF output processing | +| [references/diagnostic-query-templates.md](references/diagnostic-query-templates.md) | QL queries for source/sink enumeration | +| [references/important-only-suite.md](references/important-only-suite.md) | Important-only suite template and generation | +| [references/run-all-suite.md](references/run-all-suite.md) | Run-all suite template | +| [references/ruleset-catalog.md](references/ruleset-catalog.md) | Available query packs by language | +| [references/threat-models.md](references/threat-models.md) | Threat model configuration | +| [references/language-details.md](references/language-details.md) | Language-specific build and extraction details | +| [references/performance-tuning.md](references/performance-tuning.md) | Memory, threading, and timeout configuration | + +--- + +## Success Criteria + +A complete CodeQL analysis run should satisfy: + +- [ ] Database built with quality assessment passed (baseline LoC > 0, errors < 5%) +- [ ] Data extensions evaluated — either created for project-specific APIs or explicitly skipped with justification +- [ ] Analysis run with explicit suite reference (not default pack suite) +- [ ] All installed query packs (official + Trail of Bits + Community) used or explicitly excluded +- [ ] Results processed with severity summary and file locations +- [ ] Zero-finding results investigated (database quality, model coverage, suite selection) +- [ ] Build log preserved with all commands, fixes, and quality assessments diff --git a/plugins/static-analysis/skills/codeql/references/build-fixes.md b/plugins/static-analysis/skills/codeql/references/build-fixes.md new file mode 100644 index 0000000..fb4e32a --- /dev/null +++ b/plugins/static-analysis/skills/codeql/references/build-fixes.md @@ -0,0 +1,90 @@ +# Build Fixes + +Fixes to apply when a CodeQL database build method fails. Try these in order, then retry the current build method. **Log each fix attempt.** + +## 1. 
Clean existing state + +```bash +log_step "Applying fix: clean existing state" +rm -rf "$DB_NAME" +log_result "Removed $DB_NAME" +``` + +## 2. Clean build cache + +```bash +log_step "Applying fix: clean build cache" +CLEANED="" +make clean 2>/dev/null && CLEANED="$CLEANED make" +rm -rf build CMakeCache.txt CMakeFiles 2>/dev/null && CLEANED="$CLEANED cmake-artifacts" +./gradlew clean 2>/dev/null && CLEANED="$CLEANED gradle" +mvn clean 2>/dev/null && CLEANED="$CLEANED maven" +cargo clean 2>/dev/null && CLEANED="$CLEANED cargo" +log_result "Cleaned: $CLEANED" +``` + +## 3. Install missing dependencies + +> **Note:** The commands below install the *target project's* dependencies so CodeQL can trace the build. Use whatever package manager the target project expects (`pip`, `npm`, `go mod`, etc.) — these are not the skill's own tooling preferences. + +```bash +log_step "Applying fix: install dependencies" + +# Python — use target project's package manager (pip/uv/poetry) +if [ -f requirements.txt ]; then + log_cmd "pip install -r requirements.txt" + pip install -r requirements.txt 2>&1 | tee -a "$LOG_FILE" +fi +if [ -f setup.py ] || [ -f pyproject.toml ]; then + log_cmd "pip install -e ." + pip install -e . 
2>&1 | tee -a "$LOG_FILE" +fi + +# Node - log installed packages +if [ -f package.json ]; then + log_cmd "npm install" + npm install 2>&1 | tee -a "$LOG_FILE" +fi + +# Go +if [ -f go.mod ]; then + log_cmd "go mod download" + go mod download 2>&1 | tee -a "$LOG_FILE" +fi + +# Java - log downloaded dependencies +if [ -f build.gradle ] || [ -f build.gradle.kts ]; then + log_cmd "./gradlew dependencies --refresh-dependencies" + ./gradlew dependencies --refresh-dependencies 2>&1 | tee -a "$LOG_FILE" +fi +if [ -f pom.xml ]; then + log_cmd "mvn dependency:resolve" + mvn dependency:resolve 2>&1 | tee -a "$LOG_FILE" +fi + +# Rust +if [ -f Cargo.toml ]; then + log_cmd "cargo fetch" + cargo fetch 2>&1 | tee -a "$LOG_FILE" +fi + +log_result "Dependencies installed - see above for details" +``` + +## 4. Handle private registries + +If dependencies require authentication, ask user: +``` +AskUserQuestion: "Build requires private registry access. Options:" + 1. "I'll configure auth and retry" + 2. "Skip these dependencies" + 3. "Show me what's needed" +``` + +```bash +# Log authentication setup if performed +log_step "Private registry authentication configured" +log_result "Registry: , Method: " +``` + +**After fixes:** Retry current build method. If still fails, move to next method. diff --git a/plugins/static-analysis/skills/codeql/references/diagnostic-query-templates.md b/plugins/static-analysis/skills/codeql/references/diagnostic-query-templates.md index 106ab0f..6104dbf 100644 --- a/plugins/static-analysis/skills/codeql/references/diagnostic-query-templates.md +++ b/plugins/static-analysis/skills/codeql/references/diagnostic-query-templates.md @@ -1,6 +1,6 @@ # Diagnostic Query Templates -Language-specific QL queries for enumerating sources and sinks recognized by CodeQL. Used by the [create-data-extensions workflow](../workflows/create-data-extensions.md). +Language-specific QL queries for enumerating sources and sinks recognized by CodeQL. 
Used during the data extensions creation process. ## Source Enumeration Query diff --git a/plugins/static-analysis/skills/codeql/references/extension-yaml-format.md b/plugins/static-analysis/skills/codeql/references/extension-yaml-format.md new file mode 100644 index 0000000..042ac60 --- /dev/null +++ b/plugins/static-analysis/skills/codeql/references/extension-yaml-format.md @@ -0,0 +1,199 @@ +# Data Extension YAML Format + +YAML format for CodeQL data extension files. Used by the create-data-extensions workflow to model project-specific sources, sinks, and flow summaries. + +## Structure + +All extension files follow this structure: + +```yaml +extensions: + - addsTo: + pack: codeql/-all # Target library pack + extensible: # sourceModel, sinkModel, summaryModel, neutralModel + data: + - [] +``` + +## Source Models + +Columns: `[package, type, subtypes, name, signature, ext, output, kind, provenance]` + +| Column | Description | Example | +|--------|-------------|---------| +| package | Module/package path | `myapp.auth` | +| type | Class or module name | `AuthManager` | +| subtypes | Include subclasses | `True` (Java: capitalized) / `true` (Python/JS/Go) | +| name | Method name | `get_token` | +| signature | Method signature (optional) | `""` (Python/JS), `"(String,int)"` (Java) | +| ext | Extension (optional) | `""` | +| output | What is tainted | `ReturnValue`, `Parameter[0]` (Java) / `Argument[0]` (Python/JS/Go) | +| kind | Source category | `remote`, `local`, `file`, `environment`, `database` | +| provenance | How model was created | `manual` | + +**Java-specific format differences:** +- **subtypes**: Use `True` / `False` (capitalized, Python-style), not `true` / `false` +- **output for parameters**: Use `Parameter[N]` (not `Argument[N]`) to mark method parameters as sources +- **signature**: Required for disambiguation — use Java type syntax: `"(String)"`, `"(String,int)"` +- **Parameter ranges**: Use `Parameter[0..2]` to mark multiple consecutive parameters 
+ +Example (Python): + +```yaml +# codeql-extensions/sources.yml +extensions: + - addsTo: + pack: codeql/python-all + extensible: sourceModel + data: + - ["myapp.http", "Request", true, "get_param", "", "", "ReturnValue", "remote", "manual"] + - ["myapp.http", "Request", true, "get_header", "", "", "ReturnValue", "remote", "manual"] +``` + +Example (Java — note `True`, `Parameter[N]`, and signature): + +```yaml +# codeql-extensions/sources.yml +extensions: + - addsTo: + pack: codeql/java-all + extensible: sourceModel + data: + - ["com.myapp.controller", "ApiController", True, "search", "(String)", "", "Parameter[0]", "remote", "manual"] + - ["com.myapp.service", "FileService", True, "upload", "(String,String)", "", "Parameter[0..1]", "remote", "manual"] +``` + +## Sink Models + +Columns: `[package, type, subtypes, name, signature, ext, input, kind, provenance]` + +Note: column 7 is `input` (which argument receives tainted data), not `output`. + +| Kind | Vulnerability | +|------|---------------| +| `sql-injection` | SQL injection | +| `command-injection` | Command injection | +| `path-injection` | Path traversal | +| `xss` | Cross-site scripting | +| `code-injection` | Code injection | +| `ssrf` | Server-side request forgery | +| `unsafe-deserialization` | Insecure deserialization | + +Example (Python): + +```yaml +# codeql-extensions/sinks.yml +extensions: + - addsTo: + pack: codeql/python-all + extensible: sinkModel + data: + - ["myapp.db", "Connection", true, "raw_query", "", "", "Argument[0]", "sql-injection", "manual"] + - ["myapp.shell", "Runner", false, "execute", "", "", "Argument[0]", "command-injection", "manual"] +``` + +Example (Java — note `True` and `Argument[N]` for sink input): + +```yaml +extensions: + - addsTo: + pack: codeql/java-all + extensible: sinkModel + data: + - ["com.myapp.db", "QueryRunner", True, "execute", "(String)", "", "Argument[0]", "sql-injection", "manual"] +``` + +## Summary Models + +Columns: `[package, type, subtypes, name, 
signature, ext, input, output, kind, provenance]` + +| Kind | Description | +|------|-------------| +| `taint` | Data flows through, still tainted | +| `value` | Data flows through, exact value preserved | + +Example: + +```yaml +# codeql-extensions/summaries.yml +extensions: + # Pass-through: taint propagates + - addsTo: + pack: codeql/python-all + extensible: summaryModel + data: + - ["myapp.cache", "Cache", true, "get", "", "", "Argument[0]", "ReturnValue", "taint", "manual"] + - ["myapp.utils", "JSON", false, "parse", "", "", "Argument[0]", "ReturnValue", "taint", "manual"] + + # Sanitizer: taint blocked + - addsTo: + pack: codeql/python-all + extensible: neutralModel + data: + - ["myapp.security", "Sanitizer", "escape_html", "", "summary", "manual"] +``` + +**`neutralModel` vs no model:** If a function has no model at all, CodeQL may still infer flow through it. Use `neutralModel` to explicitly block taint propagation through known-safe functions. + +## Language-Specific Notes + +**Python:** Use dotted module paths for `package` (e.g., `myapp.db`). + +**JavaScript:** `package` is often `""` for project-local code. Use the import path for npm packages. + +**Go:** Use full import paths (e.g., `myapp/internal/db`). `type` is often `""` for package-level functions. + +**Java:** Use fully qualified package names (e.g., `com.myapp.db`). + +**C/C++:** Use `""` for package, put the namespace in `type`. + +## Deploying Extensions + +**Known limitation:** `--additional-packs` and `--model-packs` flags do not work with pre-compiled query packs (bundled CodeQL distributions that cache `java-all` inside `.codeql/libraries/`). Extensions placed in a standalone model pack directory will be resolved by `codeql resolve qlpacks` but silently ignored during `codeql database analyze`. + +**Workaround — copy extensions into the library pack's `ext/` directory:** + +> **Warning:** Files copied into the `ext/` directory live inside CodeQL's managed pack cache. 
They will be **lost** when packs are updated via `codeql pack download` or version upgrades. After any pack update, re-run this deployment step to restore the extensions. + +```bash +# Find the java-all ext directory used by the query pack +JAVA_ALL_EXT=$(find "$(codeql resolve qlpacks 2>/dev/null | grep 'java-queries' | awk '{print $NF}' | tr -d '()')" \ + -path '*/.codeql/libraries/codeql/java-all/*/ext' -type d 2>/dev/null | head -1) + +if [ -n "$JAVA_ALL_EXT" ]; then + PROJECT_NAME=$(basename "$(pwd)") + cp codeql-extensions/sources.yml "$JAVA_ALL_EXT/${PROJECT_NAME}.sources.model.yml" + [ -f codeql-extensions/sinks.yml ] && cp codeql-extensions/sinks.yml "$JAVA_ALL_EXT/${PROJECT_NAME}.sinks.model.yml" + [ -f codeql-extensions/summaries.yml ] && cp codeql-extensions/summaries.yml "$JAVA_ALL_EXT/${PROJECT_NAME}.summaries.model.yml" + + # Verify deployment — confirm files landed correctly + DEPLOYED=$(ls "$JAVA_ALL_EXT/${PROJECT_NAME}".*.model.yml 2>/dev/null | wc -l) + if [ "$DEPLOYED" -gt 0 ]; then + echo "Extensions deployed to $JAVA_ALL_EXT ($DEPLOYED files):" + ls -la "$JAVA_ALL_EXT/${PROJECT_NAME}".*.model.yml + else + echo "ERROR: Files were copied but verification failed. Check path: $JAVA_ALL_EXT" + fi +else + echo "WARNING: Could not find java-all ext directory. Extensions may not load." + echo "Attempted path lookup from: codeql resolve qlpacks | grep java-queries" + echo "Run 'codeql resolve qlpacks' manually to debug." +fi +``` + +**For Python/JS/Go:** The same limitation may apply. Locate the `-all` pack's `ext/` directory and copy extensions there. 
+ +**Alternative (if query packs are NOT pre-compiled):** Use `--additional-packs=./codeql-extensions` with a proper model pack `qlpack.yml`: + +```yaml +# codeql-extensions/qlpack.yml +name: custom/-extensions +version: 0.0.1 +library: true +extensionTargets: + codeql/-all: "*" +dataExtensions: + - sources.yml + - sinks.yml + - summaries.yml +``` diff --git a/plugins/static-analysis/skills/codeql/references/important-only-suite.md b/plugins/static-analysis/skills/codeql/references/important-only-suite.md index 35fca3f..6e5b3b5 100644 --- a/plugins/static-analysis/skills/codeql/references/important-only-suite.md +++ b/plugins/static-analysis/skills/codeql/references/important-only-suite.md @@ -77,7 +77,7 @@ Generate this file as `important-only.qls` in the results directory before runni - modelgenerator ``` -> **Post-analysis step required:** After running the analysis, apply the jq filter from [run-analysis.md](../workflows/run-analysis.md) Step 5 to remove medium-precision results with `security-severity` < 6.0. +> **Post-analysis step required:** After running the analysis, apply the post-analysis jq filter (defined in the run-analysis workflow Step 5) to remove medium-precision results with `security-severity` < 6.0. ## Generation Script diff --git a/plugins/static-analysis/skills/codeql/references/macos-arm64e-workaround.md b/plugins/static-analysis/skills/codeql/references/macos-arm64e-workaround.md new file mode 100644 index 0000000..331824a --- /dev/null +++ b/plugins/static-analysis/skills/codeql/references/macos-arm64e-workaround.md @@ -0,0 +1,179 @@ +# macOS arm64e Workaround + +Methods for building CodeQL databases on macOS Apple Silicon when the `arm64e`/`arm64` architecture mismatch causes SIGKILL (exit code 137) during build tracing. + +**Use when `IS_MACOS_ARM64E=true`** (detected in build-database workflow Step 2a). These replace Methods 1 and 2 on affected systems. 
+ +The strategy is to use Homebrew-installed tools (plain `arm64`, not `arm64e`) so `libtrace.dylib` can be injected successfully. Try sub-methods in order: + +## Sub-method 2m-a: Homebrew clang/gcc with multi-step tracing + +Trace only the compiler invocations individually, avoiding system tools (`/usr/bin/ar`, `/bin/mkdir`) that would be killed. This requires a multi-step build: init → trace each compiler call → finalize. + +```bash +log_step "METHOD 2m-a: macOS arm64 — Homebrew compiler with multi-step tracing" + +# 1. Find Homebrew C/C++ compiler (arm64, not arm64e) +BREW_CC="" +# Prefer Homebrew clang +if [ -x "/opt/homebrew/opt/llvm/bin/clang" ]; then + BREW_CC="/opt/homebrew/opt/llvm/bin/clang" +# Try Homebrew GCC (e.g. gcc-14, gcc-13) +elif command -v gcc-14 >/dev/null 2>&1; then + BREW_CC="$(command -v gcc-14)" +elif command -v gcc-13 >/dev/null 2>&1; then + BREW_CC="$(command -v gcc-13)" +fi + +if [ -z "$BREW_CC" ]; then + log_result "No Homebrew C/C++ compiler found — skipping 2m-a" + # Fall through to 2m-b +else + # Verify it's arm64 (not arm64e) + BREW_CC_ARCH=$(lipo -archs "$BREW_CC" 2>/dev/null) + if [[ "$BREW_CC_ARCH" == *"arm64e"* ]]; then + log_result "Homebrew compiler is arm64e — skipping 2m-a" + else + log_step "Using Homebrew compiler: $BREW_CC (arch: $BREW_CC_ARCH)" + + # 2. Run the build normally (without tracing) to create build dirs and artifacts + # Use Homebrew make (gmake) if available, otherwise system make outside tracer + if command -v gmake >/dev/null 2>&1; then + MAKE_CMD="gmake" + else + MAKE_CMD="make" + fi + $MAKE_CMD clean 2>/dev/null || true + $MAKE_CMD CC="$BREW_CC" 2>&1 | tee -a "$LOG_FILE" + + # 3. 
Extract compiler commands from the Makefile / build system
+    # Use make's dry-run mode to get the exact compiler invocations
+    $MAKE_CMD clean 2>/dev/null || true
+    COMPILE_CMDS=$($MAKE_CMD CC="$BREW_CC" --dry-run 2>/dev/null \
+      | grep -E "^[[:space:]]*$BREW_CC[[:space:]].*[[:space:]]-c[[:space:]]" \
+      | sed 's/^[[:space:]]*//')

+    if [ -z "$COMPILE_CMDS" ]; then
+      log_result "Could not extract compile commands from dry-run — skipping 2m-a"
+    else
+      # 4. Init database
+      codeql database init $DB_NAME --language=cpp --source-root=. --overwrite 2>&1 \
+        | tee -a "$LOG_FILE"

+      # 5. Ensure build directories exist (outside tracer — avoids arm64e mkdir)
+      $MAKE_CMD clean 2>/dev/null || true
+      # Parse -o flags to find output dirs (sed used — BSD grep on macOS has no -oP)
+      echo "$COMPILE_CMDS" | sed -n 's/.*-o[[:space:]][[:space:]]*\([^[:space:]][^[:space:]]*\).*/\1/p' | xargs -I{} dirname {} \
+        | sort -u | xargs mkdir -p 2>/dev/null || true

+      # 6. Trace each compiler invocation individually
+      TRACE_OK=true
+      while IFS= read -r cmd; do
+        [ -z "$cmd" ] && continue
+        log_cmd "codeql database trace-command $DB_NAME -- $cmd"
+        if ! codeql database trace-command $DB_NAME -- $cmd 2>&1 | tee -a "$LOG_FILE"; then
+          log_result "FAILED on: $cmd"
+          TRACE_OK=false
+          break
+        fi
+      done <<< "$COMPILE_CMDS"
+      if $TRACE_OK; then
+        # 7. Finalize
+        codeql database finalize $DB_NAME 2>&1 | tee -a "$LOG_FILE"
+        if codeql resolve database -- "$DB_NAME" >/dev/null 2>&1; then
+          log_result "SUCCESS (macOS arm64 multi-step)"
+          # Done — skip to Step 4
+        else
+          log_result "FAILED (finalize failed)"
+        fi
+      fi
+    fi
+  fi
+fi
+```

+## Sub-method 2m-b: Rosetta x86_64 emulation

+Force the entire CodeQL pipeline to run under Rosetta, which uses the `x86_64` slice of both `libtrace.dylib` and system tools — no `arm64e` mismatch.

+```bash
+log_step "METHOD 2m-b: macOS arm64 — Rosetta x86_64 emulation"

+# Check if Rosetta is available
+if ! arch -x86_64 /usr/bin/true 2>/dev/null; then
+  log_result "Rosetta not available — skipping 2m-b"
+else
+  BUILD_CMD="" # e.g. 
"make clean && make -j4"
+  CMD="arch -x86_64 codeql database create $DB_NAME --language= --source-root=. --command='$BUILD_CMD' --overwrite"
+  log_cmd "$CMD"

+  arch -x86_64 codeql database create $DB_NAME --language= --source-root=. \
+    --command="$BUILD_CMD" --overwrite 2>&1 | tee -a "$LOG_FILE"

+  if codeql resolve database -- "$DB_NAME" >/dev/null 2>&1; then
+    log_result "SUCCESS (Rosetta x86_64)"
+  else
+    log_result "FAILED (Rosetta)"
+  fi
+fi
+```

+## Sub-method 2m-c: System compiler (direct attempt)

+As a verification step, try the standard autobuild with the system compiler. This will likely fail with exit code 137 on affected systems, but confirms the arm64e issue is the cause.

+> **This sub-method is optional.** Skip it if arm64e incompatibility was already confirmed in Step 2a.

+```bash
+log_step "METHOD 2m-c: System compiler (expected to fail on arm64e)"
+CMD="codeql database create $DB_NAME --language= --source-root=. --overwrite"
+log_cmd "$CMD"

+$CMD 2>&1 | tee -a "$LOG_FILE"
+EXIT_CODE=${PIPESTATUS[0]}  # status of codeql, not tee — $? would always be tee's (0)

+if [ $EXIT_CODE -eq 137 ] || [ $EXIT_CODE -eq 134 ]; then
+  log_result "FAILED: exit code $EXIT_CODE confirms arm64e/libtrace incompatibility"
+elif codeql resolve database -- "$DB_NAME" >/dev/null 2>&1; then
+  log_result "SUCCESS (unexpected — system compiler worked)"
+else
+  log_result "FAILED (exit code: $EXIT_CODE)"
+fi
+```

+## Sub-method 2m-d: Ask user

+If all macOS workarounds fail, present options:

+```
+AskUserQuestion:
+  header: "macOS Build"
+  question: "Build tracing failed due to macOS arm64e incompatibility. How to proceed?"
+  multiSelect: false
+  options:
+    - label: "Use build-mode=none (Recommended)"
+      description: "Source-level analysis only. Misses some interprocedural data flow but catches most C/C++ vulnerabilities (format strings, buffer overflows, unsafe functions)." 
+ - label: "Install arm64 tools and retry" + description: "Run: brew install llvm make — then retry with Homebrew toolchain" + - label: "Install Rosetta and retry" + description: "Run: softwareupdate --install-rosetta — then retry under x86_64 emulation" + - label: "Abort" + description: "Stop database creation" +``` + +**If "Use build-mode=none":** Proceed to Method 4. + +**If "Install arm64 tools and retry":** +```bash +log_step "Installing Homebrew arm64 toolchain" +brew install llvm make 2>&1 | tee -a "$LOG_FILE" +# Retry Sub-method 2m-a +``` + +**If "Install Rosetta and retry":** +```bash +log_step "Installing Rosetta" +softwareupdate --install-rosetta --agree-to-license 2>&1 | tee -a "$LOG_FILE" +# Retry Sub-method 2m-b +``` diff --git a/plugins/static-analysis/skills/codeql/references/performance-tuning.md b/plugins/static-analysis/skills/codeql/references/performance-tuning.md index f4e7b21..3dfe8f5 100644 --- a/plugins/static-analysis/skills/codeql/references/performance-tuning.md +++ b/plugins/static-analysis/skills/codeql/references/performance-tuning.md @@ -106,6 +106,6 @@ codeql database cleanup codeql_1.db | OOM during analysis | Not enough RAM | Increase `CODEQL_RAM` | | Slow database creation | Complex build | Use `--threads`, simplify build | | Slow query execution | Large codebase | Reduce query scope, add RAM | -| Database too large | Too many files | Use exclusion config (see [build-database workflow](../workflows/build-database.md#1b-create-exclusion-config-interpreted-languages-only)) | +| Database too large | Too many files | Use exclusion config (`codeql-config.yml` with `paths-ignore`) | | Single query hangs | Runaway evaluation | Use `--timeout` and check `--evaluator-log` | | Repeated runs still slow | Cache not used | Check you're using same database path | diff --git a/plugins/static-analysis/skills/codeql/references/quality-assessment.md b/plugins/static-analysis/skills/codeql/references/quality-assessment.md new file mode 100644 index 
0000000..a1371e6 --- /dev/null +++ b/plugins/static-analysis/skills/codeql/references/quality-assessment.md @@ -0,0 +1,172 @@ +# Quality Assessment + +How to assess and improve CodeQL database quality after a successful build. + +## Collect Metrics + +```bash +log_step "Assessing database quality" + +# 1. Baseline lines of code and file list (most reliable metric) +codeql database print-baseline -- "$DB_NAME" +BASELINE_LOC=$(python3 -c " +import json +with open('$DB_NAME/baseline-info.json') as f: + d = json.load(f) +for lang, info in d['languages'].items(): + print(f'{lang}: {info[\"linesOfCode\"]} LoC, {len(info[\"files\"])} files') +") +echo "$BASELINE_LOC" +log_result "Baseline: $BASELINE_LOC" + +# 2. Source archive file count +SRC_FILE_COUNT=$(unzip -Z1 "$DB_NAME/src.zip" 2>/dev/null | wc -l) +echo "Files in source archive: $SRC_FILE_COUNT" + +# 3. Extraction errors from extractor diagnostics +EXTRACTOR_ERRORS=$(find "$DB_NAME/diagnostic/extractors" -name '*.jsonl' \ + -exec cat {} + 2>/dev/null | grep -c '^{' 2>/dev/null || true) +EXTRACTOR_ERRORS=${EXTRACTOR_ERRORS:-0} +echo "Extractor errors: $EXTRACTOR_ERRORS" + +# 4. Export diagnostics summary (experimental but useful) +DIAG_TEXT=$(codeql database export-diagnostics --format=text -- "$DB_NAME" 2>/dev/null || true) +if [ -n "$DIAG_TEXT" ]; then + echo "Diagnostics: $DIAG_TEXT" +fi + +# 5. Check database is finalized +FINALIZED=$(grep '^finalised:' "$DB_NAME/codeql-database.yml" 2>/dev/null \ + | awk '{print $2}') +echo "Finalized: $FINALIZED" +``` + +## Compare Against Expected Source + +Estimate the expected source file count from the working directory and compare. + +> **Compiled languages (C/C++, Java, C#):** The source archive (`src.zip`) includes system headers and SDK files alongside project source files. For C/C++, this can inflate the archive count 10-20x (e.g., 111 archive files for 5 project source files). Compare against **project-relative files only** by filtering the archive listing. 
+ +```bash +# Count source files in the project (adjust extensions per language) +EXPECTED=$(fd -t f -e c -e cpp -e h -e hpp -e java -e kt -e py -e js -e ts \ + --exclude 'codeql_*.db' --exclude node_modules --exclude vendor --exclude .git . \ + 2>/dev/null | wc -l) +echo "Expected source files: $EXPECTED" + +# Count PROJECT files in source archive (exclude system/SDK paths) +PROJECT_SRC_COUNT=$(unzip -Z1 "$DB_NAME/src.zip" 2>/dev/null \ + | grep -v -E '^(Library/|usr/|System/|opt/|Applications/)' | wc -l) +echo "Project files in source archive: $PROJECT_SRC_COUNT" +echo "Total files in source archive: $SRC_FILE_COUNT (includes system headers for compiled langs)" + +# Baseline LOC from database metadata (most reliable single metric) +DB_LOC=$(grep '^baselineLinesOfCode:' "$DB_NAME/codeql-database.yml" \ + | awk '{print $2}') +echo "Baseline LoC: $DB_LOC" + +# Error ratio — use project file count for compiled langs, total for interpreted +if [ "$PROJECT_SRC_COUNT" -gt 0 ]; then + ERROR_RATIO=$(python3 -c "print(f'{$EXTRACTOR_ERRORS/$PROJECT_SRC_COUNT*100:.1f}%')") +else + ERROR_RATIO="N/A (no files)" +fi +echo "Error ratio: $ERROR_RATIO ($EXTRACTOR_ERRORS errors / $PROJECT_SRC_COUNT project files)" +``` + +## Log Assessment + +```bash +log_step "Quality assessment results" +log_result "Baseline LoC: $DB_LOC" +log_result "Project source files: $PROJECT_SRC_COUNT (expected: ~$EXPECTED)" +log_result "Total archive files: $SRC_FILE_COUNT (includes system headers for compiled langs)" +log_result "Extractor errors: $EXTRACTOR_ERRORS (ratio: $ERROR_RATIO)" +log_result "Finalized: $FINALIZED" + +# Sample extracted project files (exclude system paths) +unzip -Z1 "$DB_NAME/src.zip" 2>/dev/null \ + | grep -v -E '^(Library/|usr/|System/|opt/|Applications/)' \ + | head -20 >> "$LOG_FILE" +``` + +## Quality Criteria + +| Metric | Source | Good | Poor | +|--------|--------|------|------| +| Baseline LoC | `print-baseline` / `baseline-info.json` | > 0, proportional to project size 
| 0 or far below expected |
+| Project source files | `src.zip` (filtered) | Close to expected source file count | 0 or < 50% of expected |
+| Extractor errors | `diagnostic/extractors/*.jsonl` | 0 or < 5% of project files | > 5% of project files |
+| Finalized | `codeql-database.yml` | `true` | `false` (incomplete build) |
+| Key directories | `src.zip` listing | Application code directories present | Missing `src/main`, `lib/`, `app/` etc. |
+| "No source code seen" | build log | Absent | Present (cached build — compiled languages) |
+
+**Interpreting archive file counts for compiled languages:** C/C++ databases include system headers (e.g., `<stdio.h>`, SDK headers) in `src.zip`. A project with 5 source files may have 100+ files in the archive. Always filter to project-relative paths when comparing against expected counts. Use `baselineLinesOfCode` as the primary quality indicator.
+
+**Interpreting baseline LoC:** A small number of extractor errors is normal and does not significantly impact analysis. However, if `baselineLinesOfCode` is 0 or the source archive contains no files, the database is empty — likely a cached build (compiled languages) or wrong `--source-root`.
+
+---
+
+## Improve Quality (if poor)
+
+Try these improvements, re-assess after each. **Log all improvements:**
+
+### 1. Adjust source root
+
+```bash
+log_step "Quality improvement: adjust source root"
+NEW_ROOT="./src" # or detected subdirectory
+# For interpreted: add --codescanning-config=codeql-config.yml
+# For compiled: omit config flag
+log_cmd "codeql database create $DB_NAME --language=<language> --source-root=$NEW_ROOT --overwrite"
+codeql database create $DB_NAME --language=<language> --source-root=$NEW_ROOT --overwrite
+log_result "Changed source-root to: $NEW_ROOT"
+```
+
+### 2. 
Fix "no source code seen" (cached build - compiled languages only) + +```bash +log_step "Quality improvement: force rebuild (cached build detected)" +log_cmd "make clean && rebuild" +make clean && codeql database create $DB_NAME --language= --overwrite +log_result "Forced clean rebuild" +``` + +### 3. Install type stubs / dependencies + +> **Note:** These install into the *target project's* environment to improve CodeQL extraction quality. + +```bash +log_step "Quality improvement: install type stubs/additional deps" + +# Python type stubs — install into target project's environment +STUBS_INSTALLED="" +for stub in types-requests types-PyYAML types-redis; do + if pip install "$stub" 2>/dev/null; then + STUBS_INSTALLED="$STUBS_INSTALLED $stub" + fi +done +log_result "Installed type stubs:$STUBS_INSTALLED" + +# Additional project dependencies +log_cmd "pip install -e ." +pip install -e . 2>&1 | tee -a "$LOG_FILE" +``` + +### 4. Adjust extractor options + +```bash +log_step "Quality improvement: adjust extractor options" + +# C/C++: Include headers +export CODEQL_EXTRACTOR_CPP_OPTION_TRAP_HEADERS=true +log_result "Set CODEQL_EXTRACTOR_CPP_OPTION_TRAP_HEADERS=true" + +# Java: Specific JDK version +export CODEQL_EXTRACTOR_JAVA_OPTION_JDK_VERSION=17 +log_result "Set CODEQL_EXTRACTOR_JAVA_OPTION_JDK_VERSION=17" + +# Then rebuild with current method +``` + +**After each improvement:** Re-assess quality. If no improvement possible, move to next build method. diff --git a/plugins/static-analysis/skills/codeql/references/sarif-processing.md b/plugins/static-analysis/skills/codeql/references/sarif-processing.md new file mode 100644 index 0000000..35b1bd7 --- /dev/null +++ b/plugins/static-analysis/skills/codeql/references/sarif-processing.md @@ -0,0 +1,71 @@ +# SARIF Processing + +jq commands for processing CodeQL SARIF output. Used in the run-analysis workflow Step 5. 
+ +> **SARIF structure note:** `security-severity` and `level` are stored on rule definitions (`.runs[].tool.driver.rules[]`), NOT on individual result objects. Results reference rules by `ruleIndex`. The jq commands below join results with their rule metadata. + +## Count Findings + +```bash +jq '.runs[].results | length' "$RESULTS_DIR/results.sarif" +``` + +## Summary by SARIF Level + +```bash +jq -r ' + .runs[] | + . as $run | + .results[] | + ($run.tool.driver.rules[.ruleIndex].defaultConfiguration.level // "unknown") +' "$RESULTS_DIR/results.sarif" \ + | sort | uniq -c | sort -rn +``` + +## Summary by Security Severity (most useful for triage) + +```bash +jq -r ' + .runs[] | + . as $run | + .results[] | + ($run.tool.driver.rules[.ruleIndex].properties["security-severity"] // "none") + " | " + + .ruleId + " | " + + (.locations[0].physicalLocation.artifactLocation.uri // "?") + ":" + + ((.locations[0].physicalLocation.region.startLine // 0) | tostring) + " | " + + (.message.text // "no message" | .[0:80]) +' "$RESULTS_DIR/results.sarif" | sort -rn | head -20 +``` + +## Summary by Rule + +```bash +jq -r '.runs[].results[] | .ruleId' "$RESULTS_DIR/results.sarif" \ + | sort | uniq -c | sort -rn +``` + +## Important-Only Post-Filter + +If scan mode is "important only", filter out medium-precision results with `security-severity` < 6.0 from the report. The suite includes all medium-precision security queries to let CodeQL evaluate them, but low-severity medium-precision findings are noise: + +```bash +# Filter important-only results: drop medium-precision findings with security-severity < 6.0 +# Medium-precision queries without a security-severity score default to 0.0 (excluded). +# Non-medium queries are always kept regardless of security-severity. +jq ' + .runs[] |= ( + . 
as $run | + .results = [ + .results[] | + ($run.tool.driver.rules[.ruleIndex].properties.precision // "unknown") as $prec | + ($run.tool.driver.rules[.ruleIndex].properties["security-severity"] // null) as $raw_sev | + (if $prec == "medium" then ($raw_sev // "0" | tonumber) else 10 end) as $sev | + select( + ($prec == "high") or ($prec == "very-high") or ($prec == "unknown") or + ($prec == "medium" and $sev >= 6.0) + ) + ] + ) +' "$RESULTS_DIR/results.sarif" > "$RESULTS_DIR/results-filtered.sarif" +mv "$RESULTS_DIR/results-filtered.sarif" "$RESULTS_DIR/results.sarif" +``` diff --git a/plugins/static-analysis/skills/codeql/workflows/build-database.md b/plugins/static-analysis/skills/codeql/workflows/build-database.md index d0133c8..85114e8 100644 --- a/plugins/static-analysis/skills/codeql/workflows/build-database.md +++ b/plugins/static-analysis/skills/codeql/workflows/build-database.md @@ -22,96 +22,60 @@ TaskCreate: "Generate final report" (Step 6) - blockedBy: Step 5 Database creation differs by language type: ### Interpreted Languages (Python, JavaScript, Go, Ruby) -- **No build required** - CodeQL extracts source directly -- **Exclusion config supported** - Use `--codescanning-config` to skip irrelevant files +- **No build required** — CodeQL extracts source directly +- **Exclusion config supported** — Use `--codescanning-config` to skip irrelevant files ### Compiled Languages (C/C++, Java, C#, Rust, Swift) -- **Build required** - CodeQL must trace the compilation -- **Exclusion config NOT supported** - All compiled code must be traced +- **Build required** — CodeQL must trace the compilation +- **Exclusion config NOT supported** — All compiled code must be traced - Try build methods in order until one succeeds: - 1. **Autobuild** - CodeQL auto-detects and runs the build - 2. **Custom Command** - Explicit build command for the detected build system - 2m. 
**macOS arm64 Toolchain** - Homebrew compiler + multi-step tracing (Apple Silicon workaround, see Step 2a) - 3. **Multi-step** - Fine-grained control with init → trace-command → finalize - 4. **No-build fallback** - `--build-mode=none` (partial analysis, last resort) + 1. **Autobuild** — CodeQL auto-detects and runs the build + 2. **Custom Command** — Explicit build command for the detected build system + 2m. **macOS arm64 Toolchain** — Homebrew compiler + multi-step tracing (Apple Silicon workaround) + 3. **Multi-step** — Fine-grained control with init → trace-command → finalize + 4. **No-build fallback** — `--build-mode=none` (partial analysis, last resort) -> **macOS Apple Silicon:** On arm64 Macs, system tools (`/usr/bin/make`, `/usr/bin/clang`, `/usr/bin/ar`) are built for `arm64e` (pointer-authenticated ABI), but CodeQL's `libtrace.dylib` only has `arm64`. macOS kills any `arm64e` process with a non-`arm64e` injected dylib (SIGKILL, exit 137). Step 2a detects this and routes to Method 2m which uses Homebrew tools (plain `arm64`) or Rosetta (`x86_64`). +> **macOS Apple Silicon:** On arm64 Macs, system tools (`/usr/bin/make`, `/usr/bin/clang`, `/usr/bin/ar`) are `arm64e` but CodeQL's `libtrace.dylib` only has `arm64`. macOS kills `arm64e` processes with a non-`arm64e` injected dylib (SIGKILL, exit 137). Step 2a detects this and routes to Method 2m. 
--- ## Database Naming -Generate a unique sequential database name to avoid overwriting previous databases: - ```bash -# Find next available database number get_next_db_name() { local prefix="${1:-codeql}" local max=0 for db in ${prefix}_*.db; do - if [[ -d "$db" ]]; then - num="${db#${prefix}_}" - num="${num%.db}" - if [[ "$num" =~ ^[0-9]+$ ]] && (( num > max )); then - max=$num - fi - fi + [[ -d "$db" ]] || continue + num="${db#${prefix}_}"; num="${num%.db}" + [[ "$num" =~ ^[0-9]+$ ]] && (( num > max )) && max=$num done echo "${prefix}_$((max + 1)).db" } - DB_NAME=$(get_next_db_name) -echo "Database name: $DB_NAME" ``` -Use `$DB_NAME` in all commands below. - --- ## Build Log -Maintain a detailed log file throughout the workflow. Log every significant action. +Maintain a log file throughout. Initialize at start: -**Initialize at start:** ```bash LOG_FILE="${DB_NAME%.db}-build.log" echo "=== CodeQL Database Build Log ===" > "$LOG_FILE" echo "Started: $(date -Iseconds)" >> "$LOG_FILE" -echo "Working directory: $(pwd)" >> "$LOG_FILE" echo "Database: $DB_NAME" >> "$LOG_FILE" -echo "" >> "$LOG_FILE" ``` -**Log helper function:** +Log helper: ```bash -log_step() { - echo "[$(date -Iseconds)] $1" >> "$LOG_FILE" -} - -log_cmd() { - echo "[$(date -Iseconds)] COMMAND: $1" >> "$LOG_FILE" -} - -log_result() { - echo "[$(date -Iseconds)] RESULT: $1" >> "$LOG_FILE" - echo "" >> "$LOG_FILE" -} +log_step() { echo "[$(date -Iseconds)] $1" >> "$LOG_FILE"; } +log_cmd() { echo "[$(date -Iseconds)] COMMAND: $1" >> "$LOG_FILE"; } +log_result() { echo "[$(date -Iseconds)] RESULT: $1" >> "$LOG_FILE"; echo "" >> "$LOG_FILE"; } ``` -**What to log:** -- Detected language and build system -- Each build attempt with exact command -- Fix attempts and their outcomes: - - Cache/artifacts cleaned - - Dependencies installed (package names, versions) - - Downloaded JARs, npm packages, Python wheels - - Registry authentication configured -- Quality improvements applied: - - Source root adjustments 
- - Extractor options set - - Type stubs installed -- Quality assessment results (file counts, errors) -- Final successful command with all environment variables +**What to log:** Detected language/build system, each build attempt with exact command, fix attempts and outcomes, quality assessment results, final successful command. --- @@ -120,15 +84,9 @@ log_result() { ### 1a. Detect Language ```bash -# Detect primary language by file count fd -t f -e py -e js -e ts -e go -e rb -e java -e c -e cpp -e h -e hpp -e rs -e cs | \ sed 's/.*\.//' | sort | uniq -c | sort -rn | head -5 - -# Check for build files (compiled languages) ls -la Makefile CMakeLists.txt build.gradle pom.xml Cargo.toml *.sln 2>/dev/null || true - -# Check for existing CodeQL database -ls -la "$DB_NAME" 2>/dev/null && echo "WARNING: existing database found" ``` | Language | `--language=` | Type | @@ -145,115 +103,46 @@ ls -la "$DB_NAME" 2>/dev/null && echo "WARNING: existing database found" ### 1b. Create Exclusion Config (Interpreted Languages Only) -> **Skip this substep for compiled languages** - exclusion config is not supported when build tracing is required. - -Scan for irrelevant files and create `codeql-config.yml`: +> **Skip for compiled languages** — exclusion config is not supported when build tracing is required. -```bash -# Find common excludable directories -ls -d node_modules vendor third_party external deps 2>/dev/null || true - -# Find test directories -fd -t d -E node_modules "test|tests|spec|__tests__|fixtures" . - -# Find generated/minified files -fd -t f -E node_modules "\.min\.js$|\.bundle\.js$|\.generated\." . 
| head -20 - -# Estimate file counts -echo "Total source files:" -fd -t f -e py -e js -e ts -e go -e rb | wc -l -echo "In node_modules:" -fd -t f -e js -e ts node_modules 2>/dev/null | wc -l -``` - -**Create exclusion config:** - -```yaml -# codeql-config.yml -paths-ignore: - # Package managers - - node_modules - - vendor - - venv - - .venv - # Third-party code - - third_party - - external - - deps - # Generated/minified - - "**/*.min.js" - - "**/*.bundle.js" - - "**/generated/**" - - "**/dist/**" - # Tests (optional) - # - "**/test/**" - # - "**/tests/**" -``` - -```bash -log_step "Created codeql-config.yml" -log_result "Exclusions: $(grep -c '^ -' codeql-config.yml) patterns" -``` +Scan for irrelevant directories and create `codeql-config.yml` with `paths-ignore` entries for `node_modules`, `vendor`, `venv`, third-party code, and generated/minified files. --- ## Step 2: Build Database -### For Interpreted Languages (Python, JavaScript, Go, Ruby) - -Single command, no build required: +### For Interpreted Languages ```bash log_step "Building database for interpreted language: " CMD="codeql database create $DB_NAME --language= --source-root=. --codescanning-config=codeql-config.yml --overwrite" log_cmd "$CMD" - $CMD 2>&1 | tee -a "$LOG_FILE" - -if codeql resolve database -- "$DB_NAME" >/dev/null 2>&1; then - log_result "SUCCESS" -else - log_result "FAILED" -fi ``` -**Skip to Step 4 (Assess Quality) after success.** +**Skip to Step 4 after success.** --- -### For Compiled Languages (Java, C/C++, C#, Rust, Swift) - -#### Step 2a: macOS arm64e Detection (C/C++ only) +### For Compiled Languages -On macOS with Apple Silicon, CodeQL's build tracer (`preload_tracer`) injects `libtrace.dylib` into every spawned process via `DYLD_INSERT_LIBRARIES`. This dylib ships with `x86_64` + `arm64` slices, but Apple's system binaries (`/usr/bin/make`, `/usr/bin/clang`, `/usr/bin/ar`, `/bin/mkdir`, etc.) are built for `arm64e` (pointer-authenticated ABI). 
macOS kills any `arm64e` process that tries to load a non-`arm64e` injected dylib with **SIGKILL (signal 9, exit code 137)**. - -**This affects C/C++ builds on macOS Apple Silicon when the build invokes any `arm64e` system tool under tracing.** Java, Swift, and other languages may also be affected if their build tools are `arm64e`. - -**Detection:** +#### Step 2a: macOS arm64e Detection (C/C++ primarily) ```bash IS_MACOS_ARM64E=false if [[ "$(uname -s)" == "Darwin" ]] && [[ "$(uname -m)" == "arm64" ]]; then - # Check if libtrace.dylib lacks arm64e LIBTRACE=$(find "$(dirname "$(command -v codeql)")" -name libtrace.dylib 2>/dev/null | head -1) if [ -n "$LIBTRACE" ]; then LIBTRACE_ARCHS=$(lipo -archs "$LIBTRACE" 2>/dev/null) if [[ "$LIBTRACE_ARCHS" != *"arm64e"* ]]; then - # Check if system tools are arm64e MAKE_ARCHS=$(lipo -archs /usr/bin/make 2>/dev/null) - if [[ "$MAKE_ARCHS" == *"arm64e"* ]]; then - IS_MACOS_ARM64E=true - log_step "DETECTED: macOS arm64e tracer incompatibility" - log_result "libtrace.dylib archs: $LIBTRACE_ARCHS | /usr/bin/make archs: $MAKE_ARCHS" - fi + [[ "$MAKE_ARCHS" == *"arm64e"* ]] && IS_MACOS_ARM64E=true fi fi fi ``` -**If `IS_MACOS_ARM64E=true`:** Skip Method 1 (autobuild) and Method 2 (custom command) — they will fail with exit code 137. Go directly to **Method 2m (macOS arm64 toolchain)**. - -**If `IS_MACOS_ARM64E=false`:** Proceed with Method 1, 2, 3 in normal order. +**If `IS_MACOS_ARM64E=true`:** Skip Methods 1 and 2 — go directly to Method 2m. --- @@ -261,25 +150,18 @@ Try build methods in sequence until one succeeds: #### Method 1: Autobuild -> **Skip if `IS_MACOS_ARM64E=true`** — autobuild spawns system tools that will be killed. +> **Skip if `IS_MACOS_ARM64E=true`.** ```bash log_step "METHOD 1: Autobuild" CMD="codeql database create $DB_NAME --language= --source-root=. 
--overwrite" log_cmd "$CMD" - $CMD 2>&1 | tee -a "$LOG_FILE" - -if codeql resolve database -- "$DB_NAME" >/dev/null 2>&1; then - log_result "SUCCESS" -else - log_result "FAILED" -fi ``` #### Method 2: Custom Command -> **Skip if `IS_MACOS_ARM64E=true`** — custom command wraps the entire build in the tracer, which will inject `libtrace.dylib` into `arm64e` system tools called by make/cmake/etc. +> **Skip if `IS_MACOS_ARM64E=true`.** Detect build system and use explicit command: @@ -291,544 +173,64 @@ Detect build system and use explicit command: | Maven | `pom.xml` | `mvn clean compile -DskipTests` | | Cargo | `Cargo.toml` | `cargo clean && cargo build` | | .NET | `*.sln` | `dotnet clean && dotnet build` | -| Meson | `meson.build` | `meson setup build && ninja -C build` | -| Bazel | `BUILD`/`WORKSPACE` | `bazel build //...` | - -**Find project-specific build scripts:** -```bash -# Look for custom build scripts -fd -t f -e sh -e bash -e py "build|compile|make|setup" . -ls -la build.sh compile.sh Makefile.custom configure 2>/dev/null || true - -# Check README for build instructions -grep -i -A5 "build\|compile\|install" README* 2>/dev/null | head -20 -``` -Projects may have custom scripts (`build.sh`, `compile.sh`) or non-standard build steps documented in README. Use these instead of generic commands when found. +Also check for project-specific build scripts (`build.sh`, `compile.sh`) and README instructions. ```bash log_step "METHOD 2: Custom command" -log_step "Detected build system: " -BUILD_CMD="" CMD="codeql database create $DB_NAME --language= --source-root=. --command='$BUILD_CMD' --overwrite" log_cmd "$CMD" - $CMD 2>&1 | tee -a "$LOG_FILE" - -if codeql resolve database -- "$DB_NAME" >/dev/null 2>&1; then - log_result "SUCCESS" -else - log_result "FAILED" -fi ``` #### Method 2m: macOS arm64 Toolchain (Apple Silicon workaround) -> **Use this method when `IS_MACOS_ARM64E=true`.** It replaces Methods 1 and 2 on affected systems. 
- -The strategy is to use Homebrew-installed tools (which are plain `arm64`, not `arm64e`) so `libtrace.dylib` can be injected successfully. Try these sub-methods in order: - -##### Sub-method 2m-a: Homebrew clang/gcc with multi-step tracing - -Trace only the compiler invocations individually, avoiding system tools (`/usr/bin/ar`, `/bin/mkdir`) that would be killed. This requires a multi-step build: init → trace each compiler call → finalize. - -```bash -log_step "METHOD 2m-a: macOS arm64 — Homebrew compiler with multi-step tracing" - -# 1. Find Homebrew C/C++ compiler (arm64, not arm64e) -BREW_CC="" -# Prefer Homebrew clang -if [ -x "/opt/homebrew/opt/llvm/bin/clang" ]; then - BREW_CC="/opt/homebrew/opt/llvm/bin/clang" -# Try Homebrew GCC (e.g. gcc-14, gcc-13) -elif command -v gcc-14 >/dev/null 2>&1; then - BREW_CC="$(command -v gcc-14)" -elif command -v gcc-13 >/dev/null 2>&1; then - BREW_CC="$(command -v gcc-13)" -fi - -if [ -z "$BREW_CC" ]; then - log_result "No Homebrew C/C++ compiler found — skipping 2m-a" - # Fall through to 2m-b -else - # Verify it's arm64 (not arm64e) - BREW_CC_ARCH=$(lipo -archs "$BREW_CC" 2>/dev/null) - if [[ "$BREW_CC_ARCH" == *"arm64e"* ]]; then - log_result "Homebrew compiler is arm64e — skipping 2m-a" - else - log_step "Using Homebrew compiler: $BREW_CC (arch: $BREW_CC_ARCH)" - - # 2. Run the build normally (without tracing) to create build dirs and artifacts - # Use Homebrew make (gmake) if available, otherwise system make outside tracer - if command -v gmake >/dev/null 2>&1; then - MAKE_CMD="gmake" - else - MAKE_CMD="make" - fi - $MAKE_CMD clean 2>/dev/null || true - $MAKE_CMD CC="$BREW_CC" 2>&1 | tee -a "$LOG_FILE" - - # 3. 
Extract compiler commands from the Makefile / build system - # Use make's dry-run mode to get the exact compiler invocations - $MAKE_CMD clean 2>/dev/null || true - COMPILE_CMDS=$($MAKE_CMD CC="$BREW_CC" --dry-run 2>/dev/null \ - | grep -E "^\s*$BREW_CC\b.*\s-c\s" \ - | sed 's/^[[:space:]]*//') - - if [ -z "$COMPILE_CMDS" ]; then - log_result "Could not extract compile commands from dry-run — skipping 2m-a" - else - # 4. Init database - codeql database init $DB_NAME --language=cpp --source-root=. --overwrite 2>&1 \ - | tee -a "$LOG_FILE" - - # 5. Ensure build directories exist (outside tracer — avoids arm64e mkdir) - $MAKE_CMD clean 2>/dev/null || true - # Parse -o flags to find output dirs, or just create common dirs - echo "$COMPILE_CMDS" | grep -oP '(?<=-o\s)\S+' | xargs -I{} dirname {} \ - | sort -u | xargs mkdir -p 2>/dev/null || true - - # 6. Trace each compiler invocation individually - TRACE_OK=true - while IFS= read -r cmd; do - [ -z "$cmd" ] && continue - log_cmd "codeql database trace-command $DB_NAME -- $cmd" - if ! codeql database trace-command $DB_NAME -- $cmd 2>&1 | tee -a "$LOG_FILE"; then - log_result "FAILED on: $cmd" - TRACE_OK=false - break - fi - done <<< "$COMPILE_CMDS" - - if $TRACE_OK; then - # 7. Finalize - codeql database finalize $DB_NAME 2>&1 | tee -a "$LOG_FILE" - if codeql resolve database -- "$DB_NAME" >/dev/null 2>&1; then - log_result "SUCCESS (macOS arm64 multi-step)" - # Done — skip to Step 4 - else - log_result "FAILED (finalize failed)" - fi - fi - fi - fi -fi -``` - -##### Sub-method 2m-b: Rosetta x86_64 emulation - -Force the entire CodeQL pipeline to run under Rosetta, which uses the `x86_64` slice of both `libtrace.dylib` and system tools — no `arm64e` mismatch. - -```bash -log_step "METHOD 2m-b: macOS arm64 — Rosetta x86_64 emulation" - -# Check if Rosetta is available -if ! arch -x86_64 /usr/bin/true 2>/dev/null; then - log_result "Rosetta not available — skipping 2m-b" -else - BUILD_CMD="" # e.g. 
"make clean && make -j4" - CMD="arch -x86_64 codeql database create $DB_NAME --language= --source-root=. --command='$BUILD_CMD' --overwrite" - log_cmd "$CMD" - - arch -x86_64 codeql database create $DB_NAME --language= --source-root=. \ - --command="$BUILD_CMD" --overwrite 2>&1 | tee -a "$LOG_FILE" - - if codeql resolve database -- "$DB_NAME" >/dev/null 2>&1; then - log_result "SUCCESS (Rosetta x86_64)" - else - log_result "FAILED (Rosetta)" - fi -fi -``` - -##### Sub-method 2m-c: System compiler (direct attempt) - -As a verification step, try the standard autobuild with the system compiler. This will likely fail with exit code 137 on affected systems, but confirms the arm64e issue is the cause. - -> **This sub-method is optional.** Skip it if arm64e incompatibility was already confirmed in Step 2a. - -```bash -log_step "METHOD 2m-c: System compiler (expected to fail on arm64e)" -CMD="codeql database create $DB_NAME --language= --source-root=. --overwrite" -log_cmd "$CMD" - -$CMD 2>&1 | tee -a "$LOG_FILE" - -EXIT_CODE=$? -if [ $EXIT_CODE -eq 137 ] || [ $EXIT_CODE -eq 134 ]; then - log_result "FAILED: exit code $EXIT_CODE confirms arm64e/libtrace incompatibility" -elif codeql resolve database -- "$DB_NAME" >/dev/null 2>&1; then - log_result "SUCCESS (unexpected — system compiler worked)" -else - log_result "FAILED (exit code: $EXIT_CODE)" -fi -``` - -##### Sub-method 2m-d: Ask user +> **Use when `IS_MACOS_ARM64E=true`.** Replaces Methods 1 and 2 on affected systems. -If all macOS workarounds fail, present options: - -``` -AskUserQuestion: - header: "macOS Build" - question: "Build tracing failed due to macOS arm64e incompatibility. How to proceed?" - multiSelect: false - options: - - label: "Use build-mode=none (Recommended)" - description: "Source-level analysis only. Misses some interprocedural data flow but catches most C/C++ vulnerabilities (format strings, buffer overflows, unsafe functions)." 
- - label: "Install arm64 tools and retry" - description: "Run: brew install llvm make — then retry with Homebrew toolchain" - - label: "Install Rosetta and retry" - description: "Run: softwareupdate --install-rosetta — then retry under x86_64 emulation" - - label: "Abort" - description: "Stop database creation" -``` - -**If "Use build-mode=none":** Proceed to Method 4. - -**If "Install arm64 tools and retry":** -```bash -log_step "Installing Homebrew arm64 toolchain" -brew install llvm make 2>&1 | tee -a "$LOG_FILE" -# Retry Method 2m-a -``` - -**If "Install Rosetta and retry":** -```bash -log_step "Installing Rosetta" -softwareupdate --install-rosetta --agree-to-license 2>&1 | tee -a "$LOG_FILE" -# Retry Method 2m-b -``` - ---- +See [macos-arm64e-workaround.md](../references/macos-arm64e-workaround.md) for the full sub-method sequence (2m-a through 2m-d): Homebrew compiler with multi-step tracing → Rosetta x86_64 → system compiler verification → ask user. #### Method 3: Multi-step Build For complex builds needing fine-grained control: -> **On macOS with `IS_MACOS_ARM64E=true`:** Only trace compiler commands (arm64 Homebrew binaries). Do NOT trace system tools like `make`, `ar`, `mkdir` — they are arm64e and will be killed. Run non-compiler build steps outside the tracer. +> **On macOS with `IS_MACOS_ARM64E=true`:** Only trace arm64 Homebrew binaries. Do NOT trace system tools. ```bash log_step "METHOD 3: Multi-step build" - -# 1. Initialize -log_cmd "codeql database init $DB_NAME --language= --source-root=. --overwrite" codeql database init $DB_NAME --language= --source-root=. --overwrite - -# 2. Trace each build step -log_cmd "codeql database trace-command $DB_NAME -- " codeql database trace-command $DB_NAME -- - -log_cmd "codeql database trace-command $DB_NAME -- " codeql database trace-command $DB_NAME -- -# ... more steps as needed - -# 3. 
Finalize -log_cmd "codeql database finalize $DB_NAME" codeql database finalize $DB_NAME - -if codeql resolve database -- "$DB_NAME" >/dev/null 2>&1; then - log_result "SUCCESS" -else - log_result "FAILED" -fi ``` #### Method 4: No-Build Fallback (Last Resort) -When all build methods fail, use `--build-mode=none` for partial analysis: - -> **⚠️ WARNING:** This creates a database without build tracing. Analysis will be incomplete - only source-level patterns detected, no data flow through compiled code. +> **WARNING:** Creates a database without build tracing. Only source-level patterns detected. ```bash log_step "METHOD 4: No-build fallback (partial analysis)" CMD="codeql database create $DB_NAME --language= --source-root=. --build-mode=none --overwrite" log_cmd "$CMD" - $CMD 2>&1 | tee -a "$LOG_FILE" - -if codeql resolve database -- "$DB_NAME" >/dev/null 2>&1; then - log_result "SUCCESS (partial - no build tracing)" -else - log_result "FAILED" -fi ``` --- ## Step 3: Apply Fixes (if build failed) -Try these in order, then retry current build method. **Log each fix attempt:** - -### 1. Clean existing state -```bash -log_step "Applying fix: clean existing state" -rm -rf "$DB_NAME" -log_result "Removed $DB_NAME" -``` - -### 2. Clean build cache -```bash -log_step "Applying fix: clean build cache" -CLEANED="" -make clean 2>/dev/null && CLEANED="$CLEANED make" -rm -rf build CMakeCache.txt CMakeFiles 2>/dev/null && CLEANED="$CLEANED cmake-artifacts" -./gradlew clean 2>/dev/null && CLEANED="$CLEANED gradle" -mvn clean 2>/dev/null && CLEANED="$CLEANED maven" -cargo clean 2>/dev/null && CLEANED="$CLEANED cargo" -log_result "Cleaned: $CLEANED" -``` - -### 3. Install missing dependencies - -> **Note:** The commands below install the *target project's* dependencies so CodeQL can trace the build. Use whatever package manager the target project expects (`pip`, `npm`, `go mod`, etc.) — these are not the skill's own tooling preferences. 
- -```bash -log_step "Applying fix: install dependencies" - -# Python — use target project's package manager (pip/uv/poetry) -if [ -f requirements.txt ]; then - log_cmd "pip install -r requirements.txt" - pip install -r requirements.txt 2>&1 | tee -a "$LOG_FILE" -fi -if [ -f setup.py ] || [ -f pyproject.toml ]; then - log_cmd "pip install -e ." - pip install -e . 2>&1 | tee -a "$LOG_FILE" -fi - -# Node - log installed packages -if [ -f package.json ]; then - log_cmd "npm install" - npm install 2>&1 | tee -a "$LOG_FILE" -fi - -# Go -if [ -f go.mod ]; then - log_cmd "go mod download" - go mod download 2>&1 | tee -a "$LOG_FILE" -fi - -# Java - log downloaded dependencies -if [ -f build.gradle ] || [ -f build.gradle.kts ]; then - log_cmd "./gradlew dependencies --refresh-dependencies" - ./gradlew dependencies --refresh-dependencies 2>&1 | tee -a "$LOG_FILE" -fi -if [ -f pom.xml ]; then - log_cmd "mvn dependency:resolve" - mvn dependency:resolve 2>&1 | tee -a "$LOG_FILE" -fi - -# Rust -if [ -f Cargo.toml ]; then - log_cmd "cargo fetch" - cargo fetch 2>&1 | tee -a "$LOG_FILE" -fi - -log_result "Dependencies installed - see above for details" -``` - -### 4. Handle private registries - -If dependencies require authentication, ask user: -``` -AskUserQuestion: "Build requires private registry access. Options:" - 1. "I'll configure auth and retry" - 2. "Skip these dependencies" - 3. "Show me what's needed" -``` - -```bash -# Log authentication setup if performed -log_step "Private registry authentication configured" -log_result "Registry: , Method: " -``` - -**After fixes:** Retry current build method. If still fails, move to next method. +Try fixes in order, then retry current build method. See [build-fixes.md](../references/build-fixes.md) for the full fix catalog: clean state, clean build cache, install dependencies, handle private registries. --- -## Step 4: Assess Quality - -Run all quality checks and compare against the project's expected source files. - -### 4a. 
Collect Metrics - -```bash -log_step "Assessing database quality" - -# 1. Baseline lines of code and file list (most reliable metric) -codeql database print-baseline -- "$DB_NAME" -BASELINE_LOC=$(python3 -c " -import json -with open('$DB_NAME/baseline-info.json') as f: - d = json.load(f) -for lang, info in d['languages'].items(): - print(f'{lang}: {info[\"linesOfCode\"]} LoC, {len(info[\"files\"])} files') -") -echo "$BASELINE_LOC" -log_result "Baseline: $BASELINE_LOC" - -# 2. Source archive file count -SRC_FILE_COUNT=$(unzip -Z1 "$DB_NAME/src.zip" 2>/dev/null | wc -l) -echo "Files in source archive: $SRC_FILE_COUNT" - -# 3. Extraction errors from extractor diagnostics -EXTRACTOR_ERRORS=$(find "$DB_NAME/diagnostic/extractors" -name '*.jsonl' \ - -exec cat {} + 2>/dev/null | grep -c '^{' 2>/dev/null || true) -EXTRACTOR_ERRORS=${EXTRACTOR_ERRORS:-0} -echo "Extractor errors: $EXTRACTOR_ERRORS" - -# 4. Export diagnostics summary (experimental but useful) -DIAG_TEXT=$(codeql database export-diagnostics --format=text -- "$DB_NAME" 2>/dev/null || true) -if [ -n "$DIAG_TEXT" ]; then - echo "Diagnostics: $DIAG_TEXT" -fi - -# 5. Check database is finalized -FINALIZED=$(grep '^finalised:' "$DB_NAME/codeql-database.yml" 2>/dev/null \ - | awk '{print $2}') -echo "Finalized: $FINALIZED" -``` - -### 4b. Compare Against Expected Source - -Estimate the expected source file count from the working directory and compare. - -> **Compiled languages (C/C++, Java, C#):** The source archive (`src.zip`) includes system headers and SDK files alongside project source files. For C/C++, this can inflate the archive count 10-20x (e.g., 111 archive files for 5 project source files). Compare against **project-relative files only** by filtering the archive listing. 
- -```bash -# Count source files in the project (adjust extensions per language) -# C/C++: -e c -e cpp -e h -e hpp -# Java: -e java -e kt -# Python: -e py -# JS/TS: -e js -e ts -e jsx -e tsx -EXPECTED=$(fd -t f -e c -e cpp -e h -e hpp -e java -e kt -e py -e js -e ts \ - --exclude 'codeql_*.db' --exclude node_modules --exclude vendor --exclude .git . \ - 2>/dev/null | wc -l) -echo "Expected source files: $EXPECTED" - -# Count PROJECT files in source archive (exclude system/SDK paths) -# For compiled languages, src.zip contains system headers under SDK paths -PROJECT_SRC_COUNT=$(unzip -Z1 "$DB_NAME/src.zip" 2>/dev/null \ - | grep -v -E '^(Library/|usr/|System/|opt/|Applications/)' | wc -l) -echo "Project files in source archive: $PROJECT_SRC_COUNT" -echo "Total files in source archive: $SRC_FILE_COUNT (includes system headers for compiled langs)" - -# Baseline LOC from database metadata (most reliable single metric) -DB_LOC=$(grep '^baselineLinesOfCode:' "$DB_NAME/codeql-database.yml" \ - | awk '{print $2}') -echo "Baseline LoC: $DB_LOC" - -# Error ratio — use project file count for compiled langs, total for interpreted -if [ "$PROJECT_SRC_COUNT" -gt 0 ]; then - ERROR_RATIO=$(python3 -c "print(f'{$EXTRACTOR_ERRORS/$PROJECT_SRC_COUNT*100:.1f}%')") -else - ERROR_RATIO="N/A (no files)" -fi -echo "Error ratio: $ERROR_RATIO ($EXTRACTOR_ERRORS errors / $PROJECT_SRC_COUNT project files)" -``` - -### 4c. 
Log Assessment - -```bash -log_step "Quality assessment results" -log_result "Baseline LoC: $DB_LOC" -log_result "Project source files: $PROJECT_SRC_COUNT (expected: ~$EXPECTED)" -log_result "Total archive files: $SRC_FILE_COUNT (includes system headers for compiled langs)" -log_result "Extractor errors: $EXTRACTOR_ERRORS (ratio: $ERROR_RATIO)" -log_result "Finalized: $FINALIZED" - -# Sample extracted project files (exclude system paths) -unzip -Z1 "$DB_NAME/src.zip" 2>/dev/null \ - | grep -v -E '^(Library/|usr/|System/|opt/|Applications/)' \ - | head -20 >> "$LOG_FILE" -``` +## Steps 4-5: Assess and Improve Quality -### Quality Criteria - -| Metric | Source | Good | Poor | -|--------|--------|------|------| -| Baseline LoC | `print-baseline` / `baseline-info.json` | > 0, proportional to project size | 0 or far below expected | -| Project source files | `src.zip` (filtered) | Close to expected source file count | 0 or < 50% of expected | -| Extractor errors | `diagnostic/extractors/*.jsonl` | 0 or < 5% of project files | > 5% of project files | -| Finalized | `codeql-database.yml` | `true` | `false` (incomplete build) | -| Key directories | `src.zip` listing | Application code directories present | Missing `src/main`, `lib/`, `app/` etc. | -| "No source code seen" | build log | Absent | Present (cached build — compiled languages) | - -**Interpreting archive file counts for compiled languages:** C/C++ databases include system headers (e.g., ``, SDK headers) in `src.zip`. A project with 5 source files may have 100+ files in the archive. Always filter to project-relative paths when comparing against expected counts. Use `baselineLinesOfCode` as the primary quality indicator. - -**Interpreting baseline LoC:** A small number of extractor errors is normal and does not significantly impact analysis. However, if `baselineLinesOfCode` is 0 or the source archive contains no files, the database is empty — likely a cached build (compiled languages) or wrong `--source-root`. 
- ---- - -## Step 5: Improve Quality (if poor) - -Try these improvements, re-assess after each. **Log all improvements:** - -### 1. Adjust source root -```bash -log_step "Quality improvement: adjust source root" -NEW_ROOT="./src" # or detected subdirectory -# For interpreted: add --codescanning-config=codeql-config.yml -# For compiled: omit config flag -log_cmd "codeql database create $DB_NAME --language= --source-root=$NEW_ROOT --overwrite" -codeql database create $DB_NAME --language= --source-root=$NEW_ROOT --overwrite -log_result "Changed source-root to: $NEW_ROOT" -``` - -### 2. Fix "no source code seen" (cached build - compiled languages only) -```bash -log_step "Quality improvement: force rebuild (cached build detected)" -log_cmd "make clean && rebuild" -make clean && codeql database create $DB_NAME --language= --overwrite -log_result "Forced clean rebuild" -``` - -### 3. Install type stubs / dependencies - -> **Note:** These install into the *target project's* environment to improve CodeQL extraction quality. - -```bash -log_step "Quality improvement: install type stubs/additional deps" - -# Python type stubs — install into target project's environment -STUBS_INSTALLED="" -for stub in types-requests types-PyYAML types-redis; do - if pip install "$stub" 2>/dev/null; then - STUBS_INSTALLED="$STUBS_INSTALLED $stub" - fi -done -log_result "Installed type stubs:$STUBS_INSTALLED" - -# Additional project dependencies -log_cmd "pip install -e ." -pip install -e . 2>&1 | tee -a "$LOG_FILE" -``` - -### 4. 
Adjust extractor options -```bash -log_step "Quality improvement: adjust extractor options" - -# C/C++: Include headers -export CODEQL_EXTRACTOR_CPP_OPTION_TRAP_HEADERS=true -log_result "Set CODEQL_EXTRACTOR_CPP_OPTION_TRAP_HEADERS=true" - -# Java: Specific JDK version -export CODEQL_EXTRACTOR_JAVA_OPTION_JDK_VERSION=17 -log_result "Set CODEQL_EXTRACTOR_JAVA_OPTION_JDK_VERSION=17" - -# Then rebuild with current method -``` - -**After each improvement:** Re-assess quality. If no improvement possible, move to next build method. +Run quality checks and compare against expected source files. See [quality-assessment.md](../references/quality-assessment.md) for metric collection, quality criteria table, and improvement steps. --- ## Exit Conditions -**Success:** -- Quality assessment shows GOOD -- User accepts current database state +**Success:** Quality assessment shows GOOD or user accepts current state. **Failure (all methods exhausted):** ``` @@ -842,17 +244,15 @@ AskUserQuestion: "All build methods failed. Options:" ## Final Report -**Finalize the log file:** ```bash echo "=== Build Complete ===" >> "$LOG_FILE" echo "Finished: $(date -Iseconds)" >> "$LOG_FILE" echo "Final database: $DB_NAME" >> "$LOG_FILE" echo "Successful method: " >> "$LOG_FILE" -echo "Final command: " >> "$LOG_FILE" codeql resolve database -- "$DB_NAME" >> "$LOG_FILE" 2>&1 ``` -**Report to user:** +Report to user: ``` ## Database Build Complete @@ -866,57 +266,8 @@ codeql resolve database -- "$DB_NAME" >> "$LOG_FILE" 2>&1 - Coverage: ### Build Log: -See `$LOG_FILE` for complete details including: -- All attempted commands -- Fixes applied -- Quality assessments - -**Final command used:** - +See `$LOG_FILE` for complete details. 
+**Final command used:** **Ready for analysis.** ``` - ---- - -## Performance: Parallel Extraction - -Use `--threads` to parallelize database creation: - -```bash -# Compiled language (no exclusion config) -codeql database create $DB_NAME --language=cpp --threads=0 --command='make -j$(nproc)' - -# Interpreted language (with exclusion config) -codeql database create $DB_NAME --language=python --threads=0 \ - --codescanning-config=codeql-config.yml -``` - -**Note:** `--threads=0` auto-detects available cores. For shared machines, use explicit count. - ---- - -## Quick Reference - -| Language | Build System | Custom Command | -|----------|--------------|----------------| -| C/C++ | Make | `make clean && make -j$(nproc)` | -| C/C++ | CMake | `cmake -B build && cmake --build build` | -| Java | Gradle | `./gradlew clean build -x test` | -| Java | Maven | `mvn clean compile -DskipTests` | -| Rust | Cargo | `cargo clean && cargo build` | -| C# | .NET | `dotnet clean && dotnet build` | - -### macOS Apple Silicon (arm64e workaround) - -| Priority | Method | Command | -|----------|--------|---------| -| 1st | Homebrew clang + multi-step | `codeql database init` → `codeql database trace-command -- /opt/homebrew/opt/llvm/bin/clang -c file.c` (per file) → `codeql database finalize` | -| 2nd | Rosetta x86_64 | `arch -x86_64 codeql database create --command='make'` | -| 3rd | `build-mode=none` | `codeql database create --build-mode=none` (source-level only) | - -**Why:** CodeQL's `libtrace.dylib` has `x86_64`+`arm64` slices but Apple system tools are `arm64e`. macOS kills `arm64e` processes that load non-`arm64e` injected dylibs. - -**Key constraint:** Only trace `arm64` binaries (Homebrew tools). Never trace `arm64e` binaries (`/usr/bin/*`, `/bin/*`) — they will be killed with signal 9. - -See [language-details.md](../references/language-details.md) for more. 
diff --git a/plugins/static-analysis/skills/codeql/workflows/create-data-extensions.md b/plugins/static-analysis/skills/codeql/workflows/create-data-extensions.md index 98648ca..95d5ce7 100644 --- a/plugins/static-analysis/skills/codeql/workflows/create-data-extensions.md +++ b/plugins/static-analysis/skills/codeql/workflows/create-data-extensions.md @@ -29,29 +29,22 @@ TaskCreate: "Validate with re-analysis" (Step 5) - blockedBy: Step 4 Search the project for existing data extensions and model packs. -**1. In-repo model packs** — `qlpack.yml` or `codeql-pack.yml` with `dataExtensions`: - ```bash +# 1. In-repo model packs fd '(qlpack|codeql-pack)\.yml$' . --exclude codeql_*.db | while read -r f; do if grep -q 'dataExtensions' "$f"; then echo "MODEL PACK: $(dirname "$f") - $(grep '^name:' "$f")" fi done -``` - -**2. Standalone data extension files** — `.yml` files with `extensions:` key: -```bash +# 2. Standalone data extension files rg -l '^extensions:' --glob '*.yml' --glob '!codeql_*.db/**' | head -20 -``` -**3. Installed model packs:** - -```bash +# 3. Installed model packs codeql resolve qlpacks 2>/dev/null | grep -iE 'model|extension' ``` -**If any found:** Report to user what was found and finish. These will be picked up by the run-analysis workflow's model pack detection (Step 2b). +**If any found:** Report to user and finish. These will be picked up by the run-analysis workflow. **If none found:** Proceed to Step 2. @@ -59,15 +52,13 @@ codeql resolve qlpacks 2>/dev/null | grep -iE 'model|extension' ### Step 2: Query Known Sources and Sinks -Run custom QL queries against the database to enumerate all sources and sinks CodeQL currently recognizes. This gives a direct inventory of what is modeled vs. what is not. +Run custom QL queries against the database to enumerate all sources and sinks CodeQL currently recognizes. 
#### 2a: Select Database and Language ```bash DB_NAME=$(ls -dt codeql_*.db 2>/dev/null | head -1) LANG=$(codeql resolve database --format=json -- "$DB_NAME" | jq -r '.languages[0]') -echo "Database: $DB_NAME, Language: $LANG" - DIAG_DIR="${DB_NAME%.db}-diagnostics" mkdir -p "$DIAG_DIR" ``` @@ -78,141 +69,60 @@ Use the `Write` tool to create `$DIAG_DIR/list-sources.ql` using the source temp #### 2c: Write Sink Enumeration Query -Use the `Write` tool to create `$DIAG_DIR/list-sinks.ql` using the language-specific sink template from [diagnostic-query-templates.md](../references/diagnostic-query-templates.md#sink-enumeration-queries). The Concepts API differs significantly across languages — use the exact template for the detected language. +Use the `Write` tool to create `$DIAG_DIR/list-sinks.ql` using the language-specific sink template from [diagnostic-query-templates.md](../references/diagnostic-query-templates.md#sink-enumeration-queries). -**For Java:** Also create `$DIAG_DIR/qlpack.yml` with a `codeql/java-all` dependency and run `codeql pack install` before executing queries. See the Java section in the templates reference. +**For Java:** Also create `$DIAG_DIR/qlpack.yml` with a `codeql/java-all` dependency and run `codeql pack install` before executing queries. 
#### 2d: Run Queries ```bash -# Run sources query -codeql query run \ - --database="$DB_NAME" \ - --output="$DIAG_DIR/sources.bqrs" \ - -- "$DIAG_DIR/list-sources.ql" - -codeql bqrs decode \ - --format=csv \ - --output="$DIAG_DIR/sources.csv" \ - -- "$DIAG_DIR/sources.bqrs" - -# Run sinks query -codeql query run \ - --database="$DB_NAME" \ - --output="$DIAG_DIR/sinks.bqrs" \ - -- "$DIAG_DIR/list-sinks.ql" - -codeql bqrs decode \ - --format=csv \ - --output="$DIAG_DIR/sinks.csv" \ - -- "$DIAG_DIR/sinks.bqrs" -``` +codeql query run --database="$DB_NAME" --output="$DIAG_DIR/sources.bqrs" -- "$DIAG_DIR/list-sources.ql" +codeql bqrs decode --format=csv --output="$DIAG_DIR/sources.csv" -- "$DIAG_DIR/sources.bqrs" -#### 2e: Summarize Results - -```bash -echo "=== Known Sources ===" -wc -l < "$DIAG_DIR/sources.csv" -# Show unique source types -cut -d',' -f2 "$DIAG_DIR/sources.csv" | sort -u - -echo "=== Known Sinks ===" -wc -l < "$DIAG_DIR/sinks.csv" -# Show unique sink kinds -cut -d',' -f2 "$DIAG_DIR/sinks.csv" | sort -u +codeql query run --database="$DB_NAME" --output="$DIAG_DIR/sinks.bqrs" -- "$DIAG_DIR/list-sinks.ql" +codeql bqrs decode --format=csv --output="$DIAG_DIR/sinks.csv" -- "$DIAG_DIR/sinks.bqrs" ``` -Read both CSV files and present a summary to the user: +#### 2e: Summarize Results -``` -## CodeQL Known Models - -### Sources ( total): -- remote: (HTTP handlers, request parsing) -- local: (CLI args, file reads) -- ... - -### Sinks ( total): -- sql-execution: -- command-execution: -- file-access: -- ... -``` +Read both CSV files and present a summary showing source types and sink kinds with counts. --- ### Step 3: Identify Missing Sources and Sinks -This is the core analysis step. Cross-reference the project's API surface against CodeQL's known models. +Cross-reference the project's API surface against CodeQL's known models. #### 3a: Map the Project's API Surface -Read source code to identify security-relevant patterns. 
Look for: +Read source code to identify security-relevant patterns: | Pattern | What To Find | Likely Model Type | |---------|-------------|-------------------| -| HTTP/request handlers | Custom request parsing, parameter access | `sourceModel` (kind: `remote`) | -| Database layers | Custom ORM methods, raw query wrappers | `sinkModel` (kind: `sql-injection`) | +| HTTP/request handlers | Custom request parsing | `sourceModel` (kind: `remote`) | +| Database layers | Custom ORM, raw query wrappers | `sinkModel` (kind: `sql-injection`) | | Command execution | Shell wrappers, process spawners | `sinkModel` (kind: `command-injection`) | -| File operations | Custom file read/write, path construction | `sinkModel` (kind: `path-injection`) | +| File operations | Custom file read/write | `sinkModel` (kind: `path-injection`) | | Template rendering | HTML output, response builders | `sinkModel` (kind: `xss`) | -| Deserialization | Custom deserializers, data loaders | `sinkModel` (kind: `unsafe-deserialization`) | -| HTTP clients | URL construction, request builders | `sinkModel` (kind: `ssrf`) | -| Sanitizers | Input validation, escaping functions | `neutralModel` | +| Deserialization | Custom deserializers | `sinkModel` (kind: `unsafe-deserialization`) | +| HTTP clients | URL construction | `sinkModel` (kind: `ssrf`) | +| Sanitizers | Input validation, escaping | `neutralModel` | | Pass-through wrappers | Logging, caching, encoding | `summaryModel` (kind: `taint`) | -Use `Grep` to search for these patterns in source code: - -```bash -# Examples for Python - adapt patterns per language -rg -n 'def (get_param|get_header|get_body|parse_request)' --type py -rg -n '(execute|query|raw_sql|cursor\.)' --type py -rg -n '(subprocess|os\.system|popen|exec)' --type py -rg -n '(open|read_file|write_file|path\.join)' --type py -rg -n '(render|template|html)' --type py -rg -n '(requests\.|urlopen|fetch|http_client)' --type py -``` +Use `Grep` to search for these patterns in source code 
(adapt per language). #### 3b: Cross-Reference Against Known Sources and Sinks -For each API pattern found in 3a, check if it appears in the source/sink CSVs from Step 2: - -```bash -# Check if a specific file/function appears in known sources -grep -i "" "$DIAG_DIR/sources.csv" - -# Check if a specific file/function appears in known sinks -grep -i "" "$DIAG_DIR/sinks.csv" -``` +For each API pattern found, check if it appears in `sources.csv` or `sinks.csv` from Step 2. **An API is "missing" if:** - It handles user input but does not appear in `sources.csv` - It performs a dangerous operation but does not appear in `sinks.csv` -- It wraps/transforms tainted data but CodeQL has no summary model for it (these won't appear in either CSV — identify by reading the code for wrapper patterns around known sources/sinks) +- It wraps tainted data but has no summary model #### 3c: Report Gaps -Present findings to user: - -``` -## Data Flow Coverage Gaps - -### Missing Sources (user input not tracked): -- `myapp.http.Request.get_param()` — custom parameter access -- `myapp.auth.Token.decode()` — untrusted token data - -### Missing Sinks (dangerous operations not checked): -- `myapp.db.Connection.raw_query()` — SQL execution wrapper -- `myapp.shell.Runner.execute()` — command execution - -### Missing Summaries (taint lost through wrappers): -- `myapp.cache.Cache.get()` — taint not propagated through cache -- `myapp.utils.encode_json()` — taint lost in serialization - -Proceed to create data extension files? -``` - -Use `AskUserQuestion`: +Present findings and use `AskUserQuestion`: ``` header: "Extensions" @@ -226,10 +136,6 @@ options: description: "No extensions needed, proceed to analysis" ``` -**If "Skip":** Finish workflow. - -**If "Select individually":** Use `AskUserQuestion` with `multiSelect: true` listing each gap. 
- --- ### Step 4: Create Data Extension Files @@ -247,241 +153,38 @@ codeql-extensions/ summaries.yml # summaryModel and neutralModel entries ``` -#### YAML Format +#### YAML Format and Deployment -All extension files follow this structure: - -```yaml -extensions: - - addsTo: - pack: codeql/-all # Target library pack - extensible: # sourceModel, sinkModel, summaryModel, neutralModel - data: - - [] -``` - -#### Source Models - -Columns: `[package, type, subtypes, name, signature, ext, output, kind, provenance]` - -| Column | Description | Example | -|--------|-------------|---------| -| package | Module/package path | `myapp.auth` | -| type | Class or module name | `AuthManager` | -| subtypes | Include subclasses | `True` (Java: capitalized) / `true` (Python/JS/Go) | -| name | Method name | `get_token` | -| signature | Method signature (optional) | `""` (Python/JS), `"(String,int)"` (Java) | -| ext | Extension (optional) | `""` | -| output | What is tainted | `ReturnValue`, `Parameter[0]` (Java) / `Argument[0]` (Python/JS/Go) | -| kind | Source category | `remote`, `local`, `file`, `environment`, `database` | -| provenance | How model was created | `manual` | - -**Java-specific format differences:** -- **subtypes**: Use `True` / `False` (capitalized, Python-style), not `true` / `false` -- **output for parameters**: Use `Parameter[N]` (not `Argument[N]`) to mark method parameters as sources -- **signature**: Required for disambiguation — use Java type syntax: `"(String)"`, `"(String,int)"` -- **Parameter ranges**: Use `Parameter[0..2]` to mark multiple consecutive parameters - -Example (Python): - -```yaml -# codeql-extensions/sources.yml -extensions: - - addsTo: - pack: codeql/python-all - extensible: sourceModel - data: - - ["myapp.http", "Request", true, "get_param", "", "", "ReturnValue", "remote", "manual"] - - ["myapp.http", "Request", true, "get_header", "", "", "ReturnValue", "remote", "manual"] -``` - -Example (Java — note `True`, `Parameter[N]`, and 
signature): - -```yaml -# codeql-extensions/sources.yml -extensions: - - addsTo: - pack: codeql/java-all - extensible: sourceModel - data: - - ["com.myapp.controller", "ApiController", True, "search", "(String)", "", "Parameter[0]", "remote", "manual"] - - ["com.myapp.service", "FileService", True, "upload", "(String,String)", "", "Parameter[0..1]", "remote", "manual"] -``` - -#### Sink Models - -Columns: `[package, type, subtypes, name, signature, ext, input, kind, provenance]` - -Note: column 7 is `input` (which argument receives tainted data), not `output`. - -| Kind | Vulnerability | -|------|---------------| -| `sql-injection` | SQL injection | -| `command-injection` | Command injection | -| `path-injection` | Path traversal | -| `xss` | Cross-site scripting | -| `code-injection` | Code injection | -| `ssrf` | Server-side request forgery | -| `unsafe-deserialization` | Insecure deserialization | - -Example (Python): - -```yaml -# codeql-extensions/sinks.yml -extensions: - - addsTo: - pack: codeql/python-all - extensible: sinkModel - data: - - ["myapp.db", "Connection", true, "raw_query", "", "", "Argument[0]", "sql-injection", "manual"] - - ["myapp.shell", "Runner", false, "execute", "", "", "Argument[0]", "command-injection", "manual"] -``` - -Example (Java — note `True` and `Argument[N]` for sink input): - -```yaml -extensions: - - addsTo: - pack: codeql/java-all - extensible: sinkModel - data: - - ["com.myapp.db", "QueryRunner", True, "execute", "(String)", "", "Argument[0]", "sql-injection", "manual"] -``` - -#### Summary Models - -Columns: `[package, type, subtypes, name, signature, ext, input, output, kind, provenance]` - -| Kind | Description | -|------|-------------| -| `taint` | Data flows through, still tainted | -| `value` | Data flows through, exact value preserved | - -Example: - -```yaml -# codeql-extensions/summaries.yml -extensions: - # Pass-through: taint propagates - - addsTo: - pack: codeql/python-all - extensible: summaryModel - data: - - 
["myapp.cache", "Cache", true, "get", "", "", "Argument[0]", "ReturnValue", "taint", "manual"] - - ["myapp.utils", "JSON", false, "parse", "", "", "Argument[0]", "ReturnValue", "taint", "manual"] - - # Sanitizer: taint blocked - - addsTo: - pack: codeql/python-all - extensible: neutralModel - data: - - ["myapp.security", "Sanitizer", "escape_html", "", "summary", "manual"] -``` - -**`neutralModel` vs no model:** If a function has no model at all, CodeQL may still infer flow through it. Use `neutralModel` to explicitly block taint propagation through known-safe functions. - -#### Language-Specific Notes - -**Python:** Use dotted module paths for `package` (e.g., `myapp.db`). - -**JavaScript:** `package` is often `""` for project-local code. Use the import path for npm packages. - -**Go:** Use full import paths (e.g., `myapp/internal/db`). `type` is often `""` for package-level functions. - -**Java:** Use fully qualified package names (e.g., `com.myapp.db`). - -**C/C++:** Use `""` for package, put the namespace in `type`. - -#### Write the Files +See [extension-yaml-format.md](../references/extension-yaml-format.md) for column definitions, per-language examples (Python, Java, JS, Go, C/C++), and the deployment workaround for pre-compiled query packs. Use the `Write` tool to create each file. Only create files that have entries — skip empty categories. -#### Deploy the Extensions - -**Known limitation:** `--additional-packs` and `--model-packs` flags do not work with pre-compiled query packs (bundled CodeQL distributions that cache `java-all` inside `.codeql/libraries/`). Extensions placed in a standalone model pack directory will be resolved by `codeql resolve qlpacks` but silently ignored during `codeql database analyze`. - -**Workaround — copy extensions into the library pack's `ext/` directory:** - -> **Warning:** Files copied into the `ext/` directory live inside CodeQL's managed pack cache. 
They will be **lost** when packs are updated via `codeql pack download` or version upgrades. After any pack update, re-run this deployment step to restore the extensions. - -```bash -# Find the java-all ext directory used by the query pack -JAVA_ALL_EXT=$(find "$(codeql resolve qlpacks 2>/dev/null | grep 'java-queries' | awk '{print $NF}' | tr -d '()')" \ - -path '*/.codeql/libraries/codeql/java-all/*/ext' -type d 2>/dev/null | head -1) - -if [ -n "$JAVA_ALL_EXT" ]; then - PROJECT_NAME=$(basename "$(pwd)") - cp codeql-extensions/sources.yml "$JAVA_ALL_EXT/${PROJECT_NAME}.sources.model.yml" - [ -f codeql-extensions/sinks.yml ] && cp codeql-extensions/sinks.yml "$JAVA_ALL_EXT/${PROJECT_NAME}.sinks.model.yml" - [ -f codeql-extensions/summaries.yml ] && cp codeql-extensions/summaries.yml "$JAVA_ALL_EXT/${PROJECT_NAME}.summaries.model.yml" - - # Verify deployment — confirm files landed correctly - DEPLOYED=$(ls "$JAVA_ALL_EXT/${PROJECT_NAME}".*.model.yml 2>/dev/null | wc -l) - if [ "$DEPLOYED" -gt 0 ]; then - echo "Extensions deployed to $JAVA_ALL_EXT ($DEPLOYED files):" - ls -la "$JAVA_ALL_EXT/${PROJECT_NAME}".*.model.yml - else - echo "ERROR: Files were copied but verification failed. Check path: $JAVA_ALL_EXT" - fi -else - echo "WARNING: Could not find java-all ext directory. Extensions may not load." - echo "Attempted path lookup from: codeql resolve qlpacks | grep java-queries" - echo "Run 'codeql resolve qlpacks' manually to debug." -fi -``` - -**For Python/JS/Go:** The same limitation may apply. Locate the `-all` pack's `ext/` directory and copy extensions there. 
- -**Alternative (if query packs are NOT pre-compiled):** Use `--additional-packs=./codeql-extensions` with a proper model pack `qlpack.yml`: - -```yaml -# codeql-extensions/qlpack.yml -name: custom/-extensions -version: 0.0.1 -library: true -extensionTargets: - codeql/-all: "*" -dataExtensions: - - sources.yml - - sinks.yml - - summaries.yml -``` - --- ### Step 5: Validate with Re-Analysis -Run a full security analysis with and without extensions to measure the finding delta. This is more reliable than re-running source/sink enumeration queries, which may not reflect the `sourceModel` extensible being used by taint-tracking queries. +Run a full security analysis with and without extensions to measure the finding delta. #### 5a: Run Baseline Analysis (without extensions) ```bash RESULTS_DIR="${DB_NAME%.db}-results" mkdir -p "$RESULTS_DIR" - -# Baseline run (or skip if already run in a previous step) codeql database analyze "$DB_NAME" \ - --format=sarif-latest \ - --output="$RESULTS_DIR/baseline.sarif" \ - --threads=0 \ + --format=sarif-latest --output="$RESULTS_DIR/baseline.sarif" --threads=0 \ -- codeql/-queries:codeql-suites/-security-extended.qls ``` #### 5b: Run Analysis with Extensions ```bash -# Clean cache to force re-evaluation codeql database cleanup "$DB_NAME" - codeql database analyze "$DB_NAME" \ - --format=sarif-latest \ - --output="$RESULTS_DIR/with-extensions.sarif" \ - --threads=0 \ - --rerun \ + --format=sarif-latest --output="$RESULTS_DIR/with-extensions.sarif" --threads=0 --rerun \ -- codeql/-queries:codeql-suites/-security-extended.qls ``` -Use `-vvv` flag to verify extensions are being loaded — look for `Loading data extensions in ... .yml` in stderr. +Use `-vvv` flag to verify extensions are being loaded. 
#### 5c: Compare Findings @@ -491,18 +194,7 @@ WITH_EXT=$(python3 -c "import json; print(sum(len(r.get('results',[])) for r in echo "Findings: $BASELINE → $WITH_EXT (+$((WITH_EXT - BASELINE)))" ``` -**If counts did not increase:** The extension YAML may have syntax errors or column values that don't match the code. Check: - -| Issue | Solution | -|-------|----------| -| Extension not loaded | Run with `-vvv` and grep for your extension filename in output | -| Pre-compiled pack ignores extensions | Use the `ext/` directory workaround above | -| Java: No new findings | Verify `True`/`False` (capitalized) for subtypes, `Parameter[N]` for sources | -| No new sources/sinks | Verify column values match actual code signatures exactly | -| Type not found | Use exact type name as it appears in CodeQL database | -| Wrong argument index | Arguments are 0-indexed; `self` is `Argument[self]` (Python), `Parameter[0]` (Java) | - -Fix the extension files, re-deploy to `ext/`, and re-run 5b until counts increase. +**If counts did not increase:** Check extension loading (`-vvv`), pre-compiled pack workaround, Java `True`/`False` capitalization, column value accuracy. --- diff --git a/plugins/static-analysis/skills/codeql/workflows/run-analysis.md b/plugins/static-analysis/skills/codeql/workflows/run-analysis.md index a27226f..e309b02 100644 --- a/plugins/static-analysis/skills/codeql/workflows/run-analysis.md +++ b/plugins/static-analysis/skills/codeql/workflows/run-analysis.md @@ -4,29 +4,14 @@ Execute CodeQL security queries on an existing database with ruleset selection a ## Scan Modes -Two modes control analysis scope. Select mode in Step 2 (before pack selection). +Two modes control analysis scope. Both use all installed packs — the difference is filtering. 
-| Mode | Packs | Filtering | -|------|-------|-----------| -| **Run all** | All installed packs (official + Trail of Bits + Community) | Uses `security-and-quality` suite for official pack; third-party packs run via custom suite without precision filtering | -| **Important only** | All installed packs (official + Trail of Bits + Community) | Custom suite: security-only, medium-high precision, with security-severity threshold for medium precision | +| Mode | Description | Suite Reference | +|------|-------------|-----------------| +| **Run all** | All queries from all installed packs via `security-and-quality` suite | [run-all-suite.md](../references/run-all-suite.md) | +| **Important only** | Security queries filtered by precision and security-severity threshold | [important-only-suite.md](../references/important-only-suite.md) | -**Run all** generates a custom `.qls` suite that references the official `security-and-quality` suite and loads all third-party packs with only `kind: problem/path-problem` filtering (no precision or severity restrictions). See [run-all-suite.md](../references/run-all-suite.md) for the suite template. - -> **WARNING:** Do NOT pass pack names directly to `codeql database analyze` (e.g., `-- codeql/cpp-queries`). Each pack has a `defaultSuiteFile` in its `qlpack.yml` (typically `code-scanning.qls`) that applies strict filters — this silently drops queries and can produce zero results. Always use an explicit suite reference. - -**Important only** generates a custom `.qls` query suite at runtime that loads all installed packs and applies uniform filtering. See [important-only-suite.md](../references/important-only-suite.md) for the suite template and generation script. 
- -| Metadata | Important-only criteria | -|---|---| -| `@tags` | Must contain `security` (excludes correctness, maintainability, readability) | -| `@precision` high/very-high | Included at any `@problem.severity` | -| `@precision` medium | Included only if `@security-severity` >= 6.0 (checked post-analysis; suite includes all medium-precision security queries, low-severity ones are filtered from results) | -| `@precision` low | Excluded | -| Experimental | Included (both modes run experimental queries) | -| Diagnostic / metric | Excluded (both modes skip non-alert queries) | - -Third-party queries without `@precision` or `@tags security` metadata are excluded — if a query doesn't declare its confidence, we cannot assess it for important-only mode. +> **WARNING:** Do NOT pass pack names directly to `codeql database analyze` (e.g., `-- codeql/cpp-queries`). Each pack's `defaultSuiteFile` silently applies strict filters and can produce zero results. Always use an explicit suite reference. --- @@ -49,7 +34,7 @@ TaskCreate: "Process and report results" (Step 5) - blockedBy: Step 4 | Step 2 | **SOFT GATE** | User selects mode; confirms installed/ignored for each missing pack | | Step 3 | **SOFT GATE** | User approves query packs, model packs, and threat model selection | -**Auto-skip rule:** If the user already specified a choice in the invocation arguments or conversation prompt, skip the corresponding `AskUserQuestion` and use the provided value directly. For example, if the user said "run important only mode", skip the scan mode selection in Step 2a. If the user said "use all packs" or "skip extensions", skip the corresponding gates in Step 3. Only prompt for information not already provided. +**Auto-skip rule:** If the user already specified a choice in the invocation, skip the corresponding `AskUserQuestion` and use the provided value directly. 
--- @@ -57,59 +42,14 @@ TaskCreate: "Process and report results" (Step 5) - blockedBy: Step 4 ### Step 1: Select Database and Detect Language -**Find available databases:** - ```bash -# List all CodeQL databases -ls -dt codeql_*.db 2>/dev/null | head -10 - -# Get the most recent database -get_latest_db() { - ls -dt codeql_*.db 2>/dev/null | head -1 -} - -DB_NAME=$(get_latest_db) -if [[ -z "$DB_NAME" ]]; then - echo "ERROR: No CodeQL database found. Run build-database workflow first." - exit 1 -fi -echo "Using database: $DB_NAME" +DB_NAME=$(ls -dt codeql_*.db 2>/dev/null | head -1) +[[ -z "$DB_NAME" ]] && echo "ERROR: No CodeQL database found." && exit 1 +LANG=$(codeql resolve database --format=json -- "$DB_NAME" | jq -r '.languages[0]') +echo "Using: $DB_NAME (language: $LANG)" ``` -**If multiple databases exist**, use `AskUserQuestion` to let user select: - -``` -header: "Database" -question: "Multiple databases found. Which one to analyze?" -options: - - label: "codeql_3.db (latest)" - description: "Created: " - - label: "codeql_2.db" - description: "Created: " - - label: "codeql_1.db" - description: "Created: " -``` - -**Verify and detect language:** - -```bash -# Check database exists and get language(s) -codeql resolve database -- "$DB_NAME" - -# Get primary language from database -LANG=$(codeql resolve database --format=json -- "$DB_NAME" \ - | jq -r '.languages[0]') -LANG_COUNT=$(codeql resolve database --format=json -- "$DB_NAME" \ - | jq '.languages | length') -echo "Primary language: $LANG" -if [ "$LANG_COUNT" -gt 1 ]; then - echo "WARNING: Multi-language database ($LANG_COUNT languages)" - codeql resolve database --format=json -- "$DB_NAME" \ - | jq -r '.languages[]' -fi -``` - -**Multi-language databases:** If more than one language is detected, ask the user which language to analyze or run separate analyses for each. +If multiple databases exist, use `AskUserQuestion` to let user select. If multi-language database, ask which language to analyze. 
--- @@ -117,284 +57,109 @@ fi #### 2a: Select Scan Mode -**Skip if the user already specified a scan mode** (e.g., "important only", "run all", "full scan") in the invocation arguments or prompt. Use the provided value directly. - -Otherwise, use `AskUserQuestion`: +**Skip if user already specified.** Otherwise use `AskUserQuestion`: ``` header: "Scan Mode" question: "Which scan mode should be used?" -multiSelect: false options: - label: "Run all (Recommended)" - description: "Maximum coverage — all queries from all installed packs via security-and-quality suite" + description: "Maximum coverage — all queries from all installed packs" - label: "Important only" - description: "Security vulnerabilities only — all packs filtered by custom suite (medium-high precision, security-severity threshold)" + description: "Security vulnerabilities only — medium-high precision, security-severity threshold" ``` -Record the selected mode. It affects Steps 3 and 4. - -In both modes, check and install third-party packs below. Both modes use all installed packs — the difference is whether filtering is applied. 
-
 #### 2b: Query Packs
 
-**Available packs by language** (see [ruleset-catalog.md](../references/ruleset-catalog.md)):
+For each pack available for the detected language (see [ruleset-catalog.md](../references/ruleset-catalog.md)):
 
 | Language | Trail of Bits | Community Pack |
 |----------|---------------|----------------|
 | C/C++ | `trailofbits/cpp-queries` | `GitHubSecurityLab/CodeQL-Community-Packs-CPP` |
 | Go | `trailofbits/go-queries` | `GitHubSecurityLab/CodeQL-Community-Packs-Go` |
 | Java | `trailofbits/java-queries` | `GitHubSecurityLab/CodeQL-Community-Packs-Java` |
-| JavaScript | - | `GitHubSecurityLab/CodeQL-Community-Packs-JavaScript` |
-| Python | - | `GitHubSecurityLab/CodeQL-Community-Packs-Python` |
-| C# | - | `GitHubSecurityLab/CodeQL-Community-Packs-CSharp` |
-| Ruby | - | `GitHubSecurityLab/CodeQL-Community-Packs-Ruby` |
+| JavaScript | — | `GitHubSecurityLab/CodeQL-Community-Packs-JavaScript` |
+| Python | — | `GitHubSecurityLab/CodeQL-Community-Packs-Python` |
+| C# | — | `GitHubSecurityLab/CodeQL-Community-Packs-CSharp` |
+| Ruby | — | `GitHubSecurityLab/CodeQL-Community-Packs-Ruby` |
 
-**For each pack available for the detected language:**
-
-```bash
-# Check if pack is installed
-codeql resolve qlpacks | grep -i "<pack>"
-```
-
-**If NOT installed**, use `AskUserQuestion`:
-
-```
-header: "<pack>"
-question: "<pack> for <language> is not installed. Install it?"
-options:
-  - label: "Install (Recommended)"
-    description: "Run: codeql pack download <pack>"
-  - label: "Ignore"
-    description: "Skip this pack for this analysis"
-```
-
-**On "Install":**
-```bash
-codeql pack download <pack>
-```
-
-**On "Ignore":** Mark pack as skipped, continue to next pack.
+Check if installed (`codeql resolve qlpacks | grep -i "<pack>"`). If not, ask user to install or ignore.
 
 #### 2c: Detect Model Packs
 
-Model packs contain data extensions (custom sources, sinks, flow summaries) that improve CodeQL's data flow analysis for project-specific or framework-specific APIs.
To create new extensions, run the [create-data-extensions](create-data-extensions.md) workflow first. - -**Search three locations:** - -**1. In-repo model packs** — `qlpack.yml` or `codeql-pack.yml` with `dataExtensions`: - -```bash -# Find CodeQL pack definitions in the codebase -fd '(qlpack|codeql-pack)\.yml$' . --exclude codeql_*.db | while read -r f; do - if grep -q 'dataExtensions' "$f"; then - echo "MODEL PACK: $(dirname "$f") - $(grep '^name:' "$f")" - fi -done -``` +Search three locations for data extension model packs: +1. **In-repo model packs** — `qlpack.yml`/`codeql-pack.yml` with `dataExtensions` +2. **In-repo standalone data extensions** — `.yml` files with `extensions:` key +3. **Installed model packs** — resolved by CodeQL -**2. In-repo standalone data extensions** — `.yml` files with `extensions:` key (auto-discovered by CodeQL): - -```bash -# Find data extension YAML files in the codebase -rg -l '^extensions:' --glob '*.yml' --glob '!codeql_*.db/**' | head -20 -``` - -**3. Installed model packs** — library packs resolved by CodeQL that contain models: - -```bash -# List all resolved packs and filter for model/library packs -# Model packs typically have "model" in the name or are library packs -codeql resolve qlpacks 2>/dev/null | grep -iE 'model|extension' -``` - -**Record all detected model packs for presentation in Step 3.** If no model packs are found, note this and proceed — model packs are optional. Model packs are included in both scan modes since they improve data flow analysis quality without adding noise. +Record all detected packs for Step 3. --- ### Step 3: Select Query Packs and Model Packs > **CHECKPOINT** — Present available packs to user for confirmation. -> **Skip if the user already specified pack preferences** in the invocation (e.g., "use all packs", "skip extensions"). Use the provided values directly. 
+> **Skip if user already specified pack preferences.**
 
 #### 3a: Confirm Query Packs
 
-**If scan mode is "Important only":** All installed packs will be included with metadata filtering via a custom query suite. Inform the user:
+**Important-only mode:** Inform user all installed packs included with filtering. Proceed to 3b.
 
-```
-**Scan mode: Important only**
-All installed packs included, filtered by custom query suite:
-- Official: codeql/<language>-queries (security queries, medium-high precision)
-- Trail of Bits: trailofbits/<language>-queries [if installed]
-- Community: GitHubSecurityLab/CodeQL-Community-Packs-<Language> [if installed]
-
-Filtering: security tag required, high/very-high precision (any severity),
-medium precision (error severity only). Experimental queries included.
-Third-party queries without @precision or @tags metadata are excluded.
-```
-
-See [important-only-suite.md](../references/important-only-suite.md) for the suite template.
-
-Proceed directly to 3b (model packs).
-
-**If scan mode is "Run all":** All installed packs run without query suite filtering. Use `AskUserQuestion` to confirm:
-
-```
-header: "Query Packs"
-question: "All installed query packs will run unfiltered. Confirm or select individually:"
-multiSelect: false
-options:
-  - label: "Use all (Recommended)"
-    description: "Run all queries from all installed packs — maximum coverage"
-  - label: "Select individually"
-    description: "Choose specific packs from the full list"
-```
-
-**If "Use all":** Include all installed packs: official `codeql/<language>-queries` + Trail of Bits + Community Packs. No suite filtering — every query runs.
- -**If "Select individually":** Follow up with a `multiSelect: true` question listing all installed packs: - -``` -header: "Query Packs" -question: "Select query packs to run:" -multiSelect: true -options: - - label: "codeql/-queries" - description: "Official CodeQL queries (all queries, no suite filtering)" - - label: "Trail of Bits" - description: "trailofbits/-queries - Memory safety, domain expertise" - - label: "Community Packs" - description: "GitHubSecurityLab/CodeQL-Community-Packs- - Additional security queries" -``` - -**Only show packs that are installed (from Step 2b)** - -**⛔ STOP: Await user selection** +**Run-all mode:** Use `AskUserQuestion` to confirm "Use all" or "Select individually". #### 3b: Select Model Packs (if any detected) -**Skip this sub-step if no model packs were detected in Step 2c.** - -Present detected model packs from Step 2c. Categorize by source: - -Use `AskUserQuestion` tool: - -``` -header: "Model Packs" -question: "Model packs add custom data flow models (sources, sinks, summaries). Select which to include:" -multiSelect: false -options: - - label: "Use all (Recommended)" - description: "Include all detected model packs and data extensions" - - label: "Select individually" - description: "Choose specific model packs from the list" - - label: "Skip" - description: "Run without model packs" -``` - -**If "Use all":** Include all model packs and data extensions detected in Step 2c. 
-
-**If "Select individually":** Follow up with a `multiSelect: true` question:
+**Skip if no model packs detected in Step 2c.**
 
-```
-header: "Model Packs"
-question: "Select model packs to include:"
-multiSelect: true
-options:
-  # For each in-repo model pack found in 2c:
-  - label: "<pack name>"
-    description: "In-repo model pack at <path> - custom data flow models"
-  # For each standalone data extension found in 2c:
-  - label: "In-repo extensions"
-    description: "<N> data extension files found in codebase (auto-discovered)"
-  # For each installed model pack found in 2c:
-  - label: "<pack name>"
-    description: "Installed model pack - <description>"
-```
+Use `AskUserQuestion`: "Use all (Recommended)" / "Select individually" / "Skip".
 
 **Notes:**
-- In-repo standalone data extensions (`.yml` files with `extensions:` key) are auto-discovered by CodeQL during analysis — selecting them here ensures the source directory is passed via `--additional-packs`
-- In-repo model packs (with `qlpack.yml`) need their parent directory passed via `--additional-packs`
-- Installed model packs are passed via `--model-packs`
+- In-repo standalone extensions (`.yml`) are auto-discovered — pass source directory via `--additional-packs`
+- In-repo model packs (with `qlpack.yml`) need parent directory via `--additional-packs`
+- Installed model packs use `--model-packs`
 
-**⛔ STOP: Await user selection**
-
----
+#### 3c: Select Threat Models
 
-### Step 3c: Select Threat Models
-
-Threat models control which input sources CodeQL treats as tainted. The default (`remote`) covers HTTP/network input only. Expanding the threat model finds more vulnerabilities but may increase false positives. See [threat-models.md](../references/threat-models.md) for details on each model.
+Threat models control which input sources CodeQL treats as tainted. See [threat-models.md](../references/threat-models.md).
 
 Use `AskUserQuestion`:
 
 ```
 header: "Threat Models"
 question: "Which input sources should CodeQL treat as tainted?"
-multiSelect: false options: - label: "Remote only (Recommended)" - description: "Default — HTTP requests, network input. Best for web services and APIs." + description: "Default — HTTP requests, network input" - label: "Remote + Local" - description: "Add CLI args, local files. Use for CLI tools or desktop apps." + description: "Add CLI args, local files" - label: "All sources" - description: "Remote, local, environment, database, file. Maximum coverage, more noise." + description: "Remote, local, environment, database, file" - label: "Custom" description: "Select specific threat models individually" ``` -**If "Custom":** Follow up with `multiSelect: true`: - -``` -header: "Threat Models" -question: "Select threat models to enable:" -multiSelect: true -options: - - label: "remote" - description: "HTTP requests, network input (always included)" - - label: "local" - description: "CLI args, local files — for CLI tools, batch processors" - - label: "environment" - description: "Environment variables — for 12-factor/container apps" - - label: "database" - description: "Database results — for second-order injection audits" -``` - -**Build the threat model flag:** - -```bash -# Only add --threat-model when non-default models are selected -# Default (remote only) needs no flag -# NOTE: The flag is --threat-model (singular), NOT --threat-models -THREAT_MODEL_FLAG="" -# Examples: -# THREAT_MODEL_FLAG="--threat-model local" # adds local group -# THREAT_MODEL_FLAG="--threat-model local --threat-model file" # adds local + file -# THREAT_MODEL_FLAG="--threat-model all" # enables everything -``` +Build the flag: `THREAT_MODEL_FLAG=""` (remote only needs no flag), `--threat-model local`, etc. --- ### Step 4: Execute Analysis -Run analysis using the approach determined by scan mode. 
+#### Generate custom suite
 
-#### Important-only mode: Generate custom suite
+**Important-only mode:** Generate the custom `.qls` suite using the template and script in [important-only-suite.md](../references/important-only-suite.md).
 
-Generate the custom `.qls` suite file that includes all installed packs with filtering. See [important-only-suite.md](../references/important-only-suite.md) for the full template and generation script.
+**Run-all mode:** Generate the custom `.qls` suite using the template in [run-all-suite.md](../references/run-all-suite.md).
 
 ```bash
 RESULTS_DIR="${DB_NAME%.db}-results"
 mkdir -p "$RESULTS_DIR"
-SUITE_FILE="$RESULTS_DIR/important-only.qls"
-
-# Generate suite — see important-only-suite.md for complete script
-# The suite loads all installed packs and applies security+precision filtering
+SUITE_FILE="$RESULTS_DIR/<mode>.qls"
 
 # Verify suite resolves correctly before running
 codeql resolve queries "$SUITE_FILE" | wc -l
 ```
 
-Then run analysis with the generated suite:
+#### Run analysis
 
 ```bash
 codeql database analyze $DB_NAME \
@@ -407,169 +172,25 @@ codeql database analyze $DB_NAME \
 -- "$SUITE_FILE"
 ```
 
-#### Run-all mode: Generate custom suite with explicit suite references
-
-> **WARNING:** Do NOT pass pack names directly (e.g., `-- codeql/cpp-queries`). Each pack has a `defaultSuiteFile` (typically `code-scanning.qls`) that silently applies strict precision/severity filters, dropping many queries. Always use explicit suite references.
- -Generate a custom `.qls` suite that references the official `security-and-quality` suite (which includes all security + code quality queries) and loads third-party packs with minimal filtering: - -```bash -RESULTS_DIR="${DB_NAME%.db}-results" -mkdir -p "$RESULTS_DIR" -SUITE_FILE="$RESULTS_DIR/run-all.qls" - -# Generate the run-all suite -cat > "$SUITE_FILE" << HEADER -- description: Run-all — all security and quality queries from all installed packs -HEADER - -# Official pack: use security-and-quality suite (broadest built-in suite) -echo "- import: codeql-suites/${LANG}-security-and-quality.qls - from: codeql/${LANG}-queries" >> "$SUITE_FILE" - -# Third-party packs: include all problem/path-problem queries (no precision filter) -for PACK in $INSTALLED_THIRD_PARTY_PACKS; do - echo "- queries: . - from: ${PACK}" >> "$SUITE_FILE" -done - -# Minimal filtering — only select alert-type queries and exclude deprecated -cat >> "$SUITE_FILE" << 'FILTERS' -- include: - kind: - - problem - - path-problem -- exclude: - deprecated: // -- exclude: - tags contain: - - modeleditor - - modelgenerator -FILTERS - -echo "Suite generated: $SUITE_FILE" -codeql resolve queries "$SUITE_FILE" | wc -l - -# Build model pack flags from user selections in Step 3b -# --model-packs for installed model packs -# --additional-packs for in-repo model packs and data extensions -MODEL_PACK_FLAGS="" -ADDITIONAL_PACK_FLAGS="" - -# Threat model flag from Step 3c (empty string if default/remote-only) -# THREAT_MODEL_FLAG="" - -codeql database analyze $DB_NAME \ - --format=sarif-latest \ - --output="$RESULTS_DIR/results.sarif" \ - --threads=0 \ - $THREAT_MODEL_FLAG \ - $MODEL_PACK_FLAGS \ - $ADDITIONAL_PACK_FLAGS \ - -- "$SUITE_FILE" -``` - **Flag reference for model packs:** | Source | Flag | Example | |--------|------|---------| | Installed model packs | `--model-packs` | `--model-packs=myorg/java-models` | -| In-repo model packs (with `qlpack.yml`) | `--additional-packs` | 
`--additional-packs=./lib/codeql-models` | -| In-repo standalone extensions (`.yml`) | `--additional-packs` | `--additional-packs=.` | +| In-repo model packs | `--additional-packs` | `--additional-packs=./lib/codeql-models` | +| In-repo standalone extensions | `--additional-packs` | `--additional-packs=.` | -**Example (C++ run-all mode):** +### Performance -```bash -codeql database analyze codeql_1.db \ - --format=sarif-latest \ - --output=codeql_1-results/results.sarif \ - --threads=0 \ - --additional-packs=./codeql-models \ - -- codeql_1-results/run-all.qls -``` - -**Example (Python important-only mode with custom suite):** +If codebase is large, read [performance-tuning.md](../references/performance-tuning.md) and apply relevant optimizations. -```bash -codeql database analyze codeql_1.db \ - --format=sarif-latest \ - --output=codeql_1-results/results.sarif \ - --threads=0 \ - --model-packs=myorg/python-models \ - -- codeql_1-results/important-only.qls -``` - -### Performance Flags - -If codebase is large then read [../references/performance-tuning.md](../references/performance-tuning.md) and apply relevant optimizations. +--- ### Step 5: Process and Report Results -> **SARIF structure note:** `security-severity` and `level` are stored on rule definitions (`.runs[].tool.driver.rules[]`), NOT on individual result objects. Results reference rules by `ruleIndex`. The jq commands below join results with their rule metadata. - -**Count findings:** +Process the SARIF output using the jq commands in [sarif-processing.md](../references/sarif-processing.md): count findings, summarize by level, summarize by security severity, summarize by rule. -```bash -jq '.runs[].results | length' "$RESULTS_DIR/results.sarif" -``` - -**Summary by SARIF level:** - -```bash -jq -r ' - .runs[] | - . 
as $run | - .results[] | - ($run.tool.driver.rules[.ruleIndex].defaultConfiguration.level // "unknown") -' "$RESULTS_DIR/results.sarif" \ - | sort | uniq -c | sort -rn -``` - -**Summary by security severity** (more useful for triage): - -```bash -jq -r ' - .runs[] | - . as $run | - .results[] | - ($run.tool.driver.rules[.ruleIndex].properties["security-severity"] // "none") + " | " + - .ruleId + " | " + - (.locations[0].physicalLocation.artifactLocation.uri // "?") + ":" + - ((.locations[0].physicalLocation.region.startLine // 0) | tostring) + " | " + - (.message.text // "no message" | .[0:80]) -' "$RESULTS_DIR/results.sarif" | sort -rn | head -20 -``` - -**Summary by rule:** - -```bash -jq -r '.runs[].results[] | .ruleId' "$RESULTS_DIR/results.sarif" \ - | sort | uniq -c | sort -rn -``` - -**Important-only post-filter:** If scan mode is "important only", filter out medium-precision results with `security-severity` < 6.0 from the report. The suite includes all medium-precision security queries to let CodeQL evaluate them, but low-severity medium-precision findings are noise: - -```bash -# Filter important-only results: drop medium-precision findings with security-severity < 6.0 -# Medium-precision queries without a security-severity score default to 0.0 (excluded). -# Non-medium queries are always kept regardless of security-severity. -jq ' - .runs[] |= ( - . 
as $run | - .results = [ - .results[] | - ($run.tool.driver.rules[.ruleIndex].properties.precision // "unknown") as $prec | - ($run.tool.driver.rules[.ruleIndex].properties["security-severity"] // null) as $raw_sev | - (if $prec == "medium" then ($raw_sev // "0" | tonumber) else 10 end) as $sev | - select( - ($prec == "high") or ($prec == "very-high") or ($prec == "unknown") or - ($prec == "medium" and $sev >= 6.0) - ) - ] - ) -' "$RESULTS_DIR/results.sarif" > "$RESULTS_DIR/results-filtered.sarif" -mv "$RESULTS_DIR/results-filtered.sarif" "$RESULTS_DIR/results.sarif" -``` +**Important-only mode:** Apply the post-analysis filter from [sarif-processing.md](../references/sarif-processing.md#important-only-post-filter) to remove medium-precision results with `security-severity` < 6.0. --- From 88c66315a1712ccfc3e486280b57e7e4c342bf51 Mon Sep 17 00:00:00 2001 From: GrosQuildu Date: Mon, 23 Feb 2026 12:14:53 +0100 Subject: [PATCH 7/8] codeql - workflow plugin improvements 2 --- plugins/static-analysis/skills/codeql/SKILL.md | 3 +++ .../skills/codeql/workflows/build-database.md | 13 +++++++++++++ .../codeql/workflows/create-data-extensions.md | 15 +++++++++++++++ .../skills/codeql/workflows/run-analysis.md | 15 +++++++++++++++ 4 files changed, 46 insertions(+) diff --git a/plugins/static-analysis/skills/codeql/SKILL.md b/plugins/static-analysis/skills/codeql/SKILL.md index 80a2d82..ae4c495 100644 --- a/plugins/static-analysis/skills/codeql/SKILL.md +++ b/plugins/static-analysis/skills/codeql/SKILL.md @@ -10,6 +10,7 @@ allowed-tools: - Bash - Read - Write + - Edit - Glob - Grep - AskUserQuestion @@ -17,6 +18,8 @@ allowed-tools: - TaskList - TaskUpdate - TaskGet + - TodoRead + - TodoWrite --- # CodeQL Analysis diff --git a/plugins/static-analysis/skills/codeql/workflows/build-database.md b/plugins/static-analysis/skills/codeql/workflows/build-database.md index 85114e8..dbf25df 100644 --- a/plugins/static-analysis/skills/codeql/workflows/build-database.md +++ 
b/plugins/static-analysis/skills/codeql/workflows/build-database.md @@ -81,6 +81,9 @@ log_result() { echo "[$(date -Iseconds)] RESULT: $1" >> "$LOG_FILE"; echo "" >> ## Step 1: Detect Language and Configure +**Entry:** CodeQL CLI installed and on PATH (`codeql --version` succeeds) +**Exit:** `LANG` variable set to a valid CodeQL language identifier; exclusion config created (interpreted) or skipped (compiled) + ### 1a. Detect Language ```bash @@ -111,6 +114,9 @@ Scan for irrelevant directories and create `codeql-config.yml` with `paths-ignor ## Step 2: Build Database +**Entry:** Step 1 complete (`LANG` set, `DB_NAME` assigned, log file initialized) +**Exit:** `codeql resolve database -- "$DB_NAME"` succeeds (database exists and is valid) + ### For Interpreted Languages ```bash @@ -218,12 +224,19 @@ $CMD 2>&1 | tee -a "$LOG_FILE" ## Step 3: Apply Fixes (if build failed) +**Entry:** Step 2 build method failed (non-zero exit or `codeql resolve database` fails) +**Exit:** Fix applied and current build method retried; either succeeds (go to Step 4) or all fixes exhausted (try next build method in Step 2) + Try fixes in order, then retry current build method. See [build-fixes.md](../references/build-fixes.md) for the full fix catalog: clean state, clean build cache, install dependencies, handle private registries. --- ## Steps 4-5: Assess and Improve Quality +**Entry:** Database exists and `codeql resolve database` succeeds +**Exit (Step 4):** Quality metrics collected (baseline LoC, file counts, extractor errors, finalization status) +**Exit (Step 5):** Quality is GOOD (baseline LoC > 0, errors < 5%, project files present) OR user accepts current state + Run quality checks and compare against expected source files. See [quality-assessment.md](../references/quality-assessment.md) for metric collection, quality criteria table, and improvement steps. 
--- diff --git a/plugins/static-analysis/skills/codeql/workflows/create-data-extensions.md b/plugins/static-analysis/skills/codeql/workflows/create-data-extensions.md index 95d5ce7..61c501b 100644 --- a/plugins/static-analysis/skills/codeql/workflows/create-data-extensions.md +++ b/plugins/static-analysis/skills/codeql/workflows/create-data-extensions.md @@ -27,6 +27,9 @@ TaskCreate: "Validate with re-analysis" (Step 5) - blockedBy: Step 4 ### Step 1: Check for Existing Data Extensions +**Entry:** CodeQL database exists (`codeql resolve database` succeeds) +**Exit:** Either existing extensions found (report and finish) OR no extensions found (proceed to Step 2) + Search the project for existing data extensions and model packs. ```bash @@ -52,6 +55,9 @@ codeql resolve qlpacks 2>/dev/null | grep -iE 'model|extension' ### Step 2: Query Known Sources and Sinks +**Entry:** Step 1 found no existing extensions; database and language identified +**Exit:** `sources.csv` and `sinks.csv` exist in `$DIAG_DIR` with enumerated source/sink locations + Run custom QL queries against the database to enumerate all sources and sinks CodeQL currently recognizes. #### 2a: Select Database and Language @@ -91,6 +97,9 @@ Read both CSV files and present a summary showing source types and sink kinds wi ### Step 3: Identify Missing Sources and Sinks +**Entry:** Step 2 complete (`sources.csv` and `sinks.csv` available) +**Exit:** Either no gaps found (report adequate coverage and finish) OR user confirms which gaps to model (proceed to Step 4) + Cross-reference the project's API surface against CodeQL's known models. #### 3a: Map the Project's API Surface @@ -140,6 +149,9 @@ options: ### Step 4: Create Data Extension Files +**Entry:** Step 3 identified gaps and user confirmed which to model +**Exit:** YAML extension files created in `codeql-extensions/` and deployed to `-all` ext/ directory + Generate YAML data extension files for the gaps confirmed by the user. 
#### File Structure @@ -163,6 +175,9 @@ Use the `Write` tool to create each file. Only create files that have entries ### Step 5: Validate with Re-Analysis +**Entry:** Step 4 complete (extension files deployed) +**Exit:** Finding delta measured (with-extensions count >= baseline count); extensions validated as loading correctly + Run a full security analysis with and without extensions to measure the finding delta. #### 5a: Run Baseline Analysis (without extensions) diff --git a/plugins/static-analysis/skills/codeql/workflows/run-analysis.md b/plugins/static-analysis/skills/codeql/workflows/run-analysis.md index e309b02..2a78cc8 100644 --- a/plugins/static-analysis/skills/codeql/workflows/run-analysis.md +++ b/plugins/static-analysis/skills/codeql/workflows/run-analysis.md @@ -42,6 +42,9 @@ TaskCreate: "Process and report results" (Step 5) - blockedBy: Step 4 ### Step 1: Select Database and Detect Language +**Entry:** At least one CodeQL database exists in the working directory +**Exit:** `DB_NAME` and `LANG` variables set; database resolves successfully + ```bash DB_NAME=$(ls -dt codeql_*.db 2>/dev/null | head -1) [[ -z "$DB_NAME" ]] && echo "ERROR: No CodeQL database found." && exit 1 @@ -55,6 +58,9 @@ If multiple databases exist, use `AskUserQuestion` to let user select. If multi- ### Step 2: Select Scan Mode, Check Additional Packs +**Entry:** Step 1 complete (`DB_NAME` and `LANG` set) +**Exit:** Scan mode selected; all available packs (official, ToB, community) checked for installation status; model packs detected + #### 2a: Select Scan Mode **Skip if user already specified.** Otherwise use `AskUserQuestion`: @@ -98,6 +104,9 @@ Record all detected packs for Step 3. 
### Step 3: Select Query Packs and Model Packs +**Entry:** Step 2 complete (scan mode, pack availability, and model packs all determined) +**Exit:** User confirmed query packs, model packs, and threat model selection; all flags built (`THREAT_MODEL_FLAG`, `MODEL_PACK_FLAGS`, `ADDITIONAL_PACK_FLAGS`) + > **CHECKPOINT** — Present available packs to user for confirmation. > **Skip if user already specified pack preferences.** @@ -144,6 +153,9 @@ Build the flag: `THREAT_MODEL_FLAG=""` (remote only needs no flag), `--threat-mo ### Step 4: Execute Analysis +**Entry:** Step 3 complete (all flags and pack selections finalized) +**Exit:** `$RESULTS_DIR/results.sarif` exists and contains valid SARIF output + #### Generate custom suite **Important-only mode:** Generate the custom `.qls` suite using the template and script in [important-only-suite.md](../references/important-only-suite.md). @@ -188,6 +200,9 @@ If codebase is large, read [performance-tuning.md](../references/performance-tun ### Step 5: Process and Report Results +**Entry:** Step 4 complete (`results.sarif` exists) +**Exit:** Findings summarized by severity, rule, and location; zero-finding results investigated; final report presented to user + Process the SARIF output using the jq commands in [sarif-processing.md](../references/sarif-processing.md): count findings, summarize by level, summarize by security severity, summarize by rule. **Important-only mode:** Apply the post-analysis filter from [sarif-processing.md](../references/sarif-processing.md#important-only-post-filter) to remove medium-precision results with `security-severity` < 6.0. 
From dd75eb9f7b8a11c66e91fd87e3e9c2020009642c Mon Sep 17 00:00:00 2001 From: GrosQuildu Date: Tue, 24 Feb 2026 13:40:49 +0100 Subject: [PATCH 8/8] output dir and artifacts more precise handling, better db discovery --- .../static-analysis/skills/codeql/SKILL.md | 148 +++++++++++++++--- .../references/extension-yaml-format.md | 16 +- .../codeql/references/important-only-suite.md | 2 +- .../skills/codeql/references/run-all-suite.md | 2 +- .../codeql/references/sarif-processing.md | 12 +- .../skills/codeql/workflows/build-database.md | 26 ++- .../workflows/create-data-extensions.md | 54 ++++--- .../skills/codeql/workflows/run-analysis.md | 97 ++++++++++-- .../static-analysis/skills/semgrep/SKILL.md | 54 ++++++- .../skills/semgrep/references/scan-modes.md | 8 +- .../semgrep/references/scanner-task-prompt.md | 28 ++-- .../semgrep/scripts/merge_triaged_sarif.py | 40 ++--- .../skills/semgrep/workflows/scan-workflow.md | 88 ++++++++--- 13 files changed, 426 insertions(+), 149 deletions(-) diff --git a/plugins/static-analysis/skills/codeql/SKILL.md b/plugins/static-analysis/skills/codeql/SKILL.md index ae4c495..ac62373 100644 --- a/plugins/static-analysis/skills/codeql/SKILL.md +++ b/plugins/static-analysis/skills/codeql/SKILL.md @@ -42,6 +42,79 @@ Supported languages: Python, JavaScript/TypeScript, Go, Java/Kotlin, C/C++, C#, 6. **Follow workflows step by step.** Once a workflow is selected, execute it step by step without skipping phases. Each phase gates the next — skipping quality assessment or data extensions leads to incomplete analysis. +## Output Directory + +All generated files (database, build logs, diagnostics, extensions, results) are stored in a single output directory. + +- **If the user specifies an output directory** in their prompt, use it as `OUTPUT_DIR`. +- **If not specified**, default to `./static_analysis_codeql_1`. If that already exists, increment to `_2`, `_3`, etc. 
+ +In both cases, **always create the directory** with `mkdir -p` before writing any files. + +```bash +# Resolve output directory +if [ -n "$USER_SPECIFIED_DIR" ]; then + OUTPUT_DIR="$USER_SPECIFIED_DIR" +else + BASE="static_analysis_codeql" + N=1 + while [ -e "${BASE}_${N}" ]; do + N=$((N + 1)) + done + OUTPUT_DIR="${BASE}_${N}" +fi +mkdir -p "$OUTPUT_DIR" +``` + +The output directory is resolved **once** at the start before any workflow executes. All workflows receive `$OUTPUT_DIR` and store their artifacts there: + +``` +$OUTPUT_DIR/ +├── rulesets.txt # Selected query packs (logged after Step 3) +├── codeql.db/ # CodeQL database (dir containing codeql-database.yml) +├── build.log # Build log +├── codeql-config.yml # Exclusion config (interpreted languages) +├── diagnostics/ # Diagnostic queries and CSVs +├── extensions/ # Data extension YAMLs +├── raw/ # Unfiltered analysis output +│ ├── results.sarif +│ └── .qls +└── results/ # Final results (filtered for important-only, copied for run-all) + └── results.sarif +``` + +### Database Discovery + +A CodeQL database is identified by the presence of a `codeql-database.yml` marker file inside its directory. When searching for existing databases, **always collect all matches** — there may be multiple databases from previous runs or for different languages. + +**Discovery command:** + +```bash +# Find ALL CodeQL databases (top-level and one subdirectory deep) +find . -maxdepth 3 -name "codeql-database.yml" -not -path "*/\.*" 2>/dev/null \ + | while read -r yml; do dirname "$yml"; done +``` + +- **Inside `$OUTPUT_DIR`:** `find "$OUTPUT_DIR" -maxdepth 2 -name "codeql-database.yml"` +- **Project-wide (for auto-detection):** `find . -maxdepth 3 -name "codeql-database.yml"` — covers databases at the project top level (`./db-name/`) and one subdirectory deep (`./subdir/db-name/`). Does not search deeper. + +Never assume a database is named `codeql.db` — discover it by its marker file. 
+ +**When multiple databases are found:** + +For each discovered database, collect metadata to help the user choose: + +```bash +# For each database, extract language and creation time +for db in $FOUND_DBS; do + LANG=$(codeql resolve database --format=json -- "$db" 2>/dev/null | jq -r '.languages[0]') + CREATED=$(grep '^creationMetadata:' -A5 "$db/codeql-database.yml" 2>/dev/null | grep 'creationTime' | awk '{print $2}') + echo "$db — language: $LANG, created: $CREATED" +done +``` + +Then use `AskUserQuestion` to let the user select which database to use, or to build a new one. **Skip `AskUserQuestion` if the user explicitly stated which database to use or to build a new one in their prompt.** + ## Quick Start For the common case ("scan this codebase for vulnerabilities"): @@ -50,8 +123,10 @@ For the common case ("scan this codebase for vulnerabilities"): # 1. Verify CodeQL is installed command -v codeql >/dev/null 2>&1 && codeql --version || echo "NOT INSTALLED" -# 2. Check for existing database -ls -dt codeql_*.db 2>/dev/null | head -1 +# 2. Resolve output directory +BASE="static_analysis_codeql"; N=1 +while [ -e "${BASE}_${N}" ]; do N=$((N + 1)); done +OUTPUT_DIR="${BASE}_${N}"; mkdir -p "$OUTPUT_DIR" ``` Then execute the full pipeline: **build database → create data extensions → run analysis** using the workflows below. @@ -82,6 +157,9 @@ These shortcuts lead to missed findings. Do not accept them: - **"The build fails on macOS, just use build-mode=none"** - Exit code 137 is caused by `arm64e`/`arm64` mismatch, not a fundamental build failure. See [macos-arm64e-workaround.md](references/macos-arm64e-workaround.md). - **"No findings means the code is secure"** - Zero findings can indicate poor database quality, missing models, or wrong query packs. Investigate before reporting clean results. - **"I'll just run the default suite"** / **"I'll just pass the pack names directly"** - Each pack's `defaultSuiteFile` applies hidden filters and can produce zero results. 
Always use an explicit suite reference. +- **"I'll put files in the current directory"** - All generated files must go in `$OUTPUT_DIR`. Scattering files in the working directory makes cleanup impossible and risks overwriting previous runs. +- **"Just use the first database I find"** - Multiple databases may exist for different languages or from previous runs. When more than one is found, present all options to the user. Only skip the prompt when the user already specified which database to use. +- **"The user said 'scan', that means they want me to pick a database"** - "Scan" is not database selection. If multiple databases exist and the user didn't name one, ask. --- @@ -97,29 +175,52 @@ This skill has three workflows. **Once a workflow is selected, execute it step b ### Auto-Detection Logic -**If user explicitly specifies** what to do (e.g., "build a database", "run analysis"), execute that workflow. +**If user explicitly specifies** what to do (e.g., "build a database", "run analysis on ./my-db"), execute that workflow directly. **Do NOT call `AskUserQuestion` for database selection if the user's prompt already makes their intent clear** — e.g., "build a new database", "analyze the codeql database in static_analysis_codeql_2", "run a full scan from scratch". -**Default pipeline for "test", "scan", "analyze", or similar:** Execute all three workflows sequentially: build → extensions → analysis. +**Default pipeline for "test", "scan", "analyze", or similar:** Discover existing databases first, then decide. 
```bash -DB=$(ls -dt codeql_*.db 2>/dev/null | head -1) -if [ -n "$DB" ] && codeql resolve database -- "$DB" >/dev/null 2>&1; then - echo "DATABASE EXISTS ($DB) - can run analysis" -else - echo "NO DATABASE - need to build first" -fi +# Find ALL CodeQL databases by looking for codeql-database.yml marker file +# Search top-level dirs and one subdirectory deep +FOUND_DBS=() +while IFS= read -r yml; do + db_dir=$(dirname "$yml") + codeql resolve database -- "$db_dir" >/dev/null 2>&1 && FOUND_DBS+=("$db_dir") +done < <(find . -maxdepth 3 -name "codeql-database.yml" -not -path "*/\.*" 2>/dev/null) + +echo "Found ${#FOUND_DBS[@]} existing database(s)" ``` | Condition | Action | |-----------|--------| -| No database exists | Execute build → extensions → analysis (full pipeline) | -| Database exists, no extensions | Execute extensions → analysis | -| Database exists, extensions exist | Ask user: run analysis on existing DB, or rebuild? | -| User says "just run analysis" or "skip extensions" | Run analysis only | +| No databases found | Resolve new `$OUTPUT_DIR`, execute build → extensions → analysis (full pipeline) | +| One database found | Use `AskUserQuestion`: reuse it or build new? | +| Multiple databases found | Use `AskUserQuestion`: list all with metadata, let user pick one or build new | +| User explicitly stated intent | Skip `AskUserQuestion`, act on their instructions directly | + +### Database Selection Prompt + +When existing databases are found **and the user did not explicitly specify which to use**, present via `AskUserQuestion`: + +``` +header: "Existing CodeQL Databases" +question: "I found existing CodeQL database(s). What would you like to do?" 
+options: + - label: " (language: python, created: 2026-02-24)" + description: "Reuse this database" + - label: " (language: cpp, created: 2026-02-23)" + description: "Reuse this database" + - label: "Build a new database" + description: "Create a fresh database in a new output directory" +``` + +After selection: +- **If user picks an existing database:** Set `$OUTPUT_DIR` to its parent directory (or the directory containing it), set `$DB_NAME` to the selected path, then proceed to extensions → analysis. +- **If user picks "Build new":** Resolve a new `$OUTPUT_DIR`, execute build → extensions → analysis. -### Decision Prompt +### General Decision Prompt -If unclear, ask user: +If the user's intent is ambiguous (neither database selection nor workflow is clear), ask: ``` I can help with CodeQL analysis. What would you like to do? @@ -129,7 +230,8 @@ I can help with CodeQL analysis. What would you like to do? 3. **Create data extensions** - Generate custom source/sink models for project APIs 4. **Run analysis** - Run security queries on existing database -[If database exists: "I found an existing database at "] +[If databases found: "I found N existing database(s): "] +[Show output directory: "Output will be stored in "] ``` --- @@ -162,10 +264,14 @@ I can help with CodeQL analysis. What would you like to do? 
A complete CodeQL analysis run should satisfy: -- [ ] Database built with quality assessment passed (baseline LoC > 0, errors < 5%) -- [ ] Data extensions evaluated — either created for project-specific APIs or explicitly skipped with justification +- [ ] Output directory resolved (user-specified or auto-incremented default) +- [ ] All generated files stored inside `$OUTPUT_DIR` +- [ ] Database built (discovered via `codeql-database.yml` marker) with quality assessment passed (baseline LoC > 0, errors < 5%) +- [ ] Data extensions evaluated — either created in `$OUTPUT_DIR/extensions/` or explicitly skipped with justification - [ ] Analysis run with explicit suite reference (not default pack suite) - [ ] All installed query packs (official + Trail of Bits + Community) used or explicitly excluded -- [ ] Results processed with severity summary and file locations +- [ ] Selected query packs logged to `$OUTPUT_DIR/rulesets.txt` +- [ ] Unfiltered results preserved in `$OUTPUT_DIR/raw/results.sarif` +- [ ] Final results in `$OUTPUT_DIR/results/results.sarif` (filtered for important-only, copied for run-all) - [ ] Zero-finding results investigated (database quality, model coverage, suite selection) -- [ ] Build log preserved with all commands, fixes, and quality assessments +- [ ] Build log preserved at `$OUTPUT_DIR/build.log` with all commands, fixes, and quality assessments diff --git a/plugins/static-analysis/skills/codeql/references/extension-yaml-format.md b/plugins/static-analysis/skills/codeql/references/extension-yaml-format.md index 042ac60..9ecbbbb 100644 --- a/plugins/static-analysis/skills/codeql/references/extension-yaml-format.md +++ b/plugins/static-analysis/skills/codeql/references/extension-yaml-format.md @@ -40,7 +40,7 @@ Columns: `[package, type, subtypes, name, signature, ext, output, kind, provenan Example (Python): ```yaml -# codeql-extensions/sources.yml +# $OUTPUT_DIR/extensions/sources.yml extensions: - addsTo: pack: codeql/python-all @@ -53,7 
+53,7 @@ extensions: Example (Java — note `True`, `Parameter[N]`, and signature): ```yaml -# codeql-extensions/sources.yml +# $OUTPUT_DIR/extensions/sources.yml extensions: - addsTo: pack: codeql/java-all @@ -82,7 +82,7 @@ Note: column 7 is `input` (which argument receives tainted data), not `output`. Example (Python): ```yaml -# codeql-extensions/sinks.yml +# $OUTPUT_DIR/extensions/sinks.yml extensions: - addsTo: pack: codeql/python-all @@ -115,7 +115,7 @@ Columns: `[package, type, subtypes, name, signature, ext, input, output, kind, p Example: ```yaml -# codeql-extensions/summaries.yml +# $OUTPUT_DIR/extensions/summaries.yml extensions: # Pass-through: taint propagates - addsTo: @@ -162,9 +162,9 @@ JAVA_ALL_EXT=$(find "$(codeql resolve qlpacks 2>/dev/null | grep 'java-queries' if [ -n "$JAVA_ALL_EXT" ]; then PROJECT_NAME=$(basename "$(pwd)") - cp codeql-extensions/sources.yml "$JAVA_ALL_EXT/${PROJECT_NAME}.sources.model.yml" - [ -f codeql-extensions/sinks.yml ] && cp codeql-extensions/sinks.yml "$JAVA_ALL_EXT/${PROJECT_NAME}.sinks.model.yml" - [ -f codeql-extensions/summaries.yml ] && cp codeql-extensions/summaries.yml "$JAVA_ALL_EXT/${PROJECT_NAME}.summaries.model.yml" + cp $OUTPUT_DIR/extensions/sources.yml "$JAVA_ALL_EXT/${PROJECT_NAME}.sources.model.yml" + [ -f $OUTPUT_DIR/extensions/sinks.yml ] && cp $OUTPUT_DIR/extensions/sinks.yml "$JAVA_ALL_EXT/${PROJECT_NAME}.sinks.model.yml" + [ -f $OUTPUT_DIR/extensions/summaries.yml ] && cp $OUTPUT_DIR/extensions/summaries.yml "$JAVA_ALL_EXT/${PROJECT_NAME}.summaries.model.yml" # Verify deployment — confirm files landed correctly DEPLOYED=$(ls "$JAVA_ALL_EXT/${PROJECT_NAME}".*.model.yml 2>/dev/null | wc -l) @@ -186,7 +186,7 @@ fi **Alternative (if query packs are NOT pre-compiled):** Use `--additional-packs=./codeql-extensions` with a proper model pack `qlpack.yml`: ```yaml -# codeql-extensions/qlpack.yml +# $OUTPUT_DIR/extensions/qlpack.yml name: custom/-extensions version: 0.0.1 library: true diff --git 
a/plugins/static-analysis/skills/codeql/references/important-only-suite.md b/plugins/static-analysis/skills/codeql/references/important-only-suite.md index 6e5b3b5..170b8f3 100644 --- a/plugins/static-analysis/skills/codeql/references/important-only-suite.md +++ b/plugins/static-analysis/skills/codeql/references/important-only-suite.md @@ -84,7 +84,7 @@ Generate this file as `important-only.qls` in the results directory before runni The agent should generate the suite file dynamically based on installed packs: ```bash -RESULTS_DIR="${DB_NAME%.db}-results" +RESULTS_DIR="$OUTPUT_DIR/results" SUITE_FILE="$RESULTS_DIR/important-only.qls" # NOTE: LANG must be set before running this script (e.g., LANG=cpp) diff --git a/plugins/static-analysis/skills/codeql/references/run-all-suite.md b/plugins/static-analysis/skills/codeql/references/run-all-suite.md index feab430..802980e 100644 --- a/plugins/static-analysis/skills/codeql/references/run-all-suite.md +++ b/plugins/static-analysis/skills/codeql/references/run-all-suite.md @@ -38,7 +38,7 @@ Generate this file as `run-all.qls` in the results directory before running anal ## Generation Script ```bash -RESULTS_DIR="${DB_NAME%.db}-results" +RESULTS_DIR="$OUTPUT_DIR/results" SUITE_FILE="$RESULTS_DIR/run-all.qls" # NOTE: LANG must be set before running this script (e.g., LANG=cpp) diff --git a/plugins/static-analysis/skills/codeql/references/sarif-processing.md b/plugins/static-analysis/skills/codeql/references/sarif-processing.md index 35b1bd7..7dee9a4 100644 --- a/plugins/static-analysis/skills/codeql/references/sarif-processing.md +++ b/plugins/static-analysis/skills/codeql/references/sarif-processing.md @@ -4,6 +4,8 @@ jq commands for processing CodeQL SARIF output. Used in the run-analysis workflo > **SARIF structure note:** `security-severity` and `level` are stored on rule definitions (`.runs[].tool.driver.rules[]`), NOT on individual result objects. Results reference rules by `ruleIndex`. 
The jq commands below join results with their rule metadata. +> **Directory convention:** Unfiltered output lives in `$RAW_DIR` (`$OUTPUT_DIR/raw`). Final results live in `$RESULTS_DIR` (`$OUTPUT_DIR/results`). The summary commands below operate on `$RESULTS_DIR/results.sarif` (the final output). + ## Count Findings ```bash @@ -46,12 +48,17 @@ jq -r '.runs[].results[] | .ruleId' "$RESULTS_DIR/results.sarif" \ ## Important-Only Post-Filter -If scan mode is "important only", filter out medium-precision results with `security-severity` < 6.0 from the report. The suite includes all medium-precision security queries to let CodeQL evaluate them, but low-severity medium-precision findings are noise: +If scan mode is "important only", filter out medium-precision results with `security-severity` < 6.0 from the report. The suite includes all medium-precision security queries to let CodeQL evaluate them, but low-severity medium-precision findings are noise. + +The filter reads from `$RAW_DIR/results.sarif` (unfiltered) and writes to `$RESULTS_DIR/results.sarif` (final). The raw file is preserved unmodified. ```bash # Filter important-only results: drop medium-precision findings with security-severity < 6.0 # Medium-precision queries without a security-severity score default to 0.0 (excluded). # Non-medium queries are always kept regardless of security-severity. +# Reads from raw/, writes to results/ — preserving the unfiltered original. +RAW_DIR="$OUTPUT_DIR/raw" +RESULTS_DIR="$OUTPUT_DIR/results" jq ' .runs[] |= ( . 
as $run | @@ -66,6 +73,5 @@ jq ' ) ] ) -' "$RESULTS_DIR/results.sarif" > "$RESULTS_DIR/results-filtered.sarif" -mv "$RESULTS_DIR/results-filtered.sarif" "$RESULTS_DIR/results.sarif" +' "$RAW_DIR/results.sarif" > "$RESULTS_DIR/results.sarif" ``` diff --git a/plugins/static-analysis/skills/codeql/workflows/build-database.md b/plugins/static-analysis/skills/codeql/workflows/build-database.md index dbf25df..418b3a9 100644 --- a/plugins/static-analysis/skills/codeql/workflows/build-database.md +++ b/plugins/static-analysis/skills/codeql/workflows/build-database.md @@ -39,20 +39,12 @@ Database creation differs by language type: --- -## Database Naming +## Output Directory + +This workflow receives `$OUTPUT_DIR` from the parent skill (resolved once at invocation). All files go inside it. ```bash -get_next_db_name() { - local prefix="${1:-codeql}" - local max=0 - for db in ${prefix}_*.db; do - [[ -d "$db" ]] || continue - num="${db#${prefix}_}"; num="${num%.db}" - [[ "$num" =~ ^[0-9]+$ ]] && (( num > max )) && max=$num - done - echo "${prefix}_$((max + 1)).db" -} -DB_NAME=$(get_next_db_name) +DB_NAME="$OUTPUT_DIR/codeql.db" ``` --- @@ -62,9 +54,10 @@ DB_NAME=$(get_next_db_name) Maintain a log file throughout. Initialize at start: ```bash -LOG_FILE="${DB_NAME%.db}-build.log" +LOG_FILE="$OUTPUT_DIR/build.log" echo "=== CodeQL Database Build Log ===" > "$LOG_FILE" echo "Started: $(date -Iseconds)" >> "$LOG_FILE" +echo "Output dir: $OUTPUT_DIR" >> "$LOG_FILE" echo "Database: $DB_NAME" >> "$LOG_FILE" ``` @@ -108,7 +101,7 @@ ls -la Makefile CMakeLists.txt build.gradle pom.xml Cargo.toml *.sln 2>/dev/null > **Skip for compiled languages** — exclusion config is not supported when build tracing is required. -Scan for irrelevant directories and create `codeql-config.yml` with `paths-ignore` entries for `node_modules`, `vendor`, `venv`, third-party code, and generated/minified files. 
+Scan for irrelevant directories and create `$OUTPUT_DIR/codeql-config.yml` with `paths-ignore` entries for `node_modules`, `vendor`, `venv`, third-party code, and generated/minified files. --- @@ -121,7 +114,7 @@ Scan for irrelevant directories and create `codeql-config.yml` with `paths-ignor ```bash log_step "Building database for interpreted language: " -CMD="codeql database create $DB_NAME --language= --source-root=. --codescanning-config=codeql-config.yml --overwrite" +CMD="codeql database create $DB_NAME --language= --source-root=. --codescanning-config=$OUTPUT_DIR/codeql-config.yml --overwrite" log_cmd "$CMD" $CMD 2>&1 | tee -a "$LOG_FILE" ``` @@ -269,6 +262,7 @@ Report to user: ``` ## Database Build Complete +**Output directory:** $OUTPUT_DIR **Database:** $DB_NAME **Language:** **Build method:** autobuild | custom | multi-step @@ -279,7 +273,7 @@ Report to user: - Coverage: ### Build Log: -See `$LOG_FILE` for complete details. +See `$OUTPUT_DIR/build.log` for complete details. **Final command used:** **Ready for analysis.** diff --git a/plugins/static-analysis/skills/codeql/workflows/create-data-extensions.md b/plugins/static-analysis/skills/codeql/workflows/create-data-extensions.md index 61c501b..28eea45 100644 --- a/plugins/static-analysis/skills/codeql/workflows/create-data-extensions.md +++ b/plugins/static-analysis/skills/codeql/workflows/create-data-extensions.md @@ -33,15 +33,15 @@ TaskCreate: "Validate with re-analysis" (Step 5) - blockedBy: Step 4 Search the project for existing data extensions and model packs. ```bash -# 1. In-repo model packs -fd '(qlpack|codeql-pack)\.yml$' . --exclude codeql_*.db | while read -r f; do +# 1. In-repo model packs (exclude output dirs and legacy database dirs) +fd '(qlpack|codeql-pack)\.yml$' . --exclude 'static_analysis_codeql_*' --exclude 'codeql_*.db' | while read -r f; do if grep -q 'dataExtensions' "$f"; then echo "MODEL PACK: $(dirname "$f") - $(grep '^name:' "$f")" fi done # 2. 
Standalone data extension files -rg -l '^extensions:' --glob '*.yml' --glob '!codeql_*.db/**' | head -20 +rg -l '^extensions:' --glob '*.yml' --glob '!static_analysis_codeql_*/**' --glob '!codeql_*.db/**' | head -20 # 3. Installed model packs codeql resolve qlpacks 2>/dev/null | grep -iE 'model|extension' @@ -62,10 +62,27 @@ Run custom QL queries against the database to enumerate all sources and sinks Co #### 2a: Select Database and Language +A CodeQL database is a directory containing a `codeql-database.yml` marker file. `$DB_NAME` may already be set by the parent skill. If not, discover inside `$OUTPUT_DIR`. + ```bash -DB_NAME=$(ls -dt codeql_*.db 2>/dev/null | head -1) +if [ -z "$DB_NAME" ]; then + FOUND_DBS=() + while IFS= read -r yml; do + FOUND_DBS+=("$(dirname "$yml")") + done < <(find "$OUTPUT_DIR" -maxdepth 2 -name "codeql-database.yml" 2>/dev/null) + + if [ ${#FOUND_DBS[@]} -eq 0 ]; then + echo "ERROR: No CodeQL database found in $OUTPUT_DIR"; exit 1 + elif [ ${#FOUND_DBS[@]} -eq 1 ]; then + DB_NAME="${FOUND_DBS[0]}" + else + # Multiple databases — use AskUserQuestion to select + # SKIP if user already specified which database in their prompt + fi +fi + LANG=$(codeql resolve database --format=json -- "$DB_NAME" | jq -r '.languages[0]') -DIAG_DIR="${DB_NAME%.db}-diagnostics" +DIAG_DIR="$OUTPUT_DIR/diagnostics" mkdir -p "$DIAG_DIR" ``` @@ -150,16 +167,16 @@ options: ### Step 4: Create Data Extension Files **Entry:** Step 3 identified gaps and user confirmed which to model -**Exit:** YAML extension files created in `codeql-extensions/` and deployed to `-all` ext/ directory +**Exit:** YAML extension files created in `$OUTPUT_DIR/extensions/` and deployed to `-all` ext/ directory Generate YAML data extension files for the gaps confirmed by the user. 
#### File Structure -Create files in a `codeql-extensions/` directory at project root: +Create files in `$OUTPUT_DIR/extensions/`: ``` -codeql-extensions/ +$OUTPUT_DIR/extensions/ sources.yml # sourceModel entries sinks.yml # sinkModel entries summaries.yml # summaryModel and neutralModel entries @@ -182,11 +199,11 @@ Run a full security analysis with and without extensions to measure the finding #### 5a: Run Baseline Analysis (without extensions) +Validation artifacts go in `$DIAG_DIR` (not `results/`) since these are intermediate comparisons, not the final analysis output. + ```bash -RESULTS_DIR="${DB_NAME%.db}-results" -mkdir -p "$RESULTS_DIR" codeql database analyze "$DB_NAME" \ - --format=sarif-latest --output="$RESULTS_DIR/baseline.sarif" --threads=0 \ + --format=sarif-latest --output="$DIAG_DIR/baseline.sarif" --threads=0 \ -- codeql/-queries:codeql-suites/-security-extended.qls ``` @@ -195,7 +212,7 @@ codeql database analyze "$DB_NAME" \ ```bash codeql database cleanup "$DB_NAME" codeql database analyze "$DB_NAME" \ - --format=sarif-latest --output="$RESULTS_DIR/with-extensions.sarif" --threads=0 --rerun \ + --format=sarif-latest --output="$DIAG_DIR/with-extensions.sarif" --threads=0 --rerun \ -- codeql/-queries:codeql-suites/-security-extended.qls ``` @@ -204,8 +221,8 @@ Use `-vvv` flag to verify extensions are being loaded. 
#### 5c: Compare Findings ```bash -BASELINE=$(python3 -c "import json; print(sum(len(r.get('results',[])) for r in json.load(open('$RESULTS_DIR/baseline.sarif')).get('runs',[])))") -WITH_EXT=$(python3 -c "import json; print(sum(len(r.get('results',[])) for r in json.load(open('$RESULTS_DIR/with-extensions.sarif')).get('runs',[])))") +BASELINE=$(python3 -c "import json; print(sum(len(r.get('results',[])) for r in json.load(open('$DIAG_DIR/baseline.sarif')).get('runs',[])))") +WITH_EXT=$(python3 -c "import json; print(sum(len(r.get('results',[])) for r in json.load(open('$DIAG_DIR/with-extensions.sarif')).get('runs',[])))") echo "Findings: $BASELINE → $WITH_EXT (+$((WITH_EXT - BASELINE)))" ``` @@ -218,13 +235,14 @@ echo "Findings: $BASELINE → $WITH_EXT (+$((WITH_EXT - BASELINE)))" ``` ## Data Extensions Created +**Output directory:** $OUTPUT_DIR **Database:** $DB_NAME **Language:** ### Files Created: -- codeql-extensions/sources.yml — source models -- codeql-extensions/sinks.yml — sink models -- codeql-extensions/summaries.yml — summary/neutral models +- $OUTPUT_DIR/extensions/sources.yml — source models +- $OUTPUT_DIR/extensions/sinks.yml — sink models +- $OUTPUT_DIR/extensions/summaries.yml — summary/neutral models ### Model Coverage: - Sources: (+) @@ -232,7 +250,7 @@ echo "Findings: $BASELINE → $WITH_EXT (+$((WITH_EXT - BASELINE)))" ### Usage: Extensions deployed to `-all` ext/ directory (auto-loaded). -Source files in `codeql-extensions/` for version control. +Source files in `$OUTPUT_DIR/extensions/` for version control. Run the run-analysis workflow to use them. 
``` diff --git a/plugins/static-analysis/skills/codeql/workflows/run-analysis.md b/plugins/static-analysis/skills/codeql/workflows/run-analysis.md index 2a78cc8..9a14f19 100644 --- a/plugins/static-analysis/skills/codeql/workflows/run-analysis.md +++ b/plugins/static-analysis/skills/codeql/workflows/run-analysis.md @@ -42,17 +42,48 @@ TaskCreate: "Process and report results" (Step 5) - blockedBy: Step 4 ### Step 1: Select Database and Detect Language -**Entry:** At least one CodeQL database exists in the working directory -**Exit:** `DB_NAME` and `LANG` variables set; database resolves successfully +**Entry:** `$OUTPUT_DIR` is set (from parent skill). `$DB_NAME` may already be set if the parent skill resolved database selection. +**Exit:** `DB_NAME` and `LANG` variables set; database resolves successfully. + +**If `$DB_NAME` is already set** (parent skill handled database selection): validate it and proceed. + +**If `$DB_NAME` is not set:** discover databases by looking for `codeql-database.yml` marker files. Search inside `$OUTPUT_DIR` first, then fall back to the project root (top-level and one subdirectory deep). ```bash -DB_NAME=$(ls -dt codeql_*.db 2>/dev/null | head -1) -[[ -z "$DB_NAME" ]] && echo "ERROR: No CodeQL database found." && exit 1 +# Skip discovery if DB_NAME was already resolved by parent skill +if [ -z "$DB_NAME" ]; then + # Discover databases inside OUTPUT_DIR + FOUND_DBS=() + while IFS= read -r yml; do + FOUND_DBS+=("$(dirname "$yml")") + done < <(find "$OUTPUT_DIR" -maxdepth 2 -name "codeql-database.yml" 2>/dev/null) + + # Fallback: search project root (top-level and one subdir deep) + if [ ${#FOUND_DBS[@]} -eq 0 ]; then + while IFS= read -r yml; do + FOUND_DBS+=("$(dirname "$yml")") + done < <(find . 
-maxdepth 3 -name "codeql-database.yml" -not -path "*/\.*" 2>/dev/null) + fi + + if [ ${#FOUND_DBS[@]} -eq 0 ]; then + echo "ERROR: No CodeQL database found in $OUTPUT_DIR or project root" + exit 1 + elif [ ${#FOUND_DBS[@]} -eq 1 ]; then + DB_NAME="${FOUND_DBS[0]}" + else + # Multiple databases found — present to user + # Use AskUserQuestion with each DB's path and language + # SKIP if user already specified which database in their prompt + fi +fi + LANG=$(codeql resolve database --format=json -- "$DB_NAME" | jq -r '.languages[0]') echo "Using: $DB_NAME (language: $LANG)" ``` -If multiple databases exist, use `AskUserQuestion` to let user select. If multi-language database, ask which language to analyze. +**When multiple databases are found**, use `AskUserQuestion` to let user select — list each database with its path and language. **Skip `AskUserQuestion` if the user already specified which database to use in their prompt.** + +If multi-language database, ask which language to analyze. 
--- @@ -154,7 +185,30 @@ Build the flag: `THREAT_MODEL_FLAG=""` (remote only needs no flag), `--threat-mo ### Step 4: Execute Analysis **Entry:** Step 3 complete (all flags and pack selections finalized) -**Exit:** `$RESULTS_DIR/results.sarif` exists and contains valid SARIF output +**Exit:** `$RAW_DIR/results.sarif` exists and contains valid SARIF output + +#### Log selected query packs + +Write the selected query packs, model packs, and threat models to `$OUTPUT_DIR/rulesets.txt`: + +```bash +cat > "$OUTPUT_DIR/rulesets.txt" << RULESETS +# CodeQL Analysis — Selected Query Packs +# Generated: $(date -Iseconds) +# Scan mode: +# Database: $DB_NAME +# Language: $LANG + +## Query packs: + + +## Model packs: + + +## Threat models: + +RULESETS +``` #### Generate custom suite @@ -163,9 +217,10 @@ Build the flag: `THREAT_MODEL_FLAG=""` (remote only needs no flag), `--threat-mo **Run-all mode:** Generate the custom `.qls` suite using the template in [run-all-suite.md](../references/run-all-suite.md). ```bash -RESULTS_DIR="${DB_NAME%.db}-results" -mkdir -p "$RESULTS_DIR" -SUITE_FILE="$RESULTS_DIR/.qls" +RAW_DIR="$OUTPUT_DIR/raw" +RESULTS_DIR="$OUTPUT_DIR/results" +mkdir -p "$RAW_DIR" "$RESULTS_DIR" +SUITE_FILE="$RAW_DIR/.qls" # Verify suite resolves correctly before running codeql resolve queries "$SUITE_FILE" | wc -l @@ -173,10 +228,12 @@ codeql resolve queries "$SUITE_FILE" | wc -l #### Run analysis +Output goes to `$RAW_DIR/results.sarif` (unfiltered). The final results are produced in Step 5. 
+ ```bash codeql database analyze $DB_NAME \ --format=sarif-latest \ - --output="$RESULTS_DIR/results.sarif" \ + --output="$RAW_DIR/results.sarif" \ --threads=0 \ $THREAT_MODEL_FLAG \ $MODEL_PACK_FLAGS \ @@ -200,12 +257,19 @@ If codebase is large, read [performance-tuning.md](../references/performance-tun ### Step 5: Process and Report Results -**Entry:** Step 4 complete (`results.sarif` exists) -**Exit:** Findings summarized by severity, rule, and location; zero-finding results investigated; final report presented to user +**Entry:** Step 4 complete (`$RAW_DIR/results.sarif` exists) +**Exit:** `$RESULTS_DIR/results.sarif` contains final results; findings summarized by severity, rule, and location; zero-finding results investigated; final report presented to user + +#### Produce final results + +- **Run-all mode:** Copy unfiltered results to the final location: + ```bash + cp "$RAW_DIR/results.sarif" "$RESULTS_DIR/results.sarif" + ``` -Process the SARIF output using the jq commands in [sarif-processing.md](../references/sarif-processing.md): count findings, summarize by level, summarize by security severity, summarize by rule. +- **Important-only mode:** Apply the post-analysis filter from [sarif-processing.md](../references/sarif-processing.md#important-only-post-filter) to remove medium-precision results with `security-severity` < 6.0. The filter reads from `$RAW_DIR/results.sarif` and writes to `$RESULTS_DIR/results.sarif`, preserving the unfiltered original. -**Important-only mode:** Apply the post-analysis filter from [sarif-processing.md](../references/sarif-processing.md#important-only-post-filter) to remove medium-precision results with `security-severity` < 6.0. +Process the final SARIF output (`$RESULTS_DIR/results.sarif`) using the jq commands in [sarif-processing.md](../references/sarif-processing.md): count findings, summarize by level, summarize by security severity, summarize by rule. 
--- @@ -216,6 +280,7 @@ Report to user: ``` ## CodeQL Analysis Complete +**Output directory:** $OUTPUT_DIR **Database:** $DB_NAME **Language:** **Scan mode:** Run all | Important only @@ -230,5 +295,7 @@ Report to user: - Note: ### Output Files: -- SARIF: $RESULTS_DIR/results.sarif +- SARIF (final): $OUTPUT_DIR/results/results.sarif +- SARIF (unfiltered): $OUTPUT_DIR/raw/results.sarif +- Rulesets: $OUTPUT_DIR/rulesets.txt ``` diff --git a/plugins/static-analysis/skills/semgrep/SKILL.md b/plugins/static-analysis/skills/semgrep/SKILL.md index 5a3bd7a..29ce64e 100644 --- a/plugins/static-analysis/skills/semgrep/SKILL.md +++ b/plugins/static-analysis/skills/semgrep/SKILL.md @@ -46,6 +46,45 @@ Run a Semgrep scan with automatic language detection, parallel execution via Tas - Creating custom Semgrep rules → Use `semgrep-rule-creator` skill - Porting existing rules to other languages → Use `semgrep-rule-variant-creator` skill +## Output Directory + +All scan results, SARIF files, and temporary data are stored in a single output directory. + +- **If the user specifies an output directory** in their prompt, use it as `OUTPUT_DIR`. +- **If not specified**, default to `./static_analysis_semgrep_1`. If that already exists, increment to `_2`, `_3`, etc. + +In both cases, **always create the directory** with `mkdir -p` before writing any files. + +```bash +# Resolve output directory +if [ -n "$USER_SPECIFIED_DIR" ]; then + OUTPUT_DIR="$USER_SPECIFIED_DIR" +else + BASE="static_analysis_semgrep" + N=1 + while [ -e "${BASE}_${N}" ]; do + N=$((N + 1)) + done + OUTPUT_DIR="${BASE}_${N}" +fi +mkdir -p "$OUTPUT_DIR/raw" "$OUTPUT_DIR/results" +``` + +The output directory is resolved **once** at the start of Step 1 and used throughout all subsequent steps. 
+ +``` +$OUTPUT_DIR/ +├── rulesets.txt # Approved rulesets (logged after Step 3) +├── raw/ # Per-scan raw output (unfiltered) +│ ├── python-python.json +│ ├── python-python.sarif +│ ├── python-django.json +│ ├── python-django.sarif +│ └── ... +└── results/ # Final merged output + └── results.sarif +``` + ## Prerequisites **Required:** Semgrep CLI (`semgrep --version`). If not installed, see [Semgrep installation docs](https://semgrep.dev/docs/getting-started/). @@ -103,7 +142,7 @@ See [scan-modes.md](references/scan-modes.md) for metadata criteria and jq filte | Step | Action | Gate | Key Reference | |------|--------|------|---------------| -| 1 | Detect languages + Pro availability | — | Use Glob, not Bash | +| 1 | Resolve output dir, detect languages + Pro availability | — | Use Glob, not Bash | | 2 | Select scan mode + rulesets | — | [rulesets.md](references/rulesets.md) | | 3 | Present plan, get explicit approval | ⛔ HARD | AskUserQuestion | | 4 | Spawn parallel scan Tasks | — | [scanner-task-prompt.md](references/scanner-task-prompt.md) | @@ -114,7 +153,7 @@ See [scan-modes.md](references/scan-modes.md) for metadata criteria and jq filte **Merge command (Step 5):** ```bash -uv run {baseDir}/scripts/merge_triaged_sarif.py [OUTPUT_DIR] +uv run {baseDir}/scripts/merge_triaged_sarif.py $OUTPUT_DIR/raw $OUTPUT_DIR/results/results.sarif ``` ## Agents @@ -141,6 +180,7 @@ Use `subagent_type: static-analysis:semgrep-scanner` in Step 4 when spawning Tas | "Semgrep handles GitHub URLs natively" | URL handling fails on repos with non-standard YAML; always clone first | | "Cleanup is optional" | Cloned repos pollute the user's workspace and accumulate across runs | | "Use `.` or relative path as target" | Subagents need absolute paths to avoid ambiguity | +| "Let the user pick an output dir later" | Output directory must be resolved at Step 1, before any files are created | ## Reference Index @@ -156,13 +196,17 @@ Use `subagent_type: static-analysis:semgrep-scanner` in 
Step 4 when spawning Tas ## Success Criteria +- [ ] Output directory resolved (user-specified or auto-incremented default) +- [ ] All generated files stored inside `$OUTPUT_DIR` - [ ] Languages detected with file counts; Pro status checked - [ ] Scan mode selected by user (run all / important only) - [ ] Rulesets include third-party rules for all detected languages - [ ] User explicitly approved the scan plan (Step 3 gate passed) - [ ] All scan Tasks spawned in a single message and completed - [ ] Every `semgrep` command used `--metrics=off` -- [ ] `findings.sarif` exists in the output directory and is valid JSON -- [ ] Important-only mode: post-filter applied before merge +- [ ] Approved rulesets logged to `$OUTPUT_DIR/rulesets.txt` +- [ ] Raw per-scan outputs stored in `$OUTPUT_DIR/raw/` +- [ ] `results.sarif` exists in `$OUTPUT_DIR/results/` and is valid JSON +- [ ] Important-only mode: post-filter applied before merge; unfiltered results preserved in `raw/` - [ ] Results summary reported with severity and category breakdown -- [ ] Cloned repos (if any) cleaned up from `[OUTPUT_DIR]/repos/` +- [ ] Cloned repos (if any) cleaned up from `$OUTPUT_DIR/repos/` diff --git a/plugins/static-analysis/skills/semgrep/references/scan-modes.md b/plugins/static-analysis/skills/semgrep/references/scan-modes.md index 5a2b171..2d9de70 100644 --- a/plugins/static-analysis/skills/semgrep/references/scan-modes.md +++ b/plugins/static-analysis/skills/semgrep/references/scan-modes.md @@ -71,9 +71,11 @@ Default values (`// "security"`, `// "HIGH"`) handle third-party rules without m ### Filter All Result Files in a Directory +Raw scan output lives in `$OUTPUT_DIR/raw/`. The filter creates `*-important.json` files alongside the originals — the raw files are preserved unmodified. 
+ ```bash -# Apply important-only filter to all scan result JSON files -for f in "$OUTPUT_DIR"/*-*.json; do +# Apply important-only filter to all scan result JSON files in raw/ +for f in "$OUTPUT_DIR/raw"/*-*.json; do [[ "$f" == *-triage.json || "$f" == *-important.json ]] && continue jq '{ results: [.results[] | @@ -100,7 +102,7 @@ done In important-only mode, add `[SEVERITY_FLAGS]` to the scanner template: ```bash -semgrep [--pro if available] --metrics=off [SEVERITY_FLAGS] --config [RULESET] --json -o [OUTPUT_DIR]/[lang]-[ruleset].json --sarif-output=[OUTPUT_DIR]/[lang]-[ruleset].sarif [TARGET] & +semgrep [--pro if available] --metrics=off [SEVERITY_FLAGS] --config [RULESET] --json -o [OUTPUT_DIR]/raw/[lang]-[ruleset].json --sarif-output=[OUTPUT_DIR]/raw/[lang]-[ruleset].sarif [TARGET] & ``` Where `[SEVERITY_FLAGS]` is: diff --git a/plugins/static-analysis/skills/semgrep/references/scanner-task-prompt.md b/plugins/static-analysis/skills/semgrep/references/scanner-task-prompt.md index ca88851..4e4bb44 100644 --- a/plugins/static-analysis/skills/semgrep/references/scanner-task-prompt.md +++ b/plugins/static-analysis/skills/semgrep/references/scanner-task-prompt.md @@ -8,7 +8,7 @@ Use this prompt template when spawning scanner Tasks in Step 4. Use `subagent_ty You are a Semgrep scanner for [LANGUAGE_CATEGORY]. ## Task -Run Semgrep scans for [LANGUAGE] files and save results to [OUTPUT_DIR]. +Run Semgrep scans for [LANGUAGE] files and save results to [OUTPUT_DIR]/raw. 
## Pro Engine Status: [PRO_AVAILABLE: true/false] @@ -35,7 +35,7 @@ git clone --depth 1 https://github.com/org/repo [OUTPUT_DIR]/repos/repo-name ### Generate commands for EACH approved ruleset: ```bash -semgrep [--pro if available] --metrics=off [SEVERITY_FLAGS] [INCLUDE_FLAGS] --config [RULESET] --json -o [OUTPUT_DIR]/[lang]-[ruleset].json --sarif-output=[OUTPUT_DIR]/[lang]-[ruleset].sarif [TARGET] & +semgrep [--pro if available] --metrics=off [SEVERITY_FLAGS] [INCLUDE_FLAGS] --config [RULESET] --json -o [OUTPUT_DIR]/raw/[lang]-[ruleset].json --sarif-output=[OUTPUT_DIR]/raw/[lang]-[ruleset].sarif [TARGET] & ``` Wait for all to complete: @@ -63,7 +63,7 @@ rm -rf [OUTPUT_DIR]/repos Report: - Number of findings per ruleset - Any scan errors -- File paths of JSON results +- File paths of JSON results (in [OUTPUT_DIR]/raw/) - [If Pro] Note any cross-file findings detected ``` @@ -73,7 +73,7 @@ Report: |----------|-------------|---------| | `[LANGUAGE_CATEGORY]` | Language group being scanned | Python, JavaScript, Docker | | `[LANGUAGE]` | Specific language | Python, TypeScript, Go | -| `[OUTPUT_DIR]` | Results directory with run number | semgrep-results-001 | +| `[OUTPUT_DIR]` | Output directory (absolute path, resolved in Step 1) | /path/to/static_analysis_semgrep_1 | | `[PRO_AVAILABLE]` | Whether Pro engine is available | true, false | | `[SEVERITY_FLAGS]` | Severity pre-filter flags | *(empty)* for run-all, `--severity MEDIUM --severity HIGH --severity CRITICAL` for important-only | | `[INCLUDE_FLAGS]` | File extension filter for language-specific rulesets | `--include="*.py"` for Python rulesets, *(empty)* for cross-language rulesets like p/security-audit, p/secrets, or third-party repos | @@ -86,7 +86,7 @@ Report: You are a Semgrep scanner for Python. ## Task -Run Semgrep scans for Python files and save results to /path/to/semgrep-results-001. +Run Semgrep scans for Python files and save results to /path/to/static_analysis_semgrep_1/raw. 
## Pro Engine Status: true @@ -103,23 +103,23 @@ Run Semgrep scans for Python files and save results to /path/to/semgrep-results- ### Clone GitHub URL rulesets first: ```bash -mkdir -p /path/to/semgrep-results-001/repos -git clone --depth 1 https://github.com/trailofbits/semgrep-rules /path/to/semgrep-results-001/repos/trailofbits +mkdir -p /path/to/static_analysis_semgrep_1/repos +git clone --depth 1 https://github.com/trailofbits/semgrep-rules /path/to/static_analysis_semgrep_1/repos/trailofbits ``` ### Run scans: ```bash -semgrep --pro --metrics=off --include="*.py" --config p/python --json -o /path/to/semgrep-results-001/python-python.json --sarif-output=/path/to/semgrep-results-001/python-python.sarif /path/to/codebase & -semgrep --pro --metrics=off --include="*.py" --config p/django --json -o /path/to/semgrep-results-001/python-django.json --sarif-output=/path/to/semgrep-results-001/python-django.sarif /path/to/codebase & -semgrep --pro --metrics=off --config p/security-audit --json -o /path/to/semgrep-results-001/python-security-audit.json --sarif-output=/path/to/semgrep-results-001/python-security-audit.sarif /path/to/codebase & -semgrep --pro --metrics=off --config p/secrets --json -o /path/to/semgrep-results-001/python-secrets.json --sarif-output=/path/to/semgrep-results-001/python-secrets.sarif /path/to/codebase & -semgrep --pro --metrics=off --config /path/to/semgrep-results-001/repos/trailofbits --json -o /path/to/semgrep-results-001/python-trailofbits.json --sarif-output=/path/to/semgrep-results-001/python-trailofbits.sarif /path/to/codebase & +semgrep --pro --metrics=off --include="*.py" --config p/python --json -o /path/to/static_analysis_semgrep_1/raw/python-python.json --sarif-output=/path/to/static_analysis_semgrep_1/raw/python-python.sarif /path/to/codebase & +semgrep --pro --metrics=off --include="*.py" --config p/django --json -o /path/to/static_analysis_semgrep_1/raw/python-django.json 
--sarif-output=/path/to/static_analysis_semgrep_1/raw/python-django.sarif /path/to/codebase & +semgrep --pro --metrics=off --config p/security-audit --json -o /path/to/static_analysis_semgrep_1/raw/python-security-audit.json --sarif-output=/path/to/static_analysis_semgrep_1/raw/python-security-audit.sarif /path/to/codebase & +semgrep --pro --metrics=off --config p/secrets --json -o /path/to/static_analysis_semgrep_1/raw/python-secrets.json --sarif-output=/path/to/static_analysis_semgrep_1/raw/python-secrets.sarif /path/to/codebase & +semgrep --pro --metrics=off --config /path/to/static_analysis_semgrep_1/repos/trailofbits --json -o /path/to/static_analysis_semgrep_1/raw/python-trailofbits.json --sarif-output=/path/to/static_analysis_semgrep_1/raw/python-trailofbits.sarif /path/to/codebase & wait ``` ### Clean up cloned repos: ```bash -rm -rf /path/to/semgrep-results-001/repos +rm -rf /path/to/static_analysis_semgrep_1/repos ``` ## Critical Rules @@ -135,6 +135,6 @@ rm -rf /path/to/semgrep-results-001/repos Report: - Number of findings per ruleset - Any scan errors -- File paths of JSON results +- File paths of JSON results (in raw/ subdirectory) - Note any cross-file findings detected ``` diff --git a/plugins/static-analysis/skills/semgrep/scripts/merge_triaged_sarif.py b/plugins/static-analysis/skills/semgrep/scripts/merge_triaged_sarif.py index 10baca4..c9bb0eb 100644 --- a/plugins/static-analysis/skills/semgrep/scripts/merge_triaged_sarif.py +++ b/plugins/static-analysis/skills/semgrep/scripts/merge_triaged_sarif.py @@ -5,10 +5,11 @@ """Merge SARIF files into a single consolidated output. Usage: - uv run merge_triaged_sarif.py OUTPUT_DIR + uv run merge_triaged_sarif.py RAW_DIR OUTPUT_FILE -Reads *.sarif files from OUTPUT_DIR, produces -OUTPUT_DIR/findings.sarif containing all findings merged. +Reads *.sarif files from RAW_DIR (e.g., $OUTPUT_DIR/raw), produces +OUTPUT_FILE (e.g., $OUTPUT_DIR/results/results.sarif) containing all +findings merged and deduplicated. 
Attempts to use SARIF Multitool for merging if available, falls back to pure Python implementation. @@ -39,9 +40,8 @@ def has_sarif_multitool() -> bool: return False -def merge_with_multitool(sarif_dir: Path) -> dict | None: +def merge_with_multitool(sarif_files: list[Path]) -> dict | None: """Use SARIF Multitool to merge SARIF files. Returns merged SARIF or None.""" - sarif_files = list(sarif_dir.glob("*.sarif")) if not sarif_files: return None @@ -72,7 +72,7 @@ def merge_with_multitool(sarif_dir: Path) -> dict | None: tmp_path.unlink(missing_ok=True) -def merge_sarif_pure_python(sarif_dir: Path) -> dict: +def merge_sarif_pure_python(sarif_files: list[Path]) -> dict: """Pure Python SARIF merge (fallback).""" merged = { "version": "2.1.0", @@ -85,7 +85,7 @@ def merge_sarif_pure_python(sarif_dir: Path) -> dict: seen_results: set[tuple[str, str, int]] = set() tool_info: dict | None = None - for sarif_file in sorted(sarif_dir.glob("*.sarif")): + for sarif_file in sorted(sarif_files): try: data = json.loads(sarif_file.read_text()) except json.JSONDecodeError as e: @@ -129,40 +129,44 @@ def merge_sarif_pure_python(sarif_dir: Path) -> dict: def main() -> int: - if len(sys.argv) != 2: - print(f"Usage: {sys.argv[0]} OUTPUT_DIR", file=sys.stderr) + if len(sys.argv) != 3: + print(f"Usage: {sys.argv[0]} RAW_DIR OUTPUT_FILE", file=sys.stderr) return 1 - output_dir = Path(sys.argv[1]) - if not output_dir.is_dir(): - print(f"Error: {output_dir} is not a directory", file=sys.stderr) + raw_dir = Path(sys.argv[1]) + output_file = Path(sys.argv[2]) + + if not raw_dir.is_dir(): + print(f"Error: {raw_dir} is not a directory", file=sys.stderr) return 1 - # Count SARIF files - sarif_files = list(output_dir.glob("*.sarif")) - print(f"Found {len(sarif_files)} SARIF files to merge") + # Collect SARIF files from raw directory only + sarif_files = sorted(raw_dir.glob("*.sarif")) + print(f"Found {len(sarif_files)} SARIF files to merge in {raw_dir}") if not sarif_files: print("No SARIF files 
found, nothing to merge", file=sys.stderr) return 1 + # Ensure output directory exists + output_file.parent.mkdir(parents=True, exist_ok=True) + # Try SARIF Multitool first, fall back to pure Python merged: dict | None = None if has_sarif_multitool(): print("Using SARIF Multitool for merge...") - merged = merge_with_multitool(output_dir) + merged = merge_with_multitool(sarif_files) if merged: print("SARIF Multitool merge successful") if merged is None: print("Using pure Python merge (SARIF Multitool not available or failed)") - merged = merge_sarif_pure_python(output_dir) + merged = merge_sarif_pure_python(sarif_files) result_count = sum(len(run.get("results", [])) for run in merged.get("runs", [])) print(f"Merged SARIF contains {result_count} findings") # Write output - output_file = output_dir / "findings.sarif" output_file.write_text(json.dumps(merged, indent=2)) print(f"Written to {output_file}") diff --git a/plugins/static-analysis/skills/semgrep/workflows/scan-workflow.md b/plugins/static-analysis/skills/semgrep/workflows/scan-workflow.md index 3dff901..5723e8c 100644 --- a/plugins/static-analysis/skills/semgrep/workflows/scan-workflow.md +++ b/plugins/static-analysis/skills/semgrep/workflows/scan-workflow.md @@ -24,10 +24,31 @@ Mark Step 3 as `completed` ONLY after user says "yes", "proceed", "approved", or --- -## Step 1: Detect Languages and Pro Availability +## Step 1: Resolve Output Directory, Detect Languages and Pro Availability > **Entry:** User has specified or confirmed the target directory. -> **Exit:** Language list with file counts produced; Pro availability determined. +> **Exit:** `OUTPUT_DIR` resolved and created; language list with file counts produced; Pro availability determined. + +### Resolve Output Directory + +If the user specified an output directory in their prompt, use it as `OUTPUT_DIR`. Otherwise, auto-increment. In both cases, **always `mkdir -p`** to ensure the directory exists. 
+
+```bash
+if [ -n "$USER_SPECIFIED_DIR" ]; then
+  OUTPUT_DIR="$USER_SPECIFIED_DIR"
+else
+  BASE="static_analysis_semgrep"
+  N=1
+  while [ -e "${BASE}_${N}" ]; do
+    N=$((N + 1))
+  done
+  OUTPUT_DIR="${BASE}_${N}"
+fi
+mkdir -p "$OUTPUT_DIR/raw" "$OUTPUT_DIR/results"
+echo "Output directory: $OUTPUT_DIR"
+```
+
+`$OUTPUT_DIR` is used by all subsequent steps. Pass its **absolute path** to scanner subagents. Scanners write raw output to `$OUTPUT_DIR/raw/`; merged/filtered results go to `$OUTPUT_DIR/results/`.
 
 **Detect Pro availability** (requires Bash):
 
@@ -119,7 +140,7 @@ Present plan to user with **explicit ruleset listing**:
 ## Semgrep Scan Plan
 
 **Target:** /path/to/codebase
-**Output directory:** ./semgrep-results-001/
+**Output directory:** $OUTPUT_DIR
 **Engine:** Semgrep Pro (cross-file analysis) | Semgrep OSS (single-file)
 **Scan mode:** Run all | Important only (security vulns, medium-high confidence/impact)
 
@@ -176,22 +197,34 @@ Before marking Step 3 complete:
 - [ ] **Final ruleset list captured for Step 4**
 - [ ] Agent type listed: `static-analysis:semgrep-scanner`
 
+### Log Approved Rulesets
+
+After approval, write the approved rulesets to `$OUTPUT_DIR/rulesets.txt`:
+
+```bash
+cat > "$OUTPUT_DIR/rulesets.txt" << RULESETS
+# Semgrep Scan — Approved Rulesets
+# Generated: $(date -Iseconds)
+# Scan mode: <run-all | important-only>
+
+## Rulesets:
+
+p/security-audit
+p/secrets
+p/python
+p/django
+https://github.com/trailofbits/semgrep-rules
+RULESETS
+```
+
 ---
 
 ## Step 4: Spawn Parallel Scan Tasks
 
 > **Entry:** Step 3 approved — user explicitly confirmed the plan.
-> **Exit:** All scan Tasks completed; result files exist in output directory.
-
-**Create output directory** with run number to avoid collisions:
+> **Exit:** All scan Tasks completed; result files exist in `$OUTPUT_DIR/raw/`.
-```bash -LAST=$(ls -d semgrep-results-[0-9][0-9][0-9] 2>/dev/null | sort | tail -1 | grep -o '[0-9]*$' || true) -NEXT_NUM=$(printf "%03d" $(( ${LAST:-0} + 1 ))) -OUTPUT_DIR="semgrep-results-${NEXT_NUM}" -mkdir -p "$OUTPUT_DIR" -echo "Output directory: $OUTPUT_DIR" -``` +**Use `$OUTPUT_DIR` resolved in Step 1.** It already exists; no need to create it again. Scanners write all output to `$OUTPUT_DIR/raw/`. **Spawn N Tasks in a SINGLE message** (one per language category) using `subagent_type: static-analysis:semgrep-scanner`. @@ -205,15 +238,15 @@ Use the scanner task prompt template from [scanner-task-prompt.md](../references Spawn these 3 Tasks in a SINGLE message: -1. **Task: Python Scanner** — Rulesets: p/python, p/django, p/security-audit, p/secrets, trailofbits → `semgrep-results-001/python-*.json` -2. **Task: JavaScript Scanner** — Rulesets: p/javascript, p/react, p/nodejs, p/security-audit, p/secrets, trailofbits → `semgrep-results-001/js-*.json` -3. **Task: Docker Scanner** — Rulesets: p/dockerfile → `semgrep-results-001/docker-*.json` +1. **Task: Python Scanner** — Rulesets: p/python, p/django, p/security-audit, p/secrets, trailofbits → `$OUTPUT_DIR/raw/python-*.json` +2. **Task: JavaScript Scanner** — Rulesets: p/javascript, p/react, p/nodejs, p/security-audit, p/secrets, trailofbits → `$OUTPUT_DIR/raw/js-*.json` +3. 
**Task: Docker Scanner** — Rulesets: p/dockerfile → `$OUTPUT_DIR/raw/docker-*.json` ### Operational Notes - Always use **absolute paths** for `[TARGET]` — subagents can't resolve relative paths -- Clone GitHub URL rulesets into `[OUTPUT_DIR]/repos/` — never pass URLs directly to `--config` (semgrep's URL handling fails on repos with non-standard YAML) -- Delete `[OUTPUT_DIR]/repos/` after all scans complete +- Clone GitHub URL rulesets into `$OUTPUT_DIR/repos/` — never pass URLs directly to `--config` (semgrep's URL handling fails on repos with non-standard YAML) +- Delete `$OUTPUT_DIR/repos/` after all scans complete - Run rulesets in parallel with `&` and `wait`, not sequentially - Use `--include="*.py"` for language-specific rulesets, but NOT for cross-language rulesets (p/security-audit, p/secrets, third-party repos) @@ -222,20 +255,23 @@ Spawn these 3 Tasks in a SINGLE message: ## Step 5: Merge Results and Report > **Entry:** Step 4 complete — all scan Tasks finished. -> **Exit:** `findings.sarif` exists in output directory and is valid JSON. +> **Exit:** `results.sarif` exists in `$OUTPUT_DIR/results/` and is valid JSON. -**Important-only mode: Post-filter before merge.** Apply the filter from [scan-modes.md](../references/scan-modes.md) ("Filter All Result Files in a Directory" section) to each result JSON. +**Important-only mode: Post-filter before merge.** Apply the filter from [scan-modes.md](../references/scan-modes.md) ("Filter All Result Files in a Directory" section) to each result JSON in `$OUTPUT_DIR/raw/`. The filter creates `*-important.json` files alongside the originals — the originals are preserved unmodified. **Generate merged SARIF** using the merge script. 
The resolved path is in SKILL.md's "Merge command" section — use that exact path: ```bash -uv run {baseDir}/scripts/merge_triaged_sarif.py [OUTPUT_DIR] +uv run {baseDir}/scripts/merge_triaged_sarif.py $OUTPUT_DIR/raw $OUTPUT_DIR/results/results.sarif ``` +- **Run-all mode:** The script merges all `*.sarif` files from `$OUTPUT_DIR/raw/`. +- **Important-only mode:** Run the post-filter first (creates `*-important.json` in `raw/`), then run the merge script. Raw SARIF files are unaffected by the JSON post-filter, so the merge operates on the unfiltered SARIF. For SARIF-level filtering, apply the jq post-filter from scan-modes.md to `$OUTPUT_DIR/results/results.sarif` after merge. + **Verify merged SARIF is valid:** ```bash -python -c "import json; d=json.load(open('[OUTPUT_DIR]/findings.sarif')); print(f'{sum(len(r.get(\"results\",[]))for r in d.get(\"runs\",[]))} findings in merged SARIF')" +python -c "import json; d=json.load(open('$OUTPUT_DIR/results/results.sarif')); print(f'{sum(len(r.get(\"results\",[]))for r in d.get(\"runs\",[]))} findings in merged SARIF')" ``` If verification fails, the merge script produced invalid output — investigate before reporting. @@ -262,9 +298,9 @@ If verification fails, the merge script produced invalid output — investigate - Code quality: 8 Results written to: -- semgrep-results-001/findings.sarif (merged SARIF) -- semgrep-results-001/*.json (raw scan results per ruleset) -- semgrep-results-001/*.sarif (raw SARIF per ruleset) +- $OUTPUT_DIR/results/results.sarif (merged SARIF) +- $OUTPUT_DIR/raw/ (per-scan raw results, unfiltered) +- $OUTPUT_DIR/rulesets.txt (approved rulesets) ``` -**Verify** before reporting: confirm `findings.sarif` exists and is valid JSON. +**Verify** before reporting: confirm `results.sarif` exists and is valid JSON.