From 8229b2e553dda2a07eaa744fff8558faf8e196ed Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 4 Apr 2026 06:51:45 +0000 Subject: [PATCH 1/5] Initial plan From 5f09a9e35c8c0c421b0e09d7020ed2c66ffc4027 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 4 Apr 2026 07:12:50 +0000 Subject: [PATCH 2/5] docs: add guide for consuming audit reports with an agent Agent-Logs-Url: https://github.com/github/gh-aw/sessions/e0e43f13-47ea-4037-997f-40a0fa71e6d8 Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- docs/astro.config.mjs | 1 + .../content/docs/guides/audit-with-agents.md | 238 ++++++++++++++++++ pkg/agentdrain/data/default_weights.json | 56 +---- 3 files changed, 248 insertions(+), 47 deletions(-) create mode 100644 docs/src/content/docs/guides/audit-with-agents.md diff --git a/docs/astro.config.mjs b/docs/astro.config.mjs index fd093eb9b07..6b6fdb600c3 100644 --- a/docs/astro.config.mjs +++ b/docs/astro.config.mjs @@ -264,6 +264,7 @@ export default defineConfig({ { label: 'Self-Hosted Runners', link: '/guides/self-hosted-runners/' }, { label: 'Ephemerals', link: '/guides/ephemerals/' }, { label: 'Web Search', link: '/guides/web-search/' }, + { label: 'Audit Reports with Agents', link: '/guides/audit-with-agents/' }, ], }, { diff --git a/docs/src/content/docs/guides/audit-with-agents.md b/docs/src/content/docs/guides/audit-with-agents.md new file mode 100644 index 00000000000..fd57c51077e --- /dev/null +++ b/docs/src/content/docs/guides/audit-with-agents.md @@ -0,0 +1,238 @@ +--- +title: Consuming Audit Reports with Agents +description: How to feed structured audit output into agentic workflows for automated triage, trend analysis, and remediation. +--- + +The audit commands produce structured JSON that agents can consume programmatically for automated triage, cost monitoring, and incident response. 
This guide shows how to connect audit data to workflow agents. + +## Getting structured audit data + +All three audit commands support `--json`, which writes structured output to stdout: + +```bash +# Single run audit +gh aw audit --json + +# Cross-run analysis +gh aw logs [workflow] --last 10 --json + +# Before/after comparison +gh aw audit diff --json +``` + +### Key fields for agent consumption + +| Field | Description | +|-------|-------------| +| `key_findings` | Categorized issues with severity and impact | +| `recommendations` | Prioritized actions with example fixes | +| `firewall_analysis` | Network request stats per domain | +| `mcp_tool_usage` | Per-tool invocation counts and error rates | +| `metrics` | Token usage, estimated cost, and run duration | +| `errors` / `warnings` | Structured error details with file and line | + +Use `jq` to extract only the fields an agent needs before passing to a model: + +```bash +# Key findings and recommendations only +gh aw audit --json | jq '{findings: .key_findings, recommendations: .recommendations}' + +# Domains that were blocked +gh aw audit --json | jq '.firewall_analysis.domains[] | select(.blocked > 0)' + +# MCP tools with errors +gh aw audit --json | jq '.mcp_tool_usage.summary[] | select(.error_count > 0)' +``` + +For cross-run reports, extract the fields relevant to trend analysis: + +```bash +# Per-run cost and token data +gh aw logs my-workflow --last 10 --json | jq '.per_run_breakdown[] | {run_id, cost, tokens, turns}' + +# Domain inventory showing policy status across runs +gh aw logs my-workflow --last 10 --json | jq '.domain_inventory[] | {domain, overall_status, seen_in_runs}' +``` + +## Feeding audit data into a workflow agent + +### Post findings as a review comment + +This workflow runs after each completed agent run and posts audit findings as a pull request comment: + +```aw wrap +--- +description: Post audit findings as a PR comment after each agent run +on: + workflow_run: + workflows: 
['my-workflow'] + types: [completed] +engine: copilot +tools: + github: + toolsets: [pull_requests] + agentic-workflows: +permissions: + contents: read + actions: read + pull-requests: write +--- + +# Summarize Audit Findings + +Run ID: ${{ github.event.workflow_run.id }} + +1. Fetch the audit report for run ${{ github.event.workflow_run.id }} using the `audit` tool +2. Identify the pull request that triggered this workflow run +3. Post a comment summarizing the key findings, any blocked domains, and MCP tool errors +4. Highlight critical issues (severity: high or error) that need immediate attention +5. If there are no findings, post a brief "no issues found" comment +``` + +### Detect regressions with diff + +This workflow compares a baseline run against a current run and opens an issue if regressions are found: + +```aw wrap +--- +description: Detect regressions between two workflow runs +on: + workflow_dispatch: + inputs: + base_run_id: + description: 'Baseline run ID' + required: true + current_run_id: + description: 'Current run ID to compare' + required: true +engine: copilot +tools: + github: + toolsets: [issues] + agentic-workflows: +permissions: + contents: read + actions: read + issues: write +--- + +# Regression Detection + +Compare run ${{ inputs.base_run_id }} (baseline) against ${{ inputs.current_run_id }} (current). + +1. Run `gh aw audit diff ${{ inputs.base_run_id }} ${{ inputs.current_run_id }} --json` using the shell tool +2. Check for: new blocked domains, increased MCP error rates, cost increase > 20%, or token usage increase > 50% +3. If regressions are found, open a GitHub issue titled "Regression detected in [workflow name]" with: + - A table of changes from `run_metrics_diff` + - List of new or changed domains from `firewall_diff` + - Affected MCP tools from `mcp_tools_diff` +4. 
If no regressions are found, output a summary confirming stable behavior +``` + +### Auto-file issues from audit findings + +This workflow runs `gh aw audit` after each agent run and creates GitHub issues for high-severity findings: + +```aw wrap +--- +description: File GitHub issues for high-severity audit findings +on: + workflow_run: + workflows: ['my-workflow'] + types: [completed] +engine: copilot +tools: + github: + toolsets: [issues] + agentic-workflows: +permissions: + contents: read + actions: read + issues: write +--- + +# Auto-File Issues for Critical Findings + +Run ID: ${{ github.event.workflow_run.id }} + +1. Fetch the audit report for run ${{ github.event.workflow_run.id }} using the `audit` tool +2. Filter `key_findings` for entries with severity `high` or `critical` +3. For each critical finding, check if a GitHub issue with the same title already exists +4. If no duplicate exists, create an issue with: + - Title: the finding title + - Body: description, impact, and recommendations from the audit report + - Label: `audit-finding` +5. If no critical findings, call the `noop` safe output tool +``` + +## Building an audit monitoring agent + +This full example monitors a workflow over time, detecting cost spikes, new blocked domains, and error rate increases, then posts a weekly digest: + +```aw wrap +--- +description: Weekly audit digest with trend analysis +on: + schedule: weekly +engine: copilot +tools: + github: + toolsets: [discussions] + agentic-workflows: + cache-memory: + key: audit-monitoring-trends +permissions: + contents: read + actions: read + discussions: write +--- + +# Weekly Audit Monitoring Digest + +Workflow to monitor: my-workflow + +## Step 1: Collect data + +Run `gh aw logs my-workflow --last 10 --json` using the shell tool and capture the output. + +## Step 2: Load previous trends + +Read `/tmp/gh-aw/cache-memory/audit-trends.json` if it exists (previous week's baseline). 
+ +## Step 3: Analyze trends + +Compare current data against the baseline to detect: + +- **Cost spikes**: runs where `cost > 2× average` (indicated by `cost_spike: true` in `per_run_breakdown`) +- **New blocked domains**: domains in `domain_inventory` with `overall_status: denied` not present in the baseline +- **MCP reliability**: servers in `mcp_health` with `error_rate > 0.10` or `unreliable: true` +- **Error trend**: check if `error_trend.runs_with_errors` is increasing week-over-week + +## Step 4: Post discussion + +Create a GitHub discussion titled "Audit Digest — [date]" with: +- Executive summary: runs analyzed, total cost, avg tokens, overall deny rate +- Anomalies table: any spikes or new blocked domains +- MCP health table: servers with elevated error rates +- Trend direction (improving / stable / degrading) based on comparison + +## Step 5: Update cache + +Write updated aggregate metrics to `/tmp/gh-aw/cache-memory/audit-trends.json`: +- Use filesystem-safe timestamps (YYYY-MM-DD, not ISO 8601 with colons) +- Store rolling averages for cost, tokens, error count, and deny rate +- Keep only the last 30 days of data to limit cache size +``` + +> [!TIP] +> Store aggregate metrics (rolling averages, domain counts) in `cache-memory` rather than full audit JSON. Full cross-run reports can be large; caching only the summary fields keeps well within GitHub Actions cache limits. + +## Tips + +**JSON schema stability**: The top-level fields (`key_findings`, `recommendations`, `metrics`, `firewall_analysis`, `mcp_tool_usage`) are stable. Nested sub-fields may be extended in minor releases but are not removed without deprecation. Pin your `jq` filters to the fields you rely on and treat unknown fields as optional. + +**Combining with `--parse`**: Add `--parse` to run log parsers before generating JSON output. This populates `behavior_fingerprint` and `agentic_assessments`, which give agents richer context for behavioral analysis and pattern detection. 
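Because nested sub-fields may be extended between releases, downstream scripts should read audit JSON defensively rather than assuming an exact shape. A minimal Python sketch under that assumption — the stable top-level field names come from the table earlier in this guide; everything else about the report structure is treated as optional:

```python
import json

# Top-level fields documented as stable; anything else is treated as optional.
STABLE_FIELDS = ("key_findings", "recommendations", "metrics",
                 "firewall_analysis", "mcp_tool_usage")

def extract_stable(report: dict) -> dict:
    """Keep only the stable top-level fields, ignoring unknown or missing keys."""
    return {k: report[k] for k in STABLE_FIELDS if k in report}

# A report missing some fields and carrying an unknown one: no KeyError,
# and the unknown key is silently dropped.
raw = '{"key_findings": [], "metrics": {"tokens": 1200}, "experimental_field": 1}'
slim = extract_stable(json.loads(raw))
print(sorted(slim))  # ['key_findings', 'metrics']
```

The same pattern applies to nested lookups: prefer `report.get("metrics", {}).get("tokens")` over direct indexing so a schema extension never breaks the agent's pre-processing step.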
+ +**Before/after optimization**: Use `gh aw audit diff` in optimization workflows to verify that prompt or configuration changes reduced cost and domain access without introducing new errors. The `run_metrics_diff.cost_change` and `run_metrics_diff.token_usage_change` fields give direct before/after comparisons. + +**Filtering for context windows**: Cross-run JSON from `gh aw logs --json` can be large. Extract only the fields your agent needs — for example, `per_run_breakdown` for cost tracking or `domain_inventory` for firewall policy analysis — before passing to a model with a limited context window. diff --git a/pkg/agentdrain/data/default_weights.json b/pkg/agentdrain/data/default_weights.json index 963f9eae348..3c1f2ab812a 100644 --- a/pkg/agentdrain/data/default_weights.json +++ b/pkg/agentdrain/data/default_weights.json @@ -3,12 +3,7 @@ "clusters": null, "config": { "Depth": 4, - "ExcludeFields": [ - "session_id", - "trace_id", - "span_id", - "timestamp" - ], + "ExcludeFields": ["session_id", "trace_id", "span_id", "timestamp"], "MaskRules": [ { "Name": "uuid", @@ -54,21 +49,12 @@ "id": 1, "size": 100, "stage": "finish", - "template": [ - "stage=finish", - "\u003c*\u003e", - "tokens=\u003cNUM\u003e" - ] + "template": ["stage=finish", "\u003c*\u003e", "tokens=\u003cNUM\u003e"] } ], "config": { "Depth": 4, - "ExcludeFields": [ - "session_id", - "trace_id", - "span_id", - "timestamp" - ], + "ExcludeFields": ["session_id", "trace_id", "span_id", "timestamp"], "MaskRules": [ { "Name": "uuid", @@ -114,21 +100,12 @@ "id": 1, "size": 17, "stage": "plan", - "template": [ - "stage=plan", - "errors=\u003cNUM\u003e", - "turns=\u003cNUM\u003e" - ] + "template": ["stage=plan", "errors=\u003cNUM\u003e", "turns=\u003cNUM\u003e"] } ], "config": { "Depth": 4, - "ExcludeFields": [ - "session_id", - "trace_id", - "span_id", - "timestamp" - ], + "ExcludeFields": ["session_id", "trace_id", "span_id", "timestamp"], "MaskRules": [ { "Name": "uuid", @@ -172,12 +149,7 @@ "clusters": 
null, "config": { "Depth": 4, - "ExcludeFields": [ - "session_id", - "trace_id", - "span_id", - "timestamp" - ], + "ExcludeFields": ["session_id", "trace_id", "span_id", "timestamp"], "MaskRules": [ { "Name": "uuid", @@ -221,12 +193,7 @@ "clusters": null, "config": { "Depth": 4, - "ExcludeFields": [ - "session_id", - "trace_id", - "span_id", - "timestamp" - ], + "ExcludeFields": ["session_id", "trace_id", "span_id", "timestamp"], "MaskRules": [ { "Name": "uuid", @@ -564,12 +531,7 @@ ], "config": { "Depth": 4, - "ExcludeFields": [ - "session_id", - "trace_id", - "span_id", - "timestamp" - ], + "ExcludeFields": ["session_id", "trace_id", "span_id", "timestamp"], "MaskRules": [ { "Name": "uuid", @@ -609,4 +571,4 @@ }, "next_id": 8 } -} \ No newline at end of file +} From 6a99e4b176589327d83246a2f4beac17257c5ab4 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 4 Apr 2026 07:22:26 +0000 Subject: [PATCH 3/5] docs: unbloat audit-with-agents guide Agent-Logs-Url: https://github.com/github/gh-aw/sessions/e488f891-67d4-4110-9131-7eace4ba470f Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- .../content/docs/guides/audit-with-agents.md | 140 +++--------------- 1 file changed, 19 insertions(+), 121 deletions(-) diff --git a/docs/src/content/docs/guides/audit-with-agents.md b/docs/src/content/docs/guides/audit-with-agents.md index fd57c51077e..06293c075e2 100644 --- a/docs/src/content/docs/guides/audit-with-agents.md +++ b/docs/src/content/docs/guides/audit-with-agents.md @@ -3,62 +3,23 @@ title: Consuming Audit Reports with Agents description: How to feed structured audit output into agentic workflows for automated triage, trend analysis, and remediation. --- -The audit commands produce structured JSON that agents can consume programmatically for automated triage, cost monitoring, and incident response. This guide shows how to connect audit data to workflow agents. 
- -## Getting structured audit data - -All three audit commands support `--json`, which writes structured output to stdout: +All three audit commands support `--json`, which writes structured output to stdout. ```bash -# Single run audit -gh aw audit --json - -# Cross-run analysis -gh aw logs [workflow] --last 10 --json - -# Before/after comparison -gh aw audit diff --json +gh aw audit --json # single run +gh aw logs [workflow] --last 10 --json # cross-run analysis +gh aw audit diff --json # before/after comparison ``` -### Key fields for agent consumption - -| Field | Description | -|-------|-------------| -| `key_findings` | Categorized issues with severity and impact | -| `recommendations` | Prioritized actions with example fixes | -| `firewall_analysis` | Network request stats per domain | -| `mcp_tool_usage` | Per-tool invocation counts and error rates | -| `metrics` | Token usage, estimated cost, and run duration | -| `errors` / `warnings` | Structured error details with file and line | - -Use `jq` to extract only the fields an agent needs before passing to a model: +Key fields for agent consumption: `key_findings`, `recommendations`, `firewall_analysis`, `mcp_tool_usage`, `metrics`, `errors`. 
Use `jq` to extract only what the model needs: ```bash -# Key findings and recommendations only gh aw audit --json | jq '{findings: .key_findings, recommendations: .recommendations}' - -# Domains that were blocked gh aw audit --json | jq '.firewall_analysis.domains[] | select(.blocked > 0)' - -# MCP tools with errors -gh aw audit --json | jq '.mcp_tool_usage.summary[] | select(.error_count > 0)' -``` - -For cross-run reports, extract the fields relevant to trend analysis: - -```bash -# Per-run cost and token data gh aw logs my-workflow --last 10 --json | jq '.per_run_breakdown[] | {run_id, cost, tokens, turns}' - -# Domain inventory showing policy status across runs -gh aw logs my-workflow --last 10 --json | jq '.domain_inventory[] | {domain, overall_status, seen_in_runs}' ``` -## Feeding audit data into a workflow agent - -### Post findings as a review comment - -This workflow runs after each completed agent run and posts audit findings as a pull request comment: +## Posting findings as a PR comment ```aw wrap --- @@ -80,18 +41,10 @@ permissions: # Summarize Audit Findings -Run ID: ${{ github.event.workflow_run.id }} - -1. Fetch the audit report for run ${{ github.event.workflow_run.id }} using the `audit` tool -2. Identify the pull request that triggered this workflow run -3. Post a comment summarizing the key findings, any blocked domains, and MCP tool errors -4. Highlight critical issues (severity: high or error) that need immediate attention -5. If there are no findings, post a brief "no issues found" comment +Fetch the audit report for run ${{ github.event.workflow_run.id }}, identify the pull request that triggered it, and post a comment summarizing key findings and blocked domains. Highlight issues with severity `high` or `critical`. If there are no findings, post a brief "no issues found" comment. 
``` -### Detect regressions with diff - -This workflow compares a baseline run against a current run and opens an issue if regressions are found: +## Detecting regressions with diff ```aw wrap --- @@ -118,20 +71,10 @@ permissions: # Regression Detection -Compare run ${{ inputs.base_run_id }} (baseline) against ${{ inputs.current_run_id }} (current). - -1. Run `gh aw audit diff ${{ inputs.base_run_id }} ${{ inputs.current_run_id }} --json` using the shell tool -2. Check for: new blocked domains, increased MCP error rates, cost increase > 20%, or token usage increase > 50% -3. If regressions are found, open a GitHub issue titled "Regression detected in [workflow name]" with: - - A table of changes from `run_metrics_diff` - - List of new or changed domains from `firewall_diff` - - Affected MCP tools from `mcp_tools_diff` -4. If no regressions are found, output a summary confirming stable behavior +Run `gh aw audit diff ${{ inputs.base_run_id }} ${{ inputs.current_run_id }} --json`. Check for new blocked domains, increased MCP error rates, cost increase > 20%, or token usage increase > 50%. If regressions are found, open a GitHub issue with a table from `run_metrics_diff`, affected domains from `firewall_diff`, and affected MCP tools from `mcp_tools_diff`. ``` -### Auto-file issues from audit findings - -This workflow runs `gh aw audit` after each agent run and creates GitHub issues for high-severity findings: +## Filing issues from audit findings ```aw wrap --- @@ -153,21 +96,10 @@ permissions: # Auto-File Issues for Critical Findings -Run ID: ${{ github.event.workflow_run.id }} - -1. Fetch the audit report for run ${{ github.event.workflow_run.id }} using the `audit` tool -2. Filter `key_findings` for entries with severity `high` or `critical` -3. For each critical finding, check if a GitHub issue with the same title already exists -4. 
If no duplicate exists, create an issue with: - - Title: the finding title - - Body: description, impact, and recommendations from the audit report - - Label: `audit-finding` -5. If no critical findings, call the `noop` safe output tool +Fetch the audit report for run ${{ github.event.workflow_run.id }}. Filter `key_findings` for severity `high` or `critical`. For each finding without a matching open issue, create one with the finding title, description, impact, and recommendations, labelled `audit-finding`. If no critical findings, call the `noop` safe output tool. ``` -## Building an audit monitoring agent - -This full example monitors a workflow over time, detecting cost spikes, new blocked domains, and error rate increases, then posts a weekly digest: +## Weekly audit monitoring agent ```aw wrap --- @@ -189,50 +121,16 @@ permissions: # Weekly Audit Monitoring Digest -Workflow to monitor: my-workflow - -## Step 1: Collect data - -Run `gh aw logs my-workflow --last 10 --json` using the shell tool and capture the output. - -## Step 2: Load previous trends - -Read `/tmp/gh-aw/cache-memory/audit-trends.json` if it exists (previous week's baseline). 
- -## Step 3: Analyze trends - -Compare current data against the baseline to detect: - -- **Cost spikes**: runs where `cost > 2× average` (indicated by `cost_spike: true` in `per_run_breakdown`) -- **New blocked domains**: domains in `domain_inventory` with `overall_status: denied` not present in the baseline -- **MCP reliability**: servers in `mcp_health` with `error_rate > 0.10` or `unreliable: true` -- **Error trend**: check if `error_trend.runs_with_errors` is increasing week-over-week - -## Step 4: Post discussion - -Create a GitHub discussion titled "Audit Digest — [date]" with: -- Executive summary: runs analyzed, total cost, avg tokens, overall deny rate -- Anomalies table: any spikes or new blocked domains -- MCP health table: servers with elevated error rates -- Trend direction (improving / stable / degrading) based on comparison - -## Step 5: Update cache - -Write updated aggregate metrics to `/tmp/gh-aw/cache-memory/audit-trends.json`: -- Use filesystem-safe timestamps (YYYY-MM-DD, not ISO 8601 with colons) -- Store rolling averages for cost, tokens, error count, and deny rate -- Keep only the last 30 days of data to limit cache size +1. Run `gh aw logs my-workflow --last 10 --json` and read `/tmp/gh-aw/cache-memory/audit-trends.json` as the previous baseline. +2. Detect: cost spikes (`cost_spike: true` in `per_run_breakdown`), new denied domains in `domain_inventory`, MCP servers with `error_rate > 0.10` or `unreliable: true`, and week-over-week changes in `error_trend.runs_with_errors`. +3. Create a GitHub discussion "Audit Digest — [YYYY-MM-DD]" with an executive summary, anomalies table, and MCP health table. +4. Update `/tmp/gh-aw/cache-memory/audit-trends.json` with rolling averages (cost, tokens, error count, deny rate), keeping only the last 30 days. ``` -> [!TIP] -> Store aggregate metrics (rolling averages, domain counts) in `cache-memory` rather than full audit JSON. 
Full cross-run reports can be large; caching only the summary fields keeps well within GitHub Actions cache limits. - ## Tips -**JSON schema stability**: The top-level fields (`key_findings`, `recommendations`, `metrics`, `firewall_analysis`, `mcp_tool_usage`) are stable. Nested sub-fields may be extended in minor releases but are not removed without deprecation. Pin your `jq` filters to the fields you rely on and treat unknown fields as optional. - -**Combining with `--parse`**: Add `--parse` to run log parsers before generating JSON output. This populates `behavior_fingerprint` and `agentic_assessments`, which give agents richer context for behavioral analysis and pattern detection. +The top-level fields (`key_findings`, `recommendations`, `metrics`, `firewall_analysis`, `mcp_tool_usage`) are stable across releases; nested sub-fields may be extended but are not removed without deprecation. -**Before/after optimization**: Use `gh aw audit diff` in optimization workflows to verify that prompt or configuration changes reduced cost and domain access without introducing new errors. The `run_metrics_diff.cost_change` and `run_metrics_diff.token_usage_change` fields give direct before/after comparisons. +Add `--parse` to populate `behavior_fingerprint` and `agentic_assessments` for richer behavioral context. -**Filtering for context windows**: Cross-run JSON from `gh aw logs --json` can be large. Extract only the fields your agent needs — for example, `per_run_breakdown` for cost tracking or `domain_inventory` for firewall policy analysis — before passing to a model with a limited context window. +Cross-run JSON from `gh aw logs --json` can be large — extract only the fields needed (e.g. `per_run_breakdown`, `domain_inventory`) before passing to a model. 
From a79c21ef7c65b7e03d64998a03988bafd74dae70 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 4 Apr 2026 13:20:59 +0000 Subject: [PATCH 4/5] chore: merge main and unbloat audit guide further Agent-Logs-Url: https://github.com/github/gh-aw/sessions/c40af69e-56b6-4b2b-9e3b-2085d6fbf650 Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- .../content/docs/guides/audit-with-agents.md | 26 +++++-------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/docs/src/content/docs/guides/audit-with-agents.md b/docs/src/content/docs/guides/audit-with-agents.md index 06293c075e2..6ee56512722 100644 --- a/docs/src/content/docs/guides/audit-with-agents.md +++ b/docs/src/content/docs/guides/audit-with-agents.md @@ -3,21 +3,13 @@ title: Consuming Audit Reports with Agents description: How to feed structured audit output into agentic workflows for automated triage, trend analysis, and remediation. --- -All three audit commands support `--json`, which writes structured output to stdout. +All three audit commands accept `--json` to write structured output to stdout. Pipe through `jq` to extract the fields a model needs before passing to a workflow. -```bash -gh aw audit --json # single run -gh aw logs [workflow] --last 10 --json # cross-run analysis -gh aw audit diff --json # before/after comparison -``` - -Key fields for agent consumption: `key_findings`, `recommendations`, `firewall_analysis`, `mcp_tool_usage`, `metrics`, `errors`. 
Use `jq` to extract only what the model needs: - -```bash -gh aw audit --json | jq '{findings: .key_findings, recommendations: .recommendations}' -gh aw audit --json | jq '.firewall_analysis.domains[] | select(.blocked > 0)' -gh aw logs my-workflow --last 10 --json | jq '.per_run_breakdown[] | {run_id, cost, tokens, turns}' -``` +| Command | Use case | +|---------|----------| +| `gh aw audit --json` | Single run — `key_findings`, `recommendations`, `metrics` | +| `gh aw logs [workflow] --last 10 --json` | Trend analysis — `per_run_breakdown`, `domain_inventory` | +| `gh aw audit diff --json` | Before/after — `run_metrics_diff`, `firewall_diff` | ## Posting findings as a PR comment @@ -129,8 +121,4 @@ permissions: ## Tips -The top-level fields (`key_findings`, `recommendations`, `metrics`, `firewall_analysis`, `mcp_tool_usage`) are stable across releases; nested sub-fields may be extended but are not removed without deprecation. - -Add `--parse` to populate `behavior_fingerprint` and `agentic_assessments` for richer behavioral context. - -Cross-run JSON from `gh aw logs --json` can be large — extract only the fields needed (e.g. `per_run_breakdown`, `domain_inventory`) before passing to a model. +Top-level fields (`key_findings`, `recommendations`, `metrics`, `firewall_analysis`, `mcp_tool_usage`) are stable; nested sub-fields may be extended but are not removed without deprecation. Add `--parse` to populate `behavior_fingerprint` and `agentic_assessments`. Cross-run JSON can be large — extract only the slices your model needs. 
From 1ccc24fe0bf34d38ce3b2117a84d09a6d4aa5af4 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 4 Apr 2026 13:28:48 +0000 Subject: [PATCH 5/5] docs: use agentic-workflows MCP tool in audit guide prompts Agent-Logs-Url: https://github.com/github/gh-aw/sessions/a3f17d4f-e05e-4619-918a-75d428984ea6 Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- docs/src/content/docs/guides/audit-with-agents.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/docs/src/content/docs/guides/audit-with-agents.md b/docs/src/content/docs/guides/audit-with-agents.md index 6ee56512722..2e708e05ee5 100644 --- a/docs/src/content/docs/guides/audit-with-agents.md +++ b/docs/src/content/docs/guides/audit-with-agents.md @@ -3,7 +3,7 @@ title: Consuming Audit Reports with Agents description: How to feed structured audit output into agentic workflows for automated triage, trend analysis, and remediation. --- -All three audit commands accept `--json` to write structured output to stdout. Pipe through `jq` to extract the fields a model needs before passing to a workflow. +When running locally, all three audit commands accept `--json` to write structured output to stdout. Pipe through `jq` to extract the fields a model needs. | Command | Use case | |---------|----------| @@ -11,6 +11,8 @@ All three audit commands accept `--json` to write structured output to stdout. P | `gh aw logs [workflow] --last 10 --json` | Trend analysis — `per_run_breakdown`, `domain_inventory` | | `gh aw audit diff --json` | Before/after — `run_metrics_diff`, `firewall_diff` | +Inside GitHub Actions workflows, agents access these commands through the `agentic-workflows` MCP tool rather than calling the CLI directly. 
+ ## Posting findings as a PR comment ```aw wrap @@ -33,7 +35,7 @@ permissions: # Summarize Audit Findings -Fetch the audit report for run ${{ github.event.workflow_run.id }}, identify the pull request that triggered it, and post a comment summarizing key findings and blocked domains. Highlight issues with severity `high` or `critical`. If there are no findings, post a brief "no issues found" comment. +Use the `agentic-workflows` MCP tool `audit` with run ID ${{ github.event.workflow_run.id }}, identify the pull request that triggered it, and post a comment summarizing key findings and blocked domains. Highlight issues with severity `high` or `critical`. If there are no findings, post a brief "no issues found" comment. ``` ## Detecting regressions with diff @@ -63,7 +65,7 @@ permissions: # Regression Detection -Run `gh aw audit diff ${{ inputs.base_run_id }} ${{ inputs.current_run_id }} --json`. Check for new blocked domains, increased MCP error rates, cost increase > 20%, or token usage increase > 50%. If regressions are found, open a GitHub issue with a table from `run_metrics_diff`, affected domains from `firewall_diff`, and affected MCP tools from `mcp_tools_diff`. +Use the `agentic-workflows` MCP tool `audit diff` with base run ID ${{ inputs.base_run_id }} and current run ID ${{ inputs.current_run_id }}. Check for new blocked domains, increased MCP error rates, cost increase > 20%, or token usage increase > 50%. If regressions are found, open a GitHub issue with a table from `run_metrics_diff`, affected domains from `firewall_diff`, and affected MCP tools from `mcp_tools_diff`. ``` ## Filing issues from audit findings @@ -88,7 +90,7 @@ permissions: # Auto-File Issues for Critical Findings -Fetch the audit report for run ${{ github.event.workflow_run.id }}. Filter `key_findings` for severity `high` or `critical`. For each finding without a matching open issue, create one with the finding title, description, impact, and recommendations, labelled `audit-finding`. 
If no critical findings, call the `noop` safe output tool. +Use the `agentic-workflows` MCP tool `audit` with run ID ${{ github.event.workflow_run.id }}. Filter `key_findings` for severity `high` or `critical`. For each finding without a matching open issue, create one with the finding title, description, impact, and recommendations, labelled `audit-finding`. If no critical findings, call the `noop` safe output tool. ``` ## Weekly audit monitoring agent @@ -113,7 +115,7 @@ permissions: # Weekly Audit Monitoring Digest -1. Run `gh aw logs my-workflow --last 10 --json` and read `/tmp/gh-aw/cache-memory/audit-trends.json` as the previous baseline. +1. Use the `agentic-workflows` MCP tool `logs` with parameters `workflow: my-workflow, last: 10` and read `/tmp/gh-aw/cache-memory/audit-trends.json` as the previous baseline. 2. Detect: cost spikes (`cost_spike: true` in `per_run_breakdown`), new denied domains in `domain_inventory`, MCP servers with `error_rate > 0.10` or `unreliable: true`, and week-over-week changes in `error_trend.runs_with_errors`. 3. Create a GitHub discussion "Audit Digest — [YYYY-MM-DD]" with an executive summary, anomalies table, and MCP health table. 4. Update `/tmp/gh-aw/cache-memory/audit-trends.json` with rolling averages (cost, tokens, error count, deny rate), keeping only the last 30 days.