From 9670d215320e386f68d0c023a8af21ee03c0d53b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 4 Apr 2026 17:45:34 +0000 Subject: [PATCH 1/5] feat: support token usage diff and multiple comparison runs in audit diff command Agent-Logs-Url: https://github.com/github/gh-aw/sessions/c2cbe169-434c-4f94-8c73-7af8797b7c5d Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- pkg/cli/audit_diff.go | 128 ++++++++++++++++-- pkg/cli/audit_diff_command.go | 136 ++++++++++++-------- pkg/cli/audit_diff_render.go | 117 +++++++++++++++-- pkg/cli/audit_diff_test.go | 235 ++++++++++++++++++++++++++++++++++ 4 files changed, 544 insertions(+), 72 deletions(-) diff --git a/pkg/cli/audit_diff.go b/pkg/cli/audit_diff.go index 8d79fb805d5..04e1f2b1a9a 100644 --- a/pkg/cli/audit_diff.go +++ b/pkg/cli/audit_diff.go @@ -260,17 +260,43 @@ type MCPToolsDiffSummary struct { AnomalyCount int `json:"anomaly_count"` } +// TokenUsageDiff represents the detailed diff of token usage between two runs, +// based on the firewall proxy token-usage.jsonl data from RunSummary.TokenUsage. +type TokenUsageDiff struct { + Run1InputTokens int `json:"run1_input_tokens"` + Run2InputTokens int `json:"run2_input_tokens"` + InputTokensChange string `json:"input_tokens_change,omitempty"` + Run1OutputTokens int `json:"run1_output_tokens"` + Run2OutputTokens int `json:"run2_output_tokens"` + OutputTokensChange string `json:"output_tokens_change,omitempty"` + Run1CacheReadTokens int `json:"run1_cache_read_tokens"` + Run2CacheReadTokens int `json:"run2_cache_read_tokens"` + CacheReadTokensChange string `json:"cache_read_tokens_change,omitempty"` + Run1CacheWriteTokens int `json:"run1_cache_write_tokens"` + Run2CacheWriteTokens int `json:"run2_cache_write_tokens"` + CacheWriteTokensChange string `json:"cache_write_tokens_change,omitempty"` + Run1EffectiveTokens int `json:"run1_effective_tokens"` + Run2EffectiveTokens int `json:"run2_effective_tokens"` + EffectiveTokensChange string `json:"effective_tokens_change,omitempty"` + Run1TotalRequests int `json:"run1_total_requests"` + Run2TotalRequests int `json:"run2_total_requests"` + RequestsChange string `json:"requests_change,omitempty"` + Run1CacheEfficiency float64 `json:"run1_cache_efficiency"` + Run2CacheEfficiency float64 `json:"run2_cache_efficiency"` +} + // RunMetricsDiff represents the diff of run-level metrics (token usage, duration, turns) between two runs type RunMetricsDiff struct { - Run1TokenUsage int `json:"run1_token_usage"` - Run2TokenUsage int `json:"run2_token_usage"` - TokenUsageChange string `json:"token_usage_change,omitempty"` // e.g. "+15%", "-5%" - Run1Duration string `json:"run1_duration,omitempty"` - Run2Duration string `json:"run2_duration,omitempty"` - DurationChange string `json:"duration_change,omitempty"` // e.g. "+2m30s", "-1m" - Run1Turns int `json:"run1_turns,omitempty"` - Run2Turns int `json:"run2_turns,omitempty"` - TurnsChange int `json:"turns_change,omitempty"` + Run1TokenUsage int `json:"run1_token_usage"` + Run2TokenUsage int `json:"run2_token_usage"` + TokenUsageChange string `json:"token_usage_change,omitempty"` // e.g. "+15%", "-5%" + Run1Duration string `json:"run1_duration,omitempty"` + Run2Duration string `json:"run2_duration,omitempty"` + DurationChange string `json:"duration_change,omitempty"` // e.g. "+2m30s", "-1m" + Run1Turns int `json:"run1_turns,omitempty"` + Run2Turns int `json:"run2_turns,omitempty"` + TurnsChange int `json:"turns_change,omitempty"` + TokenUsageDetails *TokenUsageDiff `json:"token_usage_details,omitempty"` // Detailed breakdown from firewall proxy } // AuditDiff is the top-level diff combining firewall behavior, MCP tool invocations, @@ -429,20 +455,24 @@ func computeRunMetricsDiff(summary1, summary2 *RunSummary) *RunMetricsDiff { var run1Tokens, run2Tokens int var run1Duration, run2Duration time.Duration var run1Turns, run2Turns int + var tu1, tu2 *TokenUsageSummary if summary1 != nil { run1Tokens = summary1.Run.TokenUsage run1Duration = summary1.Run.Duration run1Turns = summary1.Run.Turns + tu1 = summary1.TokenUsage } if summary2 != nil { run2Tokens = summary2.Run.TokenUsage run2Duration = summary2.Run.Duration run2Turns = summary2.Run.Turns + tu2 = summary2.TokenUsage } // Skip if there is no meaningful data - if run1Tokens == 0 && run2Tokens == 0 && run1Duration == 0 && run2Duration == 0 && run1Turns == 0 && run2Turns == 0 { + hasTokenDetails := tu1 != nil || tu2 != nil + if run1Tokens == 0 && run2Tokens == 0 && run1Duration == 0 && run2Duration == 0 && run1Turns == 0 && run2Turns == 0 && !hasTokenDetails { return nil } @@ -473,6 +503,84 @@ func computeRunMetricsDiff(summary1, summary2 *RunSummary) *RunMetricsDiff { } } + diff.TokenUsageDetails = computeTokenUsageDiff(tu1, tu2) + + return diff +} + +// computeTokenUsageDiff computes a detailed diff of token usage between two runs using +// the firewall proxy token-usage.jsonl data (TokenUsageSummary). Returns nil when both +// summaries are nil. +func computeTokenUsageDiff(tu1, tu2 *TokenUsageSummary) *TokenUsageDiff { + if tu1 == nil && tu2 == nil { + return nil + } + + var ( + run1Input, run2Input int + run1Output, run2Output int + run1CacheRead, run2CacheRead int + run1CacheWrite, run2CacheWrite int + run1Effective, run2Effective int + run1Requests, run2Requests int + run1CacheEff, run2CacheEff float64 + ) + + if tu1 != nil { + run1Input = tu1.TotalInputTokens + run1Output = tu1.TotalOutputTokens + run1CacheRead = tu1.TotalCacheReadTokens + run1CacheWrite = tu1.TotalCacheWriteTokens + run1Effective = tu1.TotalEffectiveTokens + run1Requests = tu1.TotalRequests + run1CacheEff = tu1.CacheEfficiency + } + if tu2 != nil { + run2Input = tu2.TotalInputTokens + run2Output = tu2.TotalOutputTokens + run2CacheRead = tu2.TotalCacheReadTokens + run2CacheWrite = tu2.TotalCacheWriteTokens + run2Effective = tu2.TotalEffectiveTokens + run2Requests = tu2.TotalRequests + run2CacheEff = tu2.CacheEfficiency + } + + diff := &TokenUsageDiff{ + Run1InputTokens: run1Input, + Run2InputTokens: run2Input, + Run1OutputTokens: run1Output, + Run2OutputTokens: run2Output, + Run1CacheReadTokens: run1CacheRead, + Run2CacheReadTokens: run2CacheRead, + Run1CacheWriteTokens: run1CacheWrite, + Run2CacheWriteTokens: run2CacheWrite, + Run1EffectiveTokens: run1Effective, + Run2EffectiveTokens: run2Effective, + Run1TotalRequests: run1Requests, + Run2TotalRequests: run2Requests, + Run1CacheEfficiency: run1CacheEff, + Run2CacheEfficiency: run2CacheEff, + } + + if run1Input > 0 || run2Input > 0 { + diff.InputTokensChange = formatVolumeChange(run1Input, run2Input) + } + if run1Output > 0 || run2Output > 0 { + diff.OutputTokensChange = formatVolumeChange(run1Output, run2Output) + } + if run1CacheRead > 0 || run2CacheRead > 0 { + diff.CacheReadTokensChange = formatVolumeChange(run1CacheRead, run2CacheRead) + } + if run1CacheWrite > 0 || run2CacheWrite > 0 { + diff.CacheWriteTokensChange = formatVolumeChange(run1CacheWrite, run2CacheWrite) + } + if run1Effective > 0 || run2Effective > 0 { + diff.EffectiveTokensChange = formatVolumeChange(run1Effective, run2Effective) + } + if run1Requests > 0 || run2Requests > 0 { + diff.RequestsChange = formatCountChange(run1Requests, run2Requests) + } + return diff } diff --git a/pkg/cli/audit_diff_command.go b/pkg/cli/audit_diff_command.go index 097fa97e00c..e552289d4dc 100644 --- a/pkg/cli/audit_diff_command.go +++ b/pkg/cli/audit_diff_command.go @@ -15,35 +15,54 @@ import ( // NewAuditDiffSubcommand creates the audit diff subcommand func NewAuditDiffSubcommand() *cobra.Command { cmd := &cobra.Command{ - Use: "diff ", - Short: "Compare behavior across two workflow runs", - Long: `Compare workflow run behavior between two workflow runs to detect policy regressions, -new unauthorized domains, behavioral drift, and changes in MCP tool usage or run metrics. + Use: "diff [...]", + Short: "Compare behavior across workflow runs", + Long: `Compare workflow run behavior between a base run and one or more comparison runs +to detect policy regressions, new unauthorized domains, behavioral drift, and changes in +MCP tool usage, token usage, or run metrics. -This command downloads artifacts for both runs (using cached data when available), +The first argument is the base (reference) run. All subsequent arguments are compared +against that base. This enables tracking behavioral drift across multiple runs at once. + +This command downloads artifacts for all runs (using cached data when available), analyzes their data, and produces a diff showing: -- New domains that appeared in the second run -- Removed domains that were in the first run but not the second +- New domains that appeared in the comparison run +- Removed domains that were in the base run but not the comparison - Status changes (domains that flipped between allowed and denied) - Volume changes (significant request count changes, >100% threshold) - Anomaly flags (new denied domains, previously-denied now allowed) - MCP tool invocation changes (new/removed tools, call count and error count diffs) - Run metrics comparison (token usage, duration, turns) when cached data is available +- Detailed token usage breakdown (input/output/cache/effective tokens) from firewall proxy Examples: - ` + string(constants.CLIExtensionPrefix) + ` audit diff 12345 12346 # Compare two runs - ` + string(constants.CLIExtensionPrefix) + ` audit diff 12345 12346 --format markdown # Markdown output for PR comments - ` + string(constants.CLIExtensionPrefix) + ` audit diff 12345 12346 --json # JSON for CI integration - ` + string(constants.CLIExtensionPrefix) + ` audit diff 12345 12346 --repo owner/repo # Specify repository`, - Args: cobra.ExactArgs(2), + ` + string(constants.CLIExtensionPrefix) + ` audit diff 12345 12346 # Compare two runs + ` + string(constants.CLIExtensionPrefix) + ` audit diff 12345 12346 12347 12348 # Compare base against 3 runs + ` + string(constants.CLIExtensionPrefix) + ` audit diff 12345 12346 --format markdown # Markdown output for PR comments + ` + string(constants.CLIExtensionPrefix) + ` audit diff 12345 12346 --json # JSON for CI integration + ` + string(constants.CLIExtensionPrefix) + ` audit diff 12345 12346 --repo owner/repo # Specify repository`, + Args: cobra.MinimumNArgs(2), RunE: func(cmd *cobra.Command, args []string) error { - runID1, err := strconv.ParseInt(args[0], 10, 64) + baseRunID, err := strconv.ParseInt(args[0], 10, 64) if err != nil { - return fmt.Errorf("invalid run ID %q: must be a numeric run ID", args[0]) + return fmt.Errorf("invalid base run ID %q: must be a numeric run ID", args[0]) } - runID2, err := strconv.ParseInt(args[1], 10, 64) - if err != nil { - return fmt.Errorf("invalid run ID %q: must be a numeric run ID", args[1]) + + compareRunIDs := make([]int64, 0, len(args)-1) + seen := make(map[int64]bool) + for _, arg := range args[1:] { + id, err := strconv.ParseInt(arg, 10, 64) + if err != nil { + return fmt.Errorf("invalid run ID %q: must be a numeric run ID", arg) + } + if id == baseRunID { + return fmt.Errorf("comparison run ID %d is the same as the base run ID: cannot diff a run against itself", id) + } + if seen[id] { + return fmt.Errorf("duplicate comparison run ID %d: each run ID must appear only once", id) + } + seen[id] = true + compareRunIDs = append(compareRunIDs, id) } outputDir, _ := cmd.Flags().GetString("output") @@ -62,7 +81,7 @@ Examples: repo = parts[1] } - return RunAuditDiff(cmd.Context(), runID1, runID2, owner, repo, hostname, outputDir, verbose, jsonOutput, format) + return RunAuditDiff(cmd.Context(), baseRunID, compareRunIDs, owner, repo, hostname, outputDir, verbose, jsonOutput, format) }, } @@ -74,9 +93,10 @@ Examples: return cmd } -// RunAuditDiff compares behavior between two workflow runs -func RunAuditDiff(ctx context.Context, runID1, runID2 int64, owner, repo, hostname, outputDir string, verbose, jsonOutput bool, format string) error { - auditDiffLog.Printf("Starting audit diff: run1=%d, run2=%d", runID1, runID2) +// RunAuditDiff compares behavior between a base workflow run and one or more comparison runs. +// The base run is the reference point; each comparison run is diffed against it independently. +func RunAuditDiff(ctx context.Context, baseRunID int64, compareRunIDs []int64, owner, repo, hostname, outputDir string, verbose, jsonOutput bool, format string) error { + auditDiffLog.Printf("Starting audit diff: base=%d, compare=%v", baseRunID, compareRunIDs) // Auto-detect GHES host from git remote if hostname is not provided if hostname == "" { @@ -94,57 +114,65 @@ func RunAuditDiff(ctx context.Context, runID1, runID2 int64, owner, repo, hostna default: } - fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("Comparing workflow runs: Run #%d → Run #%d", runID1, runID2))) + if len(compareRunIDs) == 1 { + fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("Comparing workflow runs: Run #%d → Run #%d", baseRunID, compareRunIDs[0]))) + } else { + fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("Comparing workflow runs: Run #%d (base) vs %d comparison runs", baseRunID, len(compareRunIDs)))) + } - // Load run summaries for both runs - fmt.Fprintln(os.Stderr, console.FormatProgressMessage(fmt.Sprintf("Loading data for run %d...", runID1))) - summary1, err := loadRunSummaryForDiff(runID1, outputDir, owner, repo, hostname, verbose) + // Load base run summary once (shared across all comparisons) + fmt.Fprintln(os.Stderr, console.FormatProgressMessage(fmt.Sprintf("Loading data for base run %d...", baseRunID))) + baseSummary, err := loadRunSummaryForDiff(baseRunID, outputDir, owner, repo, hostname, verbose) if err != nil { - return fmt.Errorf("failed to load data for run %d: %w", runID1, err) + return fmt.Errorf("failed to load data for base run %d: %w", baseRunID, err) } - // Check context cancellation between downloads - select { - case <-ctx.Done(): - fmt.Fprintln(os.Stderr, console.FormatWarningMessage("Operation cancelled")) - return ctx.Err() - default: - } + diffs := make([]*AuditDiff, 0, len(compareRunIDs)) - fmt.Fprintln(os.Stderr, console.FormatProgressMessage(fmt.Sprintf("Loading data for run %d...", runID2))) - summary2, err := loadRunSummaryForDiff(runID2, outputDir, owner, repo, hostname, verbose) - if err != nil { - return fmt.Errorf("failed to load data for run %d: %w", runID2, err) - } + for _, compareRunID := range compareRunIDs { + // Check context cancellation between downloads + select { + case <-ctx.Done(): + fmt.Fprintln(os.Stderr, console.FormatWarningMessage("Operation cancelled")) + return ctx.Err() + default: + } - // Warn if no firewall data found - fw1 := summary1.FirewallAnalysis - fw2 := summary2.FirewallAnalysis - if fw1 == nil && fw2 == nil { - fmt.Fprintln(os.Stderr, console.FormatWarningMessage("No firewall data found in either run. Both runs may predate firewall logging.")) - } else { - if fw1 == nil { - fmt.Fprintln(os.Stderr, console.FormatWarningMessage(fmt.Sprintf("No firewall data found for run %d (older run may lack firewall logs)", runID1))) + fmt.Fprintln(os.Stderr, console.FormatProgressMessage(fmt.Sprintf("Loading data for run %d...", compareRunID))) + compareSummary, err := loadRunSummaryForDiff(compareRunID, outputDir, owner, repo, hostname, verbose) + if err != nil { + return fmt.Errorf("failed to load data for run %d: %w", compareRunID, err) } - if fw2 == nil { - fmt.Fprintln(os.Stderr, console.FormatWarningMessage(fmt.Sprintf("No firewall data found for run %d", runID2))) + + // Warn if no firewall data found for this pair + fw1 := baseSummary.FirewallAnalysis + fw2 := compareSummary.FirewallAnalysis + if fw1 == nil && fw2 == nil { + fmt.Fprintln(os.Stderr, console.FormatWarningMessage(fmt.Sprintf("No firewall data found for run pair %d→%d. Both runs may predate firewall logging.", baseRunID, compareRunID))) + } else { + if fw1 == nil { + fmt.Fprintln(os.Stderr, console.FormatWarningMessage(fmt.Sprintf("No firewall data found for base run %d (older run may lack firewall logs)", baseRunID))) + } + if fw2 == nil { + fmt.Fprintln(os.Stderr, console.FormatWarningMessage(fmt.Sprintf("No firewall data found for run %d", compareRunID))) + } } - } - // Compute the full diff - diff := computeAuditDiff(runID1, runID2, summary1, summary2) + diff := computeAuditDiff(baseRunID, compareRunID, baseSummary, compareSummary) + diffs = append(diffs, diff) + } // Render output if jsonOutput || format == "json" { - return renderAuditDiffJSON(diff) + return renderAuditDiffJSON(diffs) } if format == "markdown" { - renderAuditDiffMarkdown(diff) + renderAuditDiffMarkdown(diffs) return nil } // Default: pretty console output - renderAuditDiffPretty(diff) + renderAuditDiffPretty(diffs) return nil } diff --git a/pkg/cli/audit_diff_render.go b/pkg/cli/audit_diff_render.go index 9c318ce2cb3..0a7615ad168 100644 --- a/pkg/cli/audit_diff_render.go +++ b/pkg/cli/audit_diff_render.go @@ -2,6 +2,7 @@ package cli import ( "encoding/json" + "errors" "fmt" "os" "strings" @@ -12,16 +13,50 @@ import ( var auditDiffRenderLog = logger.New("cli:audit_diff_render") -// renderAuditDiffJSON outputs the full audit diff as JSON to stdout -func renderAuditDiffJSON(diff *AuditDiff) error { - auditDiffRenderLog.Printf("Rendering audit diff as JSON: run1=%d, run2=%d", diff.Run1ID, diff.Run2ID) +// renderAuditDiffJSON outputs audit diffs as JSON to stdout. +// When a single diff is provided, outputs a JSON object for backward compatibility. +// When multiple diffs are provided, outputs a JSON array. +func renderAuditDiffJSON(diffs []*AuditDiff) error { + auditDiffRenderLog.Printf("Rendering %d audit diff(s) as JSON", len(diffs)) + if len(diffs) == 0 { + return errors.New("no diffs to render") + } encoder := json.NewEncoder(os.Stdout) encoder.SetIndent("", " ") - return encoder.Encode(diff) + if len(diffs) == 1 { + return encoder.Encode(diffs[0]) + } + return encoder.Encode(diffs) +} + +// renderAuditDiffMarkdown outputs audit diffs as markdown to stdout. +// Multiple diffs are separated by a horizontal rule. +func renderAuditDiffMarkdown(diffs []*AuditDiff) { + auditDiffRenderLog.Printf("Rendering %d audit diff(s) as markdown", len(diffs)) + for i, diff := range diffs { + if i > 0 { + fmt.Println("---") + fmt.Println() + } + renderSingleAuditDiffMarkdown(diff) + } +} + +// renderAuditDiffPretty outputs audit diffs as formatted console output to stderr. +// Multiple diffs are separated by a visual divider. +func renderAuditDiffPretty(diffs []*AuditDiff) { + auditDiffRenderLog.Printf("Rendering %d audit diff(s) as pretty output", len(diffs)) + for i, diff := range diffs { + if i > 0 { + fmt.Fprintln(os.Stderr, strings.Repeat("─", 60)) + fmt.Fprintln(os.Stderr) + } + renderSingleAuditDiffPretty(diff) + } } -// renderAuditDiffMarkdown outputs the full audit diff as markdown to stdout -func renderAuditDiffMarkdown(diff *AuditDiff) { +// renderSingleAuditDiffMarkdown outputs a single audit diff as markdown to stdout +func renderSingleAuditDiffMarkdown(diff *AuditDiff) { auditDiffRenderLog.Printf("Rendering audit diff as markdown: run1=%d, run2=%d", diff.Run1ID, diff.Run2ID) fmt.Printf("### Audit Diff: Run #%d → Run #%d\n\n", diff.Run1ID, diff.Run2ID) @@ -35,8 +70,8 @@ func renderAuditDiffMarkdown(diff *AuditDiff) { renderRunMetricsDiffMarkdownSection(diff.Run1ID, diff.Run2ID, diff.RunMetricsDiff) } -// renderAuditDiffPretty outputs the full audit diff as formatted console output to stderr -func renderAuditDiffPretty(diff *AuditDiff) { +// renderSingleAuditDiffPretty outputs a single audit diff as formatted console output to stderr +func renderSingleAuditDiffPretty(diff *AuditDiff) { auditDiffRenderLog.Printf("Rendering audit diff as pretty output: run1=%d, run2=%d", diff.Run1ID, diff.Run2ID) fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("Audit Diff: Run #%d → Run #%d", diff.Run1ID, diff.Run2ID))) fmt.Fprintln(os.Stderr) @@ -228,6 +263,41 @@ func renderRunMetricsDiffMarkdownSection(run1ID, run2ID int64, diff *RunMetricsD fmt.Printf("| Turns | %d | %d | %s |\n", diff.Run1Turns, diff.Run2Turns, turnsChange) } fmt.Println() + + if diff.TokenUsageDetails != nil { + renderTokenUsageDiffMarkdownSection(run1ID, run2ID, diff.TokenUsageDetails) + } +} + +// renderTokenUsageDiffMarkdownSection renders detailed token usage as a markdown sub-section +func renderTokenUsageDiffMarkdownSection(run1ID, run2ID int64, diff *TokenUsageDiff) { + fmt.Println("#### Token Usage Details") + fmt.Println() + fmt.Printf("| Token Type | Run #%d | Run #%d | Change |\n", run1ID, run2ID) + fmt.Println("|------------|---------|---------|--------|") + + if diff.Run1InputTokens > 0 || diff.Run2InputTokens > 0 { + fmt.Printf("| Input | %d | %d | %s |\n", diff.Run1InputTokens, diff.Run2InputTokens, diff.InputTokensChange) + } + if diff.Run1OutputTokens > 0 || diff.Run2OutputTokens > 0 { + fmt.Printf("| Output | %d | %d | %s |\n", diff.Run1OutputTokens, diff.Run2OutputTokens, diff.OutputTokensChange) + } + if diff.Run1CacheReadTokens > 0 || diff.Run2CacheReadTokens > 0 { + fmt.Printf("| Cache read | %d | %d | %s |\n", diff.Run1CacheReadTokens, diff.Run2CacheReadTokens, diff.CacheReadTokensChange) + } + if diff.Run1CacheWriteTokens > 0 || diff.Run2CacheWriteTokens > 0 { + fmt.Printf("| Cache write | %d | %d | %s |\n", diff.Run1CacheWriteTokens, diff.Run2CacheWriteTokens, diff.CacheWriteTokensChange) + } + if diff.Run1EffectiveTokens > 0 || diff.Run2EffectiveTokens > 0 { + fmt.Printf("| Effective | %d | %d | %s |\n", diff.Run1EffectiveTokens, diff.Run2EffectiveTokens, diff.EffectiveTokensChange) + } + if diff.Run1TotalRequests > 0 || diff.Run2TotalRequests > 0 { + fmt.Printf("| API requests | %d | %d | %s |\n", diff.Run1TotalRequests, diff.Run2TotalRequests, diff.RequestsChange) + } + if diff.Run1CacheEfficiency > 0 || diff.Run2CacheEfficiency > 0 { + fmt.Printf("| Cache efficiency | %.1f%% | %.1f%% | |\n", diff.Run1CacheEfficiency*100, diff.Run2CacheEfficiency*100) + } + fmt.Println() } // renderFirewallDiffPrettySection renders the firewall diff as a pretty console sub-section @@ -353,9 +423,40 @@ func renderRunMetricsDiffPrettySection(run1ID, run2ID int64, diff *RunMetricsDif fmt.Fprintf(os.Stderr, " Turns: %d → %d (%+d)\n", diff.Run1Turns, diff.Run2Turns, diff.TurnsChange) } + if diff.TokenUsageDetails != nil { + renderTokenUsageDiffPrettySection(diff.TokenUsageDetails) + } + fmt.Fprintln(os.Stderr) } +// renderTokenUsageDiffPrettySection renders detailed token usage as a pretty console sub-section +func renderTokenUsageDiffPrettySection(diff *TokenUsageDiff) { + fmt.Fprintf(os.Stderr, " Token Usage Details:\n") + + if diff.Run1InputTokens > 0 || diff.Run2InputTokens > 0 { + fmt.Fprintf(os.Stderr, " Input: %d → %d (%s)\n", diff.Run1InputTokens, diff.Run2InputTokens, diff.InputTokensChange) + } + if diff.Run1OutputTokens > 0 || diff.Run2OutputTokens > 0 { + fmt.Fprintf(os.Stderr, " Output: %d → %d (%s)\n", diff.Run1OutputTokens, diff.Run2OutputTokens, diff.OutputTokensChange) + } + if diff.Run1CacheReadTokens > 0 || diff.Run2CacheReadTokens > 0 { + fmt.Fprintf(os.Stderr, " Cache read: %d → %d (%s)\n", diff.Run1CacheReadTokens, diff.Run2CacheReadTokens, diff.CacheReadTokensChange) + } + if diff.Run1CacheWriteTokens > 0 || diff.Run2CacheWriteTokens > 0 { + fmt.Fprintf(os.Stderr, " Cache write: %d → %d (%s)\n", diff.Run1CacheWriteTokens, diff.Run2CacheWriteTokens, diff.CacheWriteTokensChange) + } + if diff.Run1EffectiveTokens > 0 || diff.Run2EffectiveTokens > 0 { + fmt.Fprintf(os.Stderr, " Effective: %d → %d (%s)\n", diff.Run1EffectiveTokens, diff.Run2EffectiveTokens, diff.EffectiveTokensChange) + } + if diff.Run1TotalRequests > 0 || diff.Run2TotalRequests > 0 { + fmt.Fprintf(os.Stderr, " API requests: %d → %d (%s)\n", diff.Run1TotalRequests, diff.Run2TotalRequests, diff.RequestsChange) + } + if diff.Run1CacheEfficiency > 0 || diff.Run2CacheEfficiency > 0 { + fmt.Fprintf(os.Stderr, " Cache efficiency: %.1f%% → %.1f%%\n", diff.Run1CacheEfficiency*100, diff.Run2CacheEfficiency*100) + } +} + // statusEmoji returns the status emoji for a domain status func statusEmoji(status string) string { switch status { diff --git a/pkg/cli/audit_diff_test.go b/pkg/cli/audit_diff_test.go index 774dcb12dbb..395f8f5a62d 100644 --- a/pkg/cli/audit_diff_test.go +++ b/pkg/cli/audit_diff_test.go @@ -693,3 +693,238 @@ func TestIsEmptyAuditDiff(t *testing.T) { RunMetricsDiff: &RunMetricsDiff{Run1TokenUsage: 100}, }), "AuditDiff with metrics diff should not be empty") } + +func TestComputeTokenUsageDiff_BothNil(t *testing.T) { + diff := computeTokenUsageDiff(nil, nil) + assert.Nil(t, diff, "Should return nil when both summaries are nil") +} + +func TestComputeTokenUsageDiff_WithData(t *testing.T) { + tu1 := &TokenUsageSummary{ + TotalInputTokens: 10000, + TotalOutputTokens: 2000, + TotalCacheReadTokens: 5000, + TotalCacheWriteTokens: 1000, + TotalEffectiveTokens: 8000, + TotalRequests: 10, + CacheEfficiency: 0.333, + } + tu2 := &TokenUsageSummary{ + TotalInputTokens: 15000, + TotalOutputTokens: 3000, + TotalCacheReadTokens: 7000, + TotalCacheWriteTokens: 800, + TotalEffectiveTokens: 12000, + TotalRequests: 14, + CacheEfficiency: 0.318, + } + + diff := computeTokenUsageDiff(tu1, tu2) + + require.NotNil(t, diff, "Should produce token usage diff when data is available") + assert.Equal(t, 10000, diff.Run1InputTokens, "Run1 input tokens should be 10000") + assert.Equal(t, 15000, diff.Run2InputTokens, "Run2 input tokens should be 15000") + assert.Equal(t, "+50%", diff.InputTokensChange, "Input tokens should increase by 50%") + + assert.Equal(t, 2000, diff.Run1OutputTokens, "Run1 output tokens should be 2000") + assert.Equal(t, 3000, diff.Run2OutputTokens, "Run2 output tokens should be 3000") + assert.Equal(t, "+50%", diff.OutputTokensChange, "Output tokens should increase by 50%") + + assert.Equal(t, 5000, diff.Run1CacheReadTokens, "Run1 cache read tokens should be 5000") + assert.Equal(t, 7000, diff.Run2CacheReadTokens, "Run2 cache read tokens should be 7000") + assert.Equal(t, "+40%", diff.CacheReadTokensChange, "Cache read tokens should increase by 40%") + + assert.Equal(t, 1000, diff.Run1CacheWriteTokens, "Run1 cache write tokens should be 1000") + assert.Equal(t, 800, diff.Run2CacheWriteTokens, "Run2 cache write tokens should be 800") + assert.Equal(t, "-20%", diff.CacheWriteTokensChange, "Cache write tokens should decrease by 20%") + + assert.Equal(t, 8000, diff.Run1EffectiveTokens, "Run1 effective tokens should be 8000") + assert.Equal(t, 12000, diff.Run2EffectiveTokens, "Run2 effective tokens should be 12000") + assert.Equal(t, "+50%", diff.EffectiveTokensChange, "Effective tokens should increase by 50%") + + assert.Equal(t, 10, diff.Run1TotalRequests, "Run1 requests should be 10") + assert.Equal(t, 14, diff.Run2TotalRequests, "Run2 requests should be 14") + + assert.InDelta(t, 0.333, diff.Run1CacheEfficiency, 0.001, "Run1 cache efficiency should match") + assert.InDelta(t, 0.318, diff.Run2CacheEfficiency, 0.001, "Run2 cache efficiency should match") +} + +func TestComputeTokenUsageDiff_Run1Nil(t *testing.T) { + tu2 := &TokenUsageSummary{ + TotalInputTokens: 5000, + TotalOutputTokens: 1000, + TotalRequests: 5, + } + + diff := computeTokenUsageDiff(nil, tu2) + + require.NotNil(t, diff, "Should produce diff when run2 has data") + assert.Equal(t, 0, diff.Run1InputTokens, "Run1 input tokens should be 0 when nil") + assert.Equal(t, 5000, diff.Run2InputTokens, "Run2 input tokens should be 5000") + assert.Equal(t, "+∞", diff.InputTokensChange, "Input change should be +∞ from zero") +} + +func TestComputeTokenUsageDiff_Run2Nil(t *testing.T) { + tu1 := &TokenUsageSummary{ + TotalInputTokens: 5000, + TotalOutputTokens: 1000, + } + + diff := computeTokenUsageDiff(tu1, nil) + + require.NotNil(t, diff, "Should produce diff when run1 has data") + assert.Equal(t, 5000, diff.Run1InputTokens, "Run1 input tokens should be 5000") + assert.Equal(t, 0, diff.Run2InputTokens, "Run2 input tokens should be 0 when nil") + assert.Equal(t, "-100%", diff.InputTokensChange, "Input change should be -100%") +} + +func TestComputeRunMetricsDiff_WithTokenUsageDetails(t *testing.T) { + summary1 := &RunSummary{ + RunID: 100, + Run: WorkflowRun{Duration: 5 * time.Minute, Turns: 4}, + TokenUsage: &TokenUsageSummary{ + TotalInputTokens: 8000, + TotalOutputTokens: 1500, + TotalEffectiveTokens: 6000, + TotalRequests: 8, + CacheEfficiency: 0.25, + }, + } + summary2 := &RunSummary{ + RunID: 200, + Run: WorkflowRun{Duration: 7 * time.Minute, Turns: 6}, + TokenUsage: &TokenUsageSummary{ + TotalInputTokens: 12000, + TotalOutputTokens: 2000, + TotalEffectiveTokens: 9000, + TotalRequests: 11, + CacheEfficiency: 0.30, + }, + } + + diff := computeRunMetricsDiff(summary1, summary2) + + require.NotNil(t, diff, "Should produce metrics diff") + require.NotNil(t, diff.TokenUsageDetails, "Should populate TokenUsageDetails from RunSummary.TokenUsage") + + assert.Equal(t, 8000, diff.TokenUsageDetails.Run1InputTokens, "Run1 input tokens should be 8000") + assert.Equal(t, 12000, diff.TokenUsageDetails.Run2InputTokens, "Run2 input tokens should be 12000") + assert.Equal(t, "+50%", diff.TokenUsageDetails.InputTokensChange, "Input tokens change should be +50%") + + assert.Equal(t, 6000, diff.TokenUsageDetails.Run1EffectiveTokens, "Run1 effective tokens should be 6000") + assert.Equal(t, 9000, diff.TokenUsageDetails.Run2EffectiveTokens, "Run2 effective tokens should be 9000") + assert.Equal(t, "+50%", diff.TokenUsageDetails.EffectiveTokensChange, "Effective tokens change should be +50%") +} + +func TestComputeRunMetricsDiff_TokenUsageDetailsAloneNotNil(t *testing.T) { + // Verify that detailed token usage data alone (without Run.TokenUsage set) + // still produces a non-nil RunMetricsDiff + summary1 := &RunSummary{ + Run: WorkflowRun{}, + TokenUsage: &TokenUsageSummary{ + TotalInputTokens: 5000, + TotalRequests: 5, + }, + } + summary2 := &RunSummary{ + Run: WorkflowRun{}, + TokenUsage: &TokenUsageSummary{ + TotalInputTokens: 8000, + TotalRequests: 7, + }, + } + + diff := computeRunMetricsDiff(summary1, summary2) + + require.NotNil(t, diff, "Should produce non-nil diff when only TokenUsage data is present") + require.NotNil(t, diff.TokenUsageDetails, "Should have TokenUsageDetails") + assert.Equal(t, 5000, diff.TokenUsageDetails.Run1InputTokens, "Run1 input tokens should be 5000") + assert.Equal(t, 8000, diff.TokenUsageDetails.Run2InputTokens, "Run2 input tokens should be 8000") +} + +func TestComputeAuditDiff_MultipleRuns(t *testing.T) { + base := &RunSummary{ + RunID: 100, + FirewallAnalysis: &FirewallAnalysis{ + RequestsByDomain: map[string]DomainRequestStats{ + "api.github.com:443": {Allowed: 5, Blocked: 0}, + }, + }, + MCPToolUsage: &MCPToolUsageData{ + Summary: []MCPToolSummary{ + {ServerName: "github", ToolName: "issue_read", CallCount: 3, ErrorCount: 0}, + }, + }, + Run: WorkflowRun{Turns: 5}, + TokenUsage: &TokenUsageSummary{ + TotalInputTokens: 10000, + TotalOutputTokens: 2000, + TotalRequests: 10, + }, + } + + compare1 := &RunSummary{ + RunID: 200, + FirewallAnalysis: &FirewallAnalysis{ + RequestsByDomain: map[string]DomainRequestStats{ + "api.github.com:443": {Allowed: 5, Blocked: 0}, + "new1.example.com:443": {Allowed: 3, Blocked: 0}, + }, + }, + MCPToolUsage: &MCPToolUsageData{ + Summary: []MCPToolSummary{ + {ServerName: "github", ToolName: "issue_read", CallCount: 5, ErrorCount: 0}, + }, + }, + Run: WorkflowRun{Turns: 7}, + TokenUsage: &TokenUsageSummary{ + TotalInputTokens: 15000, + TotalOutputTokens: 3000, + TotalRequests: 12, + }, + } + + compare2 := &RunSummary{ + RunID: 300, + FirewallAnalysis: &FirewallAnalysis{ + RequestsByDomain: map[string]DomainRequestStats{ + "api.github.com:443": {Allowed: 5, Blocked: 0}, + "new2.example.com:443": {Allowed: 1, Blocked: 2}, + }, + }, + Run: WorkflowRun{Turns: 4}, + TokenUsage: &TokenUsageSummary{ + TotalInputTokens: 8000, + TotalOutputTokens: 1500, + TotalRequests: 8, + }, + } + + // Compute two diffs from the same base + diff1 := computeAuditDiff(base.RunID, compare1.RunID, base, compare1) + diff2 := computeAuditDiff(base.RunID, compare2.RunID, base, compare2) + + // Diff 1: base vs compare1 + assert.Equal(t, int64(100), diff1.Run1ID, "Diff1 Run1ID should be base") + assert.Equal(t, int64(200), diff1.Run2ID, "Diff1 Run2ID should be compare1") + require.NotNil(t, diff1.FirewallDiff, "Diff1 should have firewall diff") + assert.Len(t, diff1.FirewallDiff.NewDomains, 1, "Diff1 should have 1 new domain") + assert.Equal(t, "new1.example.com:443", diff1.FirewallDiff.NewDomains[0].Domain, "Diff1 new domain should be new1") + require.NotNil(t, diff1.RunMetricsDiff, "Diff1 should have run metrics diff") + require.NotNil(t, diff1.RunMetricsDiff.TokenUsageDetails, "Diff1 should have token usage details") + assert.Equal(t, "+50%", diff1.RunMetricsDiff.TokenUsageDetails.InputTokensChange, "Diff1 input tokens should increase by 50%") + + // Diff 2: base vs compare2 + assert.Equal(t, int64(100), diff2.Run1ID, "Diff2 Run1ID should be base") + assert.Equal(t, int64(300), diff2.Run2ID, "Diff2 Run2ID should be compare2") + require.NotNil(t, diff2.FirewallDiff, "Diff2 should have firewall diff") + assert.Len(t, diff2.FirewallDiff.NewDomains, 1, "Diff2 should have 1 new domain") + assert.Equal(t, "new2.example.com:443", diff2.FirewallDiff.NewDomains[0].Domain, "Diff2 new domain should be new2") + assert.True(t, diff2.FirewallDiff.NewDomains[0].IsAnomaly, "Diff2 new domain should be anomaly (blocked)") + require.NotNil(t, diff2.RunMetricsDiff, "Diff2 should have run metrics diff") + require.NotNil(t, diff2.RunMetricsDiff.TokenUsageDetails, "Diff2 should have token usage details") + assert.Equal(t, "-20%", diff2.RunMetricsDiff.TokenUsageDetails.InputTokensChange, "Diff2 input tokens should decrease by 20%") + + // The two diffs should be independent (no shared state) + assert.NotEqual(t, diff1.Run2ID, diff2.Run2ID, "The two diffs should have different Run2IDs") +} From dceb4485d572bcaab62f7d80e51b21c103f79286 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 4 Apr 2026 17:58:19 +0000 Subject: [PATCH 2/5] refactor: use console pkg helpers for section headers and sub-section labels in audit diff pretty render Agent-Logs-Url: https://github.com/github/gh-aw/sessions/2e45f51d-dc34-4f35-a127-3c42a1b0ea44 Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- pkg/cli/audit_diff_render.go | 72 ++++++++++++++++++++---------------- 1 file changed, 40 insertions(+), 32 deletions(-) diff --git a/pkg/cli/audit_diff_render.go b/pkg/cli/audit_diff_render.go index 0a7615ad168..967147a4c3d 100644 --- a/pkg/cli/audit_diff_render.go +++ b/pkg/cli/audit_diff_render.go @@ -306,10 +306,11 @@ func renderFirewallDiffPrettySection(diff *FirewallDiff) { return } - fmt.Fprintf(os.Stderr, " Firewall Changes:\n") + fmt.Fprintln(os.Stderr, console.FormatSectionHeader("Firewall Changes")) + fmt.Fprintln(os.Stderr) if len(diff.NewDomains) > 0 { - fmt.Fprintf(os.Stderr, " New Domains (%d):\n", len(diff.NewDomains)) + fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("New Domains (%d):", len(diff.NewDomains)))) for _, entry := range diff.NewDomains { total := entry.Run2Allowed + entry.Run2Blocked statusIcon := statusEmoji(entry.Run2Status) @@ -317,20 +318,22 @@ func renderFirewallDiffPrettySection(diff *FirewallDiff) { if entry.IsAnomaly { anomalyTag = " [ANOMALY: " + entry.AnomalyNote + "]" } - fmt.Fprintf(os.Stderr, " %s %s (%d requests, %s)%s\n", statusIcon, entry.Domain, total, entry.Run2Status, anomalyTag) + fmt.Fprintf(os.Stderr, " • %s %s (%d requests, %s)%s\n", statusIcon, entry.Domain, total, entry.Run2Status, anomalyTag) } + fmt.Fprintln(os.Stderr) } if len(diff.RemovedDomains) > 0 { - fmt.Fprintf(os.Stderr, " Removed Domains (%d):\n", len(diff.RemovedDomains)) + fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("Removed Domains (%d):", len(diff.RemovedDomains)))) for _, entry := range diff.RemovedDomains { total := entry.Run1Allowed + entry.Run1Blocked - fmt.Fprintf(os.Stderr, " %s (was %s, %d requests)\n", entry.Domain, entry.Run1Status, total) + fmt.Fprintf(os.Stderr, " • %s (was %s, %d requests)\n", entry.Domain, entry.Run1Status, total) } + fmt.Fprintln(os.Stderr) } if len(diff.StatusChanges) > 0 { - fmt.Fprintf(os.Stderr, " Status Changes (%d):\n", len(diff.StatusChanges)) + fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("Status Changes (%d):", len(diff.StatusChanges)))) for _, entry := range diff.StatusChanges { icon1 := statusEmoji(entry.Run1Status) icon2 := statusEmoji(entry.Run2Status) @@ -338,20 +341,20 @@ func renderFirewallDiffPrettySection(diff *FirewallDiff) { if entry.IsAnomaly { anomalyTag = " [ANOMALY: " + entry.AnomalyNote + "]" } - fmt.Fprintf(os.Stderr, " %s: %s %s → %s %s%s\n", entry.Domain, icon1, entry.Run1Status, icon2, entry.Run2Status, anomalyTag) + fmt.Fprintf(os.Stderr, " • %s: %s %s → %s %s%s\n", entry.Domain, icon1, entry.Run1Status, icon2, entry.Run2Status, anomalyTag) } + fmt.Fprintln(os.Stderr) } if len(diff.VolumeChanges) > 0 { - fmt.Fprintf(os.Stderr, " Volume Changes:\n") + fmt.Fprintln(os.Stderr, console.FormatInfoMessage("Volume Changes:")) for _, entry := range diff.VolumeChanges { total1 := entry.Run1Allowed + entry.Run1Blocked total2 := entry.Run2Allowed + entry.Run2Blocked - fmt.Fprintf(os.Stderr, " %s: %d → %d requests (%s)\n", entry.Domain, total1, total2, entry.VolumeChange) + fmt.Fprintf(os.Stderr, " • %s: %d → %d requests (%s)\n", entry.Domain, total1, total2, entry.VolumeChange) } + fmt.Fprintln(os.Stderr) } - - fmt.Fprintln(os.Stderr) } // renderMCPToolsDiffPrettySection renders the MCP tools diff as a pretty console sub-section @@ -360,28 +363,31 @@ func renderMCPToolsDiffPrettySection(diff *MCPToolsDiff) { return } - fmt.Fprintf(os.Stderr, " MCP Tool Changes:\n") + fmt.Fprintln(os.Stderr, console.FormatSectionHeader("MCP Tool Changes")) + fmt.Fprintln(os.Stderr) if len(diff.NewTools) > 0 { - fmt.Fprintf(os.Stderr, " New Tools (%d):\n", len(diff.NewTools)) + fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("New Tools (%d):", len(diff.NewTools)))) for _, entry := range diff.NewTools { anomalyTag := "" if entry.IsAnomaly { anomalyTag = " [ANOMALY: " + entry.AnomalyNote + "]" } - fmt.Fprintf(os.Stderr, " + %s/%s (%d calls)%s\n", entry.ServerName, entry.ToolName, entry.Run2CallCount, anomalyTag) + fmt.Fprintf(os.Stderr, " • + %s/%s (%d calls)%s\n", entry.ServerName, entry.ToolName, entry.Run2CallCount, anomalyTag) } + fmt.Fprintln(os.Stderr) } if len(diff.RemovedTools) > 0 { - fmt.Fprintf(os.Stderr, " Removed Tools (%d):\n", len(diff.RemovedTools)) + fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("Removed Tools (%d):", len(diff.RemovedTools)))) for _, entry := range diff.RemovedTools { - fmt.Fprintf(os.Stderr, " - %s/%s (was %d calls)\n", entry.ServerName, entry.ToolName, entry.Run1CallCount) + fmt.Fprintf(os.Stderr, " • - %s/%s (was %d calls)\n", entry.ServerName, entry.ToolName, entry.Run1CallCount) } + fmt.Fprintln(os.Stderr) } if len(diff.ChangedTools) > 0 { - fmt.Fprintf(os.Stderr, " Changed Tools (%d):\n", len(diff.ChangedTools)) + fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("Changed Tools (%d):", len(diff.ChangedTools)))) for _, entry := range diff.ChangedTools { anomalyTag := "" if entry.IsAnomaly { @@ -391,14 +397,13 @@ func renderMCPToolsDiffPrettySection(diff *MCPToolsDiff) { if entry.Run1ErrorCount > 0 || entry.Run2ErrorCount > 0 { errInfo = fmt.Sprintf(", errors: %d → %d", entry.Run1ErrorCount, entry.Run2ErrorCount) } - fmt.Fprintf(os.Stderr, " ~ %s/%s: %d → %d calls (%s%s)%s\n", + fmt.Fprintf(os.Stderr, " • ~ %s/%s: %d → %d calls (%s%s)%s\n", entry.ServerName, entry.ToolName, entry.Run1CallCount, entry.Run2CallCount, entry.CallCountChange, errInfo, anomalyTag) } + fmt.Fprintln(os.Stderr) } - - fmt.Fprintln(os.Stderr) } // renderRunMetricsDiffPrettySection renders the run metrics diff as a pretty console sub-section @@ -407,23 +412,25 @@ func renderRunMetricsDiffPrettySection(run1ID, run2ID int64, diff *RunMetricsDif return } - fmt.Fprintf(os.Stderr, " Run Metrics (Run #%d → Run #%d):\n", run1ID, run2ID) + fmt.Fprintln(os.Stderr, console.FormatSectionHeader(fmt.Sprintf("Run Metrics (Run #%d → Run #%d)", run1ID, run2ID))) + fmt.Fprintln(os.Stderr) if diff.Run1TokenUsage > 0 || diff.Run2TokenUsage > 0 { - fmt.Fprintf(os.Stderr, " Token usage: %d → %d (%s)\n", diff.Run1TokenUsage, diff.Run2TokenUsage, diff.TokenUsageChange) + fmt.Fprintf(os.Stderr, " Token usage: %d → %d (%s)\n", diff.Run1TokenUsage, diff.Run2TokenUsage, diff.TokenUsageChange) } if diff.Run1Duration != "" || diff.Run2Duration != "" { changeStr := "" if diff.DurationChange != "" { changeStr = " (" + diff.DurationChange + ")" } - fmt.Fprintf(os.Stderr, " Duration: %s → %s%s\n", diff.Run1Duration, diff.Run2Duration, changeStr) + fmt.Fprintf(os.Stderr, " Duration: %s → %s%s\n", diff.Run1Duration, diff.Run2Duration, changeStr) } if diff.Run1Turns > 0 || diff.Run2Turns > 0 { - fmt.Fprintf(os.Stderr, " Turns: %d → %d (%+d)\n", diff.Run1Turns, diff.Run2Turns, diff.TurnsChange) + fmt.Fprintf(os.Stderr, " Turns: %d → %d (%+d)\n", diff.Run1Turns, diff.Run2Turns, diff.TurnsChange) } if diff.TokenUsageDetails != nil { + fmt.Fprintln(os.Stderr) renderTokenUsageDiffPrettySection(diff.TokenUsageDetails) } @@ -432,28 +439,29 @@ func renderRunMetricsDiffPrettySection(run1ID, run2ID int64, diff *RunMetricsDif // renderTokenUsageDiffPrettySection renders detailed token usage as a pretty console sub-section func renderTokenUsageDiffPrettySection(diff *TokenUsageDiff) { - fmt.Fprintf(os.Stderr, " Token Usage Details:\n") + fmt.Fprintln(os.Stderr, console.FormatSectionHeader("Token Usage Details")) + fmt.Fprintln(os.Stderr) if diff.Run1InputTokens > 0 || diff.Run2InputTokens > 0 { - fmt.Fprintf(os.Stderr, " Input: %d → %d (%s)\n", diff.Run1InputTokens, diff.Run2InputTokens, diff.InputTokensChange) + fmt.Fprintf(os.Stderr, " Input: %d → %d (%s)\n", diff.Run1InputTokens, diff.Run2InputTokens, diff.InputTokensChange) } if diff.Run1OutputTokens > 0 || diff.Run2OutputTokens > 0 { - fmt.Fprintf(os.Stderr, " Output: %d → %d (%s)\n", diff.Run1OutputTokens, diff.Run2OutputTokens, diff.OutputTokensChange) + fmt.Fprintf(os.Stderr, " Output: %d → %d (%s)\n", diff.Run1OutputTokens, diff.Run2OutputTokens, diff.OutputTokensChange) } if diff.Run1CacheReadTokens > 0 || diff.Run2CacheReadTokens > 0 { - fmt.Fprintf(os.Stderr, " Cache read: %d → %d (%s)\n", diff.Run1CacheReadTokens, diff.Run2CacheReadTokens, diff.CacheReadTokensChange) + fmt.Fprintf(os.Stderr, " Cache read: %d → %d (%s)\n", diff.Run1CacheReadTokens, diff.Run2CacheReadTokens, diff.CacheReadTokensChange) } if diff.Run1CacheWriteTokens > 0 || diff.Run2CacheWriteTokens > 0 { - fmt.Fprintf(os.Stderr, " Cache write: %d → %d (%s)\n", diff.Run1CacheWriteTokens, diff.Run2CacheWriteTokens, diff.CacheWriteTokensChange) + fmt.Fprintf(os.Stderr, " Cache write: %d → %d (%s)\n", diff.Run1CacheWriteTokens, diff.Run2CacheWriteTokens, diff.CacheWriteTokensChange) } if diff.Run1EffectiveTokens > 0 || diff.Run2EffectiveTokens > 0 { - fmt.Fprintf(os.Stderr, " Effective: %d → %d (%s)\n", diff.Run1EffectiveTokens, diff.Run2EffectiveTokens, diff.EffectiveTokensChange) + fmt.Fprintf(os.Stderr, " Effective: %d → %d (%s)\n", diff.Run1EffectiveTokens, diff.Run2EffectiveTokens, diff.EffectiveTokensChange) } if diff.Run1TotalRequests > 0 || diff.Run2TotalRequests > 0 { - fmt.Fprintf(os.Stderr, " API requests: %d → %d (%s)\n", diff.Run1TotalRequests, diff.Run2TotalRequests, diff.RequestsChange) + fmt.Fprintf(os.Stderr, " API requests: %d → %d (%s)\n", diff.Run1TotalRequests, diff.Run2TotalRequests, diff.RequestsChange) } if diff.Run1CacheEfficiency > 0 || diff.Run2CacheEfficiency > 0 { - fmt.Fprintf(os.Stderr, " Cache efficiency: %.1f%% → %.1f%%\n", diff.Run1CacheEfficiency*100, diff.Run2CacheEfficiency*100) + fmt.Fprintf(os.Stderr, " Cache efficiency: %.1f%% → %.1f%%\n", diff.Run1CacheEfficiency*100, diff.Run2CacheEfficiency*100) } } From 453a36b80b6489b2868f9302ac23987d417c9f8b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 4 Apr 2026 18:25:59 +0000 Subject: [PATCH 3/5] refactor: use console.RenderTable for all diff section pretty renderers Agent-Logs-Url: https://github.com/github/gh-aw/sessions/3ec821db-7712-419a-ba3e-3bbeda023d5e Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- pkg/cli/audit_diff_render.go | 232 ++++++++++++++++++++++++++--------- 1 file changed, 175 insertions(+), 57 deletions(-) diff --git a/pkg/cli/audit_diff_render.go b/pkg/cli/audit_diff_render.go index 967147a4c3d..fdc41fcf1b1 100644 --- a/pkg/cli/audit_diff_render.go +++ b/pkg/cli/audit_diff_render.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "os" + "strconv" "strings" "github.com/github/gh-aw/pkg/console" @@ -310,50 +311,82 @@ func renderFirewallDiffPrettySection(diff *FirewallDiff) { fmt.Fprintln(os.Stderr) if len(diff.NewDomains) > 0 { - fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("New Domains (%d):", len(diff.NewDomains)))) + fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("New Domains (%d)", len(diff.NewDomains)))) + config := console.TableConfig{ + Headers: []string{"Domain", "Status", "Requests", "Anomaly"}, + Rows: make([][]string, 0, len(diff.NewDomains)), + } for _, entry := range diff.NewDomains { total := entry.Run2Allowed + entry.Run2Blocked - statusIcon := statusEmoji(entry.Run2Status) - anomalyTag := "" + anomalyNote := "" if entry.IsAnomaly { - anomalyTag = " [ANOMALY: " + entry.AnomalyNote + "]" + anomalyNote = "⚠️ " + entry.AnomalyNote } - fmt.Fprintf(os.Stderr, " • %s %s (%d requests, %s)%s\n", statusIcon, entry.Domain, total, entry.Run2Status, anomalyTag) + config.Rows = append(config.Rows, []string{ + entry.Domain, + statusEmoji(entry.Run2Status) + " " + entry.Run2Status, + strconv.Itoa(total), + anomalyNote, + }) } - fmt.Fprintln(os.Stderr) + fmt.Fprint(os.Stderr, console.RenderTable(config)) } if len(diff.RemovedDomains) > 0 { - fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("Removed Domains (%d):", len(diff.RemovedDomains)))) + fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("Removed Domains (%d)", len(diff.RemovedDomains)))) + config := console.TableConfig{ + Headers: []string{"Domain", "Previous Status", "Previous Requests"}, + Rows: make([][]string, 0, len(diff.RemovedDomains)), + } for _, entry := range diff.RemovedDomains { total := entry.Run1Allowed + entry.Run1Blocked - fmt.Fprintf(os.Stderr, " • %s (was %s, %d requests)\n", entry.Domain, entry.Run1Status, total) + config.Rows = append(config.Rows, []string{ + entry.Domain, + statusEmoji(entry.Run1Status) + " " + entry.Run1Status, + strconv.Itoa(total), + }) } - fmt.Fprintln(os.Stderr) + fmt.Fprint(os.Stderr, console.RenderTable(config)) } if len(diff.StatusChanges) > 0 { - fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("Status Changes (%d):", len(diff.StatusChanges)))) + fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("Status Changes (%d)", len(diff.StatusChanges)))) + config := console.TableConfig{ + Headers: []string{"Domain", "Before", "After", "Anomaly"}, + Rows: make([][]string, 0, len(diff.StatusChanges)), + } for _, entry := range diff.StatusChanges { - icon1 := statusEmoji(entry.Run1Status) - icon2 := statusEmoji(entry.Run2Status) - anomalyTag := "" + anomalyNote := "" if entry.IsAnomaly { - anomalyTag = " [ANOMALY: " + entry.AnomalyNote + "]" + anomalyNote = "⚠️ " + entry.AnomalyNote } - fmt.Fprintf(os.Stderr, " • %s: %s %s → %s %s%s\n", entry.Domain, icon1, entry.Run1Status, icon2, entry.Run2Status, anomalyTag) + config.Rows = append(config.Rows, []string{ + entry.Domain, + statusEmoji(entry.Run1Status) + " " + entry.Run1Status, + statusEmoji(entry.Run2Status) + " " + entry.Run2Status, + anomalyNote, + }) } - fmt.Fprintln(os.Stderr) + fmt.Fprint(os.Stderr, console.RenderTable(config)) } if len(diff.VolumeChanges) > 0 { - fmt.Fprintln(os.Stderr, console.FormatInfoMessage("Volume Changes:")) + fmt.Fprintln(os.Stderr, console.FormatInfoMessage("Volume Changes")) + config := console.TableConfig{ + Headers: []string{"Domain", "Requests (before)", "Requests (after)", "Change"}, + Rows: make([][]string, 0, len(diff.VolumeChanges)), + } for _, entry := range diff.VolumeChanges { total1 := entry.Run1Allowed + entry.Run1Blocked total2 := entry.Run2Allowed + entry.Run2Blocked - fmt.Fprintf(os.Stderr, " • %s: %d → %d requests (%s)\n", entry.Domain, total1, total2, entry.VolumeChange) + config.Rows = append(config.Rows, []string{ + entry.Domain, + strconv.Itoa(total1), + strconv.Itoa(total2), + entry.VolumeChange, + }) } - fmt.Fprintln(os.Stderr) + fmt.Fprint(os.Stderr, console.RenderTable(config)) } } @@ -367,42 +400,65 @@ func renderMCPToolsDiffPrettySection(diff *MCPToolsDiff) { fmt.Fprintln(os.Stderr) if len(diff.NewTools) > 0 { - fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("New Tools (%d):", len(diff.NewTools)))) + fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("New Tools (%d)", len(diff.NewTools)))) + config := console.TableConfig{ + Headers: []string{"Server", "Tool", "Calls", "Anomaly"}, + Rows: make([][]string, 0, len(diff.NewTools)), + } for _, entry := range diff.NewTools { - anomalyTag := "" + anomalyNote := "" if entry.IsAnomaly { - anomalyTag = " [ANOMALY: " + entry.AnomalyNote + "]" + anomalyNote = "⚠️ " + entry.AnomalyNote } - fmt.Fprintf(os.Stderr, " • + %s/%s (%d calls)%s\n", entry.ServerName, entry.ToolName, entry.Run2CallCount, anomalyTag) + config.Rows = append(config.Rows, []string{ + entry.ServerName, + entry.ToolName, + strconv.Itoa(entry.Run2CallCount), + anomalyNote, + }) } - fmt.Fprintln(os.Stderr) + fmt.Fprint(os.Stderr, console.RenderTable(config)) } if len(diff.RemovedTools) > 0 { - fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("Removed Tools (%d):", len(diff.RemovedTools)))) + fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("Removed Tools (%d)", len(diff.RemovedTools)))) + config := console.TableConfig{ + Headers: []string{"Server", "Tool", "Previous Calls"}, + Rows: make([][]string, 0, len(diff.RemovedTools)), + } for _, entry := range diff.RemovedTools { - fmt.Fprintf(os.Stderr, " • - %s/%s (was %d calls)\n", entry.ServerName, entry.ToolName, entry.Run1CallCount) + config.Rows = append(config.Rows, []string{ + entry.ServerName, + entry.ToolName, + strconv.Itoa(entry.Run1CallCount), + }) } - fmt.Fprintln(os.Stderr) + fmt.Fprint(os.Stderr, console.RenderTable(config)) } if len(diff.ChangedTools) > 0 { - fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("Changed Tools (%d):", len(diff.ChangedTools)))) + fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("Changed Tools (%d)", len(diff.ChangedTools)))) + config := console.TableConfig{ + Headers: []string{"Server", "Tool", "Calls (before)", "Calls (after)", "Change", "Errors (before)", "Errors (after)", "Anomaly"}, + Rows: make([][]string, 0, len(diff.ChangedTools)), + } for _, entry := range diff.ChangedTools { - anomalyTag := "" + anomalyNote := "" if entry.IsAnomaly { - anomalyTag = " [ANOMALY: " + entry.AnomalyNote + "]" + anomalyNote = "⚠️ " + entry.AnomalyNote } - errInfo := "" - if entry.Run1ErrorCount > 0 || entry.Run2ErrorCount > 0 { - errInfo = fmt.Sprintf(", errors: %d → %d", entry.Run1ErrorCount, entry.Run2ErrorCount) - } - fmt.Fprintf(os.Stderr, " • ~ %s/%s: %d → %d calls (%s%s)%s\n", - entry.ServerName, entry.ToolName, - entry.Run1CallCount, entry.Run2CallCount, - entry.CallCountChange, errInfo, anomalyTag) + config.Rows = append(config.Rows, []string{ + entry.ServerName, + entry.ToolName, + strconv.Itoa(entry.Run1CallCount), + strconv.Itoa(entry.Run2CallCount), + entry.CallCountChange, + strconv.Itoa(entry.Run1ErrorCount), + strconv.Itoa(entry.Run2ErrorCount), + anomalyNote, + }) } - fmt.Fprintln(os.Stderr) + fmt.Fprint(os.Stderr, console.RenderTable(config)) } } @@ -415,53 +471,115 @@ func renderRunMetricsDiffPrettySection(run1ID, run2ID int64, diff *RunMetricsDif fmt.Fprintln(os.Stderr, console.FormatSectionHeader(fmt.Sprintf("Run Metrics (Run #%d → Run #%d)", run1ID, run2ID))) fmt.Fprintln(os.Stderr) + config := console.TableConfig{ + Headers: []string{"Metric", fmt.Sprintf("Run #%d", run1ID), fmt.Sprintf("Run #%d", run2ID), "Change"}, + Rows: make([][]string, 0), + } + if diff.Run1TokenUsage > 0 || diff.Run2TokenUsage > 0 { - fmt.Fprintf(os.Stderr, " Token usage: %d → %d (%s)\n", diff.Run1TokenUsage, diff.Run2TokenUsage, diff.TokenUsageChange) + config.Rows = append(config.Rows, []string{ + "Token usage", + strconv.Itoa(diff.Run1TokenUsage), + strconv.Itoa(diff.Run2TokenUsage), + diff.TokenUsageChange, + }) } if diff.Run1Duration != "" || diff.Run2Duration != "" { - changeStr := "" - if diff.DurationChange != "" { - changeStr = " (" + diff.DurationChange + ")" - } - fmt.Fprintf(os.Stderr, " Duration: %s → %s%s\n", diff.Run1Duration, diff.Run2Duration, changeStr) + config.Rows = append(config.Rows, []string{ + "Duration", + diff.Run1Duration, + diff.Run2Duration, + diff.DurationChange, + }) } if diff.Run1Turns > 0 || diff.Run2Turns > 0 { - fmt.Fprintf(os.Stderr, " Turns: %d → %d (%+d)\n", diff.Run1Turns, diff.Run2Turns, diff.TurnsChange) + config.Rows = append(config.Rows, []string{ + "Turns", + strconv.Itoa(diff.Run1Turns), + strconv.Itoa(diff.Run2Turns), + fmt.Sprintf("%+d", diff.TurnsChange), + }) + } + + if len(config.Rows) > 0 { + fmt.Fprint(os.Stderr, console.RenderTable(config)) } if diff.TokenUsageDetails != nil { fmt.Fprintln(os.Stderr) - renderTokenUsageDiffPrettySection(diff.TokenUsageDetails) + renderTokenUsageDiffPrettySection(run1ID, run2ID, diff.TokenUsageDetails) } - - fmt.Fprintln(os.Stderr) } // renderTokenUsageDiffPrettySection renders detailed token usage as a pretty console sub-section -func renderTokenUsageDiffPrettySection(diff *TokenUsageDiff) { +func renderTokenUsageDiffPrettySection(run1ID, run2ID int64, diff *TokenUsageDiff) { fmt.Fprintln(os.Stderr, console.FormatSectionHeader("Token Usage Details")) fmt.Fprintln(os.Stderr) + config := console.TableConfig{ + Headers: []string{"Token Type", fmt.Sprintf("Run #%d", run1ID), fmt.Sprintf("Run #%d", run2ID), "Change"}, + Rows: make([][]string, 0), + } + if diff.Run1InputTokens > 0 || diff.Run2InputTokens > 0 { - fmt.Fprintf(os.Stderr, " Input: %d → %d (%s)\n", diff.Run1InputTokens, diff.Run2InputTokens, diff.InputTokensChange) + config.Rows = append(config.Rows, []string{ + "Input", + strconv.Itoa(diff.Run1InputTokens), + strconv.Itoa(diff.Run2InputTokens), + diff.InputTokensChange, + }) } if diff.Run1OutputTokens > 0 || diff.Run2OutputTokens > 0 { - fmt.Fprintf(os.Stderr, " Output: %d → %d (%s)\n", diff.Run1OutputTokens, diff.Run2OutputTokens, diff.OutputTokensChange) + config.Rows = append(config.Rows, []string{ + "Output", + strconv.Itoa(diff.Run1OutputTokens), + strconv.Itoa(diff.Run2OutputTokens), + diff.OutputTokensChange, + }) } if diff.Run1CacheReadTokens > 0 || diff.Run2CacheReadTokens > 0 { - fmt.Fprintf(os.Stderr, " Cache read: %d → %d (%s)\n", diff.Run1CacheReadTokens, diff.Run2CacheReadTokens, diff.CacheReadTokensChange) + config.Rows = append(config.Rows, []string{ + "Cache read", + strconv.Itoa(diff.Run1CacheReadTokens), + strconv.Itoa(diff.Run2CacheReadTokens), + diff.CacheReadTokensChange, + }) } if diff.Run1CacheWriteTokens > 0 || diff.Run2CacheWriteTokens > 0 { - fmt.Fprintf(os.Stderr, " Cache write: %d → %d (%s)\n", diff.Run1CacheWriteTokens, diff.Run2CacheWriteTokens, diff.CacheWriteTokensChange) + config.Rows = append(config.Rows, []string{ + "Cache write", + strconv.Itoa(diff.Run1CacheWriteTokens), + strconv.Itoa(diff.Run2CacheWriteTokens), + diff.CacheWriteTokensChange, + }) } if diff.Run1EffectiveTokens > 0 || diff.Run2EffectiveTokens > 0 { - fmt.Fprintf(os.Stderr, " Effective: %d → %d (%s)\n", diff.Run1EffectiveTokens, diff.Run2EffectiveTokens, diff.EffectiveTokensChange) + config.Rows = append(config.Rows, []string{ + "Effective", + strconv.Itoa(diff.Run1EffectiveTokens), + strconv.Itoa(diff.Run2EffectiveTokens), + diff.EffectiveTokensChange, + }) } if diff.Run1TotalRequests > 0 || diff.Run2TotalRequests > 0 { - fmt.Fprintf(os.Stderr, " API requests: %d → %d (%s)\n", diff.Run1TotalRequests, diff.Run2TotalRequests, diff.RequestsChange) + config.Rows = append(config.Rows, []string{ + "API requests", + strconv.Itoa(diff.Run1TotalRequests), + strconv.Itoa(diff.Run2TotalRequests), + diff.RequestsChange, + }) } if diff.Run1CacheEfficiency > 0 || diff.Run2CacheEfficiency > 0 { - fmt.Fprintf(os.Stderr, " Cache efficiency: %.1f%% → %.1f%%\n", diff.Run1CacheEfficiency*100, diff.Run2CacheEfficiency*100) + config.Rows = append(config.Rows, []string{ + "Cache efficiency", + fmt.Sprintf("%.1f%%", diff.Run1CacheEfficiency*100), + fmt.Sprintf("%.1f%%", diff.Run2CacheEfficiency*100), + "", + }) + } + + if len(config.Rows) > 0 { + fmt.Fprint(os.Stderr, console.RenderTable(config)) } } From 3f36c01a03dc43e2e164bbb1a0697c06303c9c0c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 4 Apr 2026 18:54:16 +0000 Subject: [PATCH 4/5] fix: rename RequestsChange to RequestsDelta and add CacheEfficiencyChange Agent-Logs-Url: https://github.com/github/gh-aw/sessions/2a4a1640-5be5-4d7d-9dbe-61f73600db6b Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- pkg/cli/audit_diff.go | 18 ++++++++++++++++-- pkg/cli/audit_diff_render.go | 8 ++++---- pkg/cli/audit_diff_test.go | 2 ++ 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/pkg/cli/audit_diff.go b/pkg/cli/audit_diff.go index 04e1f2b1a9a..26e0db2f933 100644 --- a/pkg/cli/audit_diff.go +++ b/pkg/cli/audit_diff.go @@ -280,9 +280,10 @@ type TokenUsageDiff struct { EffectiveTokensChange string `json:"effective_tokens_change,omitempty"` Run1TotalRequests int `json:"run1_total_requests"` Run2TotalRequests int `json:"run2_total_requests"` - RequestsChange string `json:"requests_change,omitempty"` + RequestsDelta string `json:"requests_delta,omitempty"` // Absolute request-count delta, e.g. "+4" Run1CacheEfficiency float64 `json:"run1_cache_efficiency"` Run2CacheEfficiency float64 `json:"run2_cache_efficiency"` + CacheEfficiencyChange string `json:"cache_efficiency_change,omitempty"` // Percentage-point delta, e.g. "+1.5pp" } // RunMetricsDiff represents the diff of run-level metrics (token usage, duration, turns) between two runs @@ -578,12 +579,25 @@ func computeTokenUsageDiff(tu1, tu2 *TokenUsageSummary) *TokenUsageDiff { diff.EffectiveTokensChange = formatVolumeChange(run1Effective, run2Effective) } if run1Requests > 0 || run2Requests > 0 { - diff.RequestsChange = formatCountChange(run1Requests, run2Requests) + diff.RequestsDelta = formatCountChange(run1Requests, run2Requests) + } + if run1CacheEff > 0 || run2CacheEff > 0 { + diff.CacheEfficiencyChange = formatPercentagePointChange(run1CacheEff, run2CacheEff) } return diff } +// formatPercentagePointChange formats the change between two ratio values (0.0-1.0) as a +// percentage-point delta (e.g. "+1.5pp", "-2.3pp") +func formatPercentagePointChange(ratio1, ratio2 float64) string { + delta := (ratio2 - ratio1) * 100 + if delta >= 0 { + return fmt.Sprintf("+%.1fpp", delta) + } + return fmt.Sprintf("%.1fpp", delta) +} + // formatCountChange formats the absolute change in a count value (e.g. "+3", "-1") func formatCountChange(count1, count2 int) string { delta := count2 - count1 diff --git a/pkg/cli/audit_diff_render.go b/pkg/cli/audit_diff_render.go index fdc41fcf1b1..58d1bfe0402 100644 --- a/pkg/cli/audit_diff_render.go +++ b/pkg/cli/audit_diff_render.go @@ -293,10 +293,10 @@ func renderTokenUsageDiffMarkdownSection(run1ID, run2ID int64, diff *TokenUsageD fmt.Printf("| Effective | %d | %d | %s |\n", diff.Run1EffectiveTokens, diff.Run2EffectiveTokens, diff.EffectiveTokensChange) } if diff.Run1TotalRequests > 0 || diff.Run2TotalRequests > 0 { - fmt.Printf("| API requests | %d | %d | %s |\n", diff.Run1TotalRequests, diff.Run2TotalRequests, diff.RequestsChange) + fmt.Printf("| API requests | %d | %d | %s |\n", diff.Run1TotalRequests, diff.Run2TotalRequests, diff.RequestsDelta) } if diff.Run1CacheEfficiency > 0 || diff.Run2CacheEfficiency > 0 { - fmt.Printf("| Cache efficiency | %.1f%% | %.1f%% | |\n", diff.Run1CacheEfficiency*100, diff.Run2CacheEfficiency*100) + fmt.Printf("| Cache efficiency | %.1f%% | %.1f%% | %s |\n", diff.Run1CacheEfficiency*100, diff.Run2CacheEfficiency*100, diff.CacheEfficiencyChange) } fmt.Println() } @@ -566,7 +566,7 @@ func renderTokenUsageDiffPrettySection(run1ID, run2ID int64, diff *TokenUsageDif "API requests", strconv.Itoa(diff.Run1TotalRequests), strconv.Itoa(diff.Run2TotalRequests), - diff.RequestsChange, + diff.RequestsDelta, }) } if diff.Run1CacheEfficiency > 0 || diff.Run2CacheEfficiency > 0 { @@ -574,7 +574,7 @@ func renderTokenUsageDiffPrettySection(run1ID, run2ID int64, diff *TokenUsageDif "Cache efficiency", fmt.Sprintf("%.1f%%", diff.Run1CacheEfficiency*100), fmt.Sprintf("%.1f%%", diff.Run2CacheEfficiency*100), - "", + diff.CacheEfficiencyChange, }) } diff --git a/pkg/cli/audit_diff_test.go b/pkg/cli/audit_diff_test.go index 395f8f5a62d..23f82c2d1e3 100644 --- a/pkg/cli/audit_diff_test.go +++ b/pkg/cli/audit_diff_test.go @@ -744,9 +744,11 @@ func TestComputeTokenUsageDiff_WithData(t *testing.T) { assert.Equal(t, 10, diff.Run1TotalRequests, "Run1 requests should be 10") assert.Equal(t, 14, diff.Run2TotalRequests, "Run2 requests should be 14") + assert.Equal(t, "+4", diff.RequestsDelta, "Requests delta should be +4") assert.InDelta(t, 0.333, diff.Run1CacheEfficiency, 0.001, "Run1 cache efficiency should match") assert.InDelta(t, 0.318, diff.Run2CacheEfficiency, 0.001, "Run2 cache efficiency should match") + assert.Equal(t, "-1.5pp", diff.CacheEfficiencyChange, "Cache efficiency change should be -1.5pp") } func TestComputeTokenUsageDiff_Run1Nil(t *testing.T) { From 87402bef980abe44b136da5104815f4ce0bf7fc5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 4 Apr 2026 19:07:20 +0000 Subject: [PATCH 5/5] fix: use conventional variadic syntax in audit diff Use string Agent-Logs-Url: https://github.com/github/gh-aw/sessions/edc57949-d44b-4550-b62b-31a85eab8f5c Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- pkg/cli/audit_diff_command.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/cli/audit_diff_command.go b/pkg/cli/audit_diff_command.go index e552289d4dc..58425461de1 100644 --- a/pkg/cli/audit_diff_command.go +++ b/pkg/cli/audit_diff_command.go @@ -15,7 +15,7 @@ import ( // NewAuditDiffSubcommand creates the audit diff subcommand func NewAuditDiffSubcommand() *cobra.Command { cmd := &cobra.Command{ - Use: "diff [...]", + Use: "diff ...", Short: "Compare behavior across workflow runs", Long: `Compare workflow run behavior between a base run and one or more comparison runs to detect policy regressions, new unauthorized domains, behavioral drift, and changes in