@@ -15,35 +15,54 @@ import (
1515// NewAuditDiffSubcommand creates the audit diff subcommand
1616func NewAuditDiffSubcommand () * cobra.Command {
1717 cmd := & cobra.Command {
18- Use : "diff <run-id-1> <run-id-2>" ,
19- Short : "Compare behavior across two workflow runs" ,
20- Long : `Compare workflow run behavior between two workflow runs to detect policy regressions,
21- new unauthorized domains, behavioral drift, and changes in MCP tool usage or run metrics.
18+ Use : "diff <base-run-id> <compare-run-id>..." ,
19+ Short : "Compare behavior across workflow runs" ,
20+ Long : `Compare workflow run behavior between a base run and one or more comparison runs
21+ to detect policy regressions, new unauthorized domains, behavioral drift, and changes in
22+ MCP tool usage, token usage, or run metrics.
2223
23- This command downloads artifacts for both runs (using cached data when available),
24+ The first argument is the base (reference) run. All subsequent arguments are compared
25+ against that base. This enables tracking behavioral drift across multiple runs at once.
26+
27+ This command downloads artifacts for all runs (using cached data when available),
2428analyzes their data, and produces a diff showing:
25- - New domains that appeared in the second run
26- - Removed domains that were in the first run but not the second
29+ - New domains that appeared in the comparison run
30+ - Removed domains that were in the base run but not the comparison
2731- Status changes (domains that flipped between allowed and denied)
2832- Volume changes (significant request count changes, >100% threshold)
2933- Anomaly flags (new denied domains, previously-denied now allowed)
3034- MCP tool invocation changes (new/removed tools, call count and error count diffs)
3135- Run metrics comparison (token usage, duration, turns) when cached data is available
36+ - Detailed token usage breakdown (input/output/cache/effective tokens) from firewall proxy
3237
3338Examples:
34- ` + string (constants .CLIExtensionPrefix ) + ` audit diff 12345 12346 # Compare two runs
35- ` + string (constants .CLIExtensionPrefix ) + ` audit diff 12345 12346 --format markdown # Markdown output for PR comments
36- ` + string (constants .CLIExtensionPrefix ) + ` audit diff 12345 12346 --json # JSON for CI integration
37- ` + string (constants .CLIExtensionPrefix ) + ` audit diff 12345 12346 --repo owner/repo # Specify repository` ,
38- Args : cobra .ExactArgs (2 ),
39+ ` + string (constants .CLIExtensionPrefix ) + ` audit diff 12345 12346 # Compare two runs
40+ ` + string (constants .CLIExtensionPrefix ) + ` audit diff 12345 12346 12347 12348 # Compare base against 3 runs
41+ ` + string (constants .CLIExtensionPrefix ) + ` audit diff 12345 12346 --format markdown # Markdown output for PR comments
42+ ` + string (constants .CLIExtensionPrefix ) + ` audit diff 12345 12346 --json # JSON for CI integration
43+ ` + string (constants .CLIExtensionPrefix ) + ` audit diff 12345 12346 --repo owner/repo # Specify repository` ,
44+ Args : cobra .MinimumNArgs (2 ),
3945 RunE : func (cmd * cobra.Command , args []string ) error {
40- runID1 , err := strconv .ParseInt (args [0 ], 10 , 64 )
46+ baseRunID , err := strconv .ParseInt (args [0 ], 10 , 64 )
4147 if err != nil {
42- return fmt .Errorf ("invalid run ID %q: must be a numeric run ID" , args [0 ])
48+ return fmt .Errorf ("invalid base run ID %q: must be a numeric run ID" , args [0 ])
4349 }
44- runID2 , err := strconv .ParseInt (args [1 ], 10 , 64 )
45- if err != nil {
46- return fmt .Errorf ("invalid run ID %q: must be a numeric run ID" , args [1 ])
50+
51+ compareRunIDs := make ([]int64 , 0 , len (args )- 1 )
52+ seen := make (map [int64 ]bool )
53+ for _ , arg := range args [1 :] {
54+ id , err := strconv .ParseInt (arg , 10 , 64 )
55+ if err != nil {
56+ return fmt .Errorf ("invalid run ID %q: must be a numeric run ID" , arg )
57+ }
58+ if id == baseRunID {
59+ return fmt .Errorf ("comparison run ID %d is the same as the base run ID: cannot diff a run against itself" , id )
60+ }
61+ if seen [id ] {
62+ return fmt .Errorf ("duplicate comparison run ID %d: each run ID must appear only once" , id )
63+ }
64+ seen [id ] = true
65+ compareRunIDs = append (compareRunIDs , id )
4766 }
4867
4968 outputDir , _ := cmd .Flags ().GetString ("output" )
@@ -62,7 +81,7 @@ Examples:
6281 repo = parts [1 ]
6382 }
6483
65- return RunAuditDiff (cmd .Context (), runID1 , runID2 , owner , repo , hostname , outputDir , verbose , jsonOutput , format )
84+ return RunAuditDiff (cmd .Context (), baseRunID , compareRunIDs , owner , repo , hostname , outputDir , verbose , jsonOutput , format )
6685 },
6786 }
6887
@@ -74,9 +93,10 @@ Examples:
7493 return cmd
7594}
7695
77- // RunAuditDiff compares behavior between two workflow runs
78- func RunAuditDiff (ctx context.Context , runID1 , runID2 int64 , owner , repo , hostname , outputDir string , verbose , jsonOutput bool , format string ) error {
79- auditDiffLog .Printf ("Starting audit diff: run1=%d, run2=%d" , runID1 , runID2 )
96+ // RunAuditDiff compares behavior between a base workflow run and one or more comparison runs.
97+ // The base run is the reference point; each comparison run is diffed against it independently.
98+ func RunAuditDiff (ctx context.Context , baseRunID int64 , compareRunIDs []int64 , owner , repo , hostname , outputDir string , verbose , jsonOutput bool , format string ) error {
99+ auditDiffLog .Printf ("Starting audit diff: base=%d, compare=%v" , baseRunID , compareRunIDs )
80100
81101 // Auto-detect GHES host from git remote if hostname is not provided
82102 if hostname == "" {
@@ -94,57 +114,65 @@ func RunAuditDiff(ctx context.Context, runID1, runID2 int64, owner, repo, hostna
94114 default :
95115 }
96116
97- fmt .Fprintln (os .Stderr , console .FormatInfoMessage (fmt .Sprintf ("Comparing workflow runs: Run #%d → Run #%d" , runID1 , runID2 )))
117+ if len (compareRunIDs ) == 1 {
118+ fmt .Fprintln (os .Stderr , console .FormatInfoMessage (fmt .Sprintf ("Comparing workflow runs: Run #%d → Run #%d" , baseRunID , compareRunIDs [0 ])))
119+ } else {
120+ fmt .Fprintln (os .Stderr , console .FormatInfoMessage (fmt .Sprintf ("Comparing workflow runs: Run #%d (base) vs %d comparison runs" , baseRunID , len (compareRunIDs ))))
121+ }
98122
99- // Load run summaries for both runs
100- fmt .Fprintln (os .Stderr , console .FormatProgressMessage (fmt .Sprintf ("Loading data for run %d..." , runID1 )))
101- summary1 , err := loadRunSummaryForDiff (runID1 , outputDir , owner , repo , hostname , verbose )
123+ // Load base run summary once (shared across all comparisons)
124+ fmt .Fprintln (os .Stderr , console .FormatProgressMessage (fmt .Sprintf ("Loading data for base run %d..." , baseRunID )))
125+ baseSummary , err := loadRunSummaryForDiff (baseRunID , outputDir , owner , repo , hostname , verbose )
102126 if err != nil {
103- return fmt .Errorf ("failed to load data for run %d: %w" , runID1 , err )
127+ return fmt .Errorf ("failed to load data for base run %d: %w" , baseRunID , err )
104128 }
105129
106- // Check context cancellation between downloads
107- select {
108- case <- ctx .Done ():
109- fmt .Fprintln (os .Stderr , console .FormatWarningMessage ("Operation cancelled" ))
110- return ctx .Err ()
111- default :
112- }
130+ diffs := make ([]* AuditDiff , 0 , len (compareRunIDs ))
113131
114- fmt .Fprintln (os .Stderr , console .FormatProgressMessage (fmt .Sprintf ("Loading data for run %d..." , runID2 )))
115- summary2 , err := loadRunSummaryForDiff (runID2 , outputDir , owner , repo , hostname , verbose )
116- if err != nil {
117- return fmt .Errorf ("failed to load data for run %d: %w" , runID2 , err )
118- }
132+ for _ , compareRunID := range compareRunIDs {
133+ // Check context cancellation between downloads
134+ select {
135+ case <- ctx .Done ():
136+ fmt .Fprintln (os .Stderr , console .FormatWarningMessage ("Operation cancelled" ))
137+ return ctx .Err ()
138+ default :
139+ }
119140
120- // Warn if no firewall data found
121- fw1 := summary1 .FirewallAnalysis
122- fw2 := summary2 .FirewallAnalysis
123- if fw1 == nil && fw2 == nil {
124- fmt .Fprintln (os .Stderr , console .FormatWarningMessage ("No firewall data found in either run. Both runs may predate firewall logging." ))
125- } else {
126- if fw1 == nil {
127- fmt .Fprintln (os .Stderr , console .FormatWarningMessage (fmt .Sprintf ("No firewall data found for run %d (older run may lack firewall logs)" , runID1 )))
141+ fmt .Fprintln (os .Stderr , console .FormatProgressMessage (fmt .Sprintf ("Loading data for run %d..." , compareRunID )))
142+ compareSummary , err := loadRunSummaryForDiff (compareRunID , outputDir , owner , repo , hostname , verbose )
143+ if err != nil {
144+ return fmt .Errorf ("failed to load data for run %d: %w" , compareRunID , err )
128145 }
129- if fw2 == nil {
130- fmt .Fprintln (os .Stderr , console .FormatWarningMessage (fmt .Sprintf ("No firewall data found for run %d" , runID2 )))
146+
147+ // Warn if no firewall data found for this pair
148+ fw1 := baseSummary .FirewallAnalysis
149+ fw2 := compareSummary .FirewallAnalysis
150+ if fw1 == nil && fw2 == nil {
151+ fmt .Fprintln (os .Stderr , console .FormatWarningMessage (fmt .Sprintf ("No firewall data found for run pair %d→%d. Both runs may predate firewall logging." , baseRunID , compareRunID )))
152+ } else {
153+ if fw1 == nil {
154+ fmt .Fprintln (os .Stderr , console .FormatWarningMessage (fmt .Sprintf ("No firewall data found for base run %d (older run may lack firewall logs)" , baseRunID )))
155+ }
156+ if fw2 == nil {
157+ fmt .Fprintln (os .Stderr , console .FormatWarningMessage (fmt .Sprintf ("No firewall data found for run %d" , compareRunID )))
158+ }
131159 }
132- }
133160
134- // Compute the full diff
135- diff := computeAuditDiff (runID1 , runID2 , summary1 , summary2 )
161+ diff := computeAuditDiff (baseRunID , compareRunID , baseSummary , compareSummary )
162+ diffs = append (diffs , diff )
163+ }
136164
137165 // Render output
138166 if jsonOutput || format == "json" {
139- return renderAuditDiffJSON (diff )
167+ return renderAuditDiffJSON (diffs )
140168 }
141169
142170 if format == "markdown" {
143- renderAuditDiffMarkdown (diff )
171+ renderAuditDiffMarkdown (diffs )
144172 return nil
145173 }
146174
147175 // Default: pretty console output
148- renderAuditDiffPretty (diff )
176+ renderAuditDiffPretty (diffs )
149177 return nil
150178}
0 commit comments