diff --git a/pkg/agentdrain/data/default_weights.json b/pkg/agentdrain/data/default_weights.json index 963f9eae34..3c1f2ab812 100644 --- a/pkg/agentdrain/data/default_weights.json +++ b/pkg/agentdrain/data/default_weights.json @@ -3,12 +3,7 @@ "clusters": null, "config": { "Depth": 4, - "ExcludeFields": [ - "session_id", - "trace_id", - "span_id", - "timestamp" - ], + "ExcludeFields": ["session_id", "trace_id", "span_id", "timestamp"], "MaskRules": [ { "Name": "uuid", @@ -54,21 +49,12 @@ "id": 1, "size": 100, "stage": "finish", - "template": [ - "stage=finish", - "\u003c*\u003e", - "tokens=\u003cNUM\u003e" - ] + "template": ["stage=finish", "\u003c*\u003e", "tokens=\u003cNUM\u003e"] } ], "config": { "Depth": 4, - "ExcludeFields": [ - "session_id", - "trace_id", - "span_id", - "timestamp" - ], + "ExcludeFields": ["session_id", "trace_id", "span_id", "timestamp"], "MaskRules": [ { "Name": "uuid", @@ -114,21 +100,12 @@ "id": 1, "size": 17, "stage": "plan", - "template": [ - "stage=plan", - "errors=\u003cNUM\u003e", - "turns=\u003cNUM\u003e" - ] + "template": ["stage=plan", "errors=\u003cNUM\u003e", "turns=\u003cNUM\u003e"] } ], "config": { "Depth": 4, - "ExcludeFields": [ - "session_id", - "trace_id", - "span_id", - "timestamp" - ], + "ExcludeFields": ["session_id", "trace_id", "span_id", "timestamp"], "MaskRules": [ { "Name": "uuid", @@ -172,12 +149,7 @@ "clusters": null, "config": { "Depth": 4, - "ExcludeFields": [ - "session_id", - "trace_id", - "span_id", - "timestamp" - ], + "ExcludeFields": ["session_id", "trace_id", "span_id", "timestamp"], "MaskRules": [ { "Name": "uuid", @@ -221,12 +193,7 @@ "clusters": null, "config": { "Depth": 4, - "ExcludeFields": [ - "session_id", - "trace_id", - "span_id", - "timestamp" - ], + "ExcludeFields": ["session_id", "trace_id", "span_id", "timestamp"], "MaskRules": [ { "Name": "uuid", @@ -564,12 +531,7 @@ ], "config": { "Depth": 4, - "ExcludeFields": [ - "session_id", - "trace_id", - "span_id", - "timestamp" - ], + "ExcludeFields": ["session_id", "trace_id", "span_id", "timestamp"], "MaskRules": [ { "Name": "uuid", @@ -609,4 +571,4 @@ }, "next_id": 8 } -} \ No newline at end of file +} diff --git a/pkg/workflow/compiler_orchestrator_engine.go b/pkg/workflow/compiler_orchestrator_engine.go index 6086c2e9e0..d22613dd0e 100644 --- a/pkg/workflow/compiler_orchestrator_engine.go +++ b/pkg/workflow/compiler_orchestrator_engine.go @@ -86,6 +86,14 @@ func (c *Compiler) setupEngineAndImports(result *parser.FrontmatterResult, clean return nil, err } + // Validate steps/post-steps secrets regardless of strict mode (error in strict, warning in non-strict) + if err := c.validateStepsSecrets(result.Frontmatter); err != nil { + orchestratorEngineLog.Printf("Steps secrets validation failed: %v", err) + // Restore strict mode before returning error + c.strictMode = initialStrictMode + return nil, err + } + // Validate check-for-updates flag regardless of strict mode (error in strict, warning in non-strict) if err := c.validateUpdateCheck(result.Frontmatter); err != nil { orchestratorEngineLog.Printf("Update check validation failed: %v", err) diff --git a/pkg/workflow/strict_mode_steps_validation.go b/pkg/workflow/strict_mode_steps_validation.go new file mode 100644 index 0000000000..e6ab817307 --- /dev/null +++ b/pkg/workflow/strict_mode_steps_validation.go @@ -0,0 +1,137 @@ +// This file contains strict mode validation for secrets in custom steps. +// +// It validates that secrets expressions are not used in custom steps (steps and +// post-steps injected in the agent job). In strict mode this is an error; in +// non-strict mode a warning is emitted instead. +// +// The goal is to minimise the number of secrets present in the agent job: the +// only secrets that should appear there are those required to configure the +// agentic engine itself. + +package workflow + +import ( + "fmt" + "os" + "strings" + + "github.com/github/gh-aw/pkg/console" +) + +// validateStepsSecrets checks both the "steps" and "post-steps" frontmatter sections +// for secrets expressions (e.g. ${{ secrets.MY_SECRET }}). +// +// In strict mode the presence of any such expression is treated as an error. +// In non-strict mode a warning is emitted instead. +func (c *Compiler) validateStepsSecrets(frontmatter map[string]any) error { + for _, sectionName := range []string{"steps", "post-steps"} { + if err := c.validateStepsSectionSecrets(frontmatter, sectionName); err != nil { + return err + } + } + return nil +} + +// validateStepsSectionSecrets inspects a single steps section (named by sectionName) +// inside frontmatter for any secrets.* expressions. +func (c *Compiler) validateStepsSectionSecrets(frontmatter map[string]any, sectionName string) error { + rawValue, exists := frontmatter[sectionName] + if !exists { + strictModeValidationLog.Printf("No %s section found, skipping secrets validation", sectionName) + return nil + } + + steps, ok := rawValue.([]any) + if !ok { + strictModeValidationLog.Printf("%s section is not a list, skipping secrets validation", sectionName) + return nil + } + + var secretRefs []string + for _, step := range steps { + refs := extractSecretsFromStepValue(step) + secretRefs = append(secretRefs, refs...) + } + + // Filter out the built-in GITHUB_TOKEN: it is already present in every runner + // environment and is not a user-defined secret that could be accidentally leaked. + secretRefs = filterBuiltinTokens(secretRefs) + + if len(secretRefs) == 0 { + strictModeValidationLog.Printf("No secrets found in %s section", sectionName) + return nil + } + + strictModeValidationLog.Printf("Found %d secret expression(s) in %s section: %v", len(secretRefs), sectionName, secretRefs) + + // Deduplicate for cleaner messages. + secretRefs = deduplicateStringSlice(secretRefs) + + if c.strictMode { + return fmt.Errorf( + "strict mode: secrets expressions detected in '%s' section may be leaked to the agent job. Found: %s. "+ + "Operations requiring secrets must be moved to a separate job outside the agent job", + sectionName, strings.Join(secretRefs, ", "), + ) + } + + // Non-strict mode: emit a warning. + warningMsg := fmt.Sprintf( + "Warning: secrets expressions detected in '%s' section may be leaked to the agent job. Found: %s. "+ + "Consider moving operations requiring secrets to a separate job outside the agent job.", + sectionName, strings.Join(secretRefs, ", "), + ) + fmt.Fprintln(os.Stderr, console.FormatWarningMessage(warningMsg)) + c.IncrementWarningCount() + + return nil +} + +// extractSecretsFromStepValue recursively walks a step value (which may be a map, +// slice, or primitive) and returns all secrets.* expressions found in string values. +func extractSecretsFromStepValue(value any) []string { + var refs []string + switch v := value.(type) { + case string: + for _, expr := range ExtractSecretsFromValue(v) { + refs = append(refs, expr) + } + case map[string]any: + for _, fieldValue := range v { + refs = append(refs, extractSecretsFromStepValue(fieldValue)...) + } + case []any: + for _, item := range v { + refs = append(refs, extractSecretsFromStepValue(item)...) + } + } + return refs +} + +// deduplicateStringSlice returns a slice with duplicate entries removed, +// preserving the order of first occurrence. +func deduplicateStringSlice(in []string) []string { + seen := make(map[string]bool, len(in)) + out := make([]string, 0, len(in)) + for _, s := range in { + if !seen[s] { + seen[s] = true + out = append(out, s) + } + } + return out +} + +// filterBuiltinTokens removes secret expressions that reference GitHub's built-in +// GITHUB_TOKEN from the list. GITHUB_TOKEN is automatically provided by the runner +// environment and is not a user-defined secret; it therefore does not represent an +// accidental leak into the agent job. +func filterBuiltinTokens(refs []string) []string { + out := refs[:0:0] + for _, ref := range refs { + if !strings.Contains(ref, "secrets.GITHUB_TOKEN") { + out = append(out, ref) + } + } + return out +} diff --git a/pkg/workflow/strict_mode_steps_validation_test.go b/pkg/workflow/strict_mode_steps_validation_test.go new file mode 100644 index 0000000000..873ad8fcfc --- /dev/null +++ b/pkg/workflow/strict_mode_steps_validation_test.go @@ -0,0 +1,299 @@ +//go:build !integration + +package workflow + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestValidateStepsSecrets(t *testing.T) { + tests := []struct { + name string + frontmatter map[string]any + strictMode bool + expectError bool + errorMsg string + }{ + { + name: "no steps section is allowed", + frontmatter: map[string]any{ + "on": "push", + }, + strictMode: true, + expectError: false, + }, + { + name: "steps without secrets is allowed", + frontmatter: map[string]any{ + "steps": []any{ + map[string]any{ + "name": "Setup", + "run": "echo hello", + }, + }, + }, + strictMode: true, + expectError: false, + }, + { + name: "steps with GITHUB_TOKEN are allowed (built-in token is exempt)", + frontmatter: map[string]any{ + "steps": []any{ + map[string]any{ + "name": "Use GH CLI", + "env": map[string]any{ + "GH_TOKEN": "${{ secrets.GITHUB_TOKEN }}", + }, + "run": "gh issue list", + }, + }, + }, + strictMode: true, + expectError: false, + }, + { + name: "post-steps without secrets is allowed", + frontmatter: map[string]any{ + "post-steps": []any{ + map[string]any{ + "name": "Cleanup", + "run": "echo done", + }, + }, + }, + strictMode: true, + expectError: false, + }, + { + name: "steps with secret in run field in strict mode fails", + frontmatter: map[string]any{ + "steps": []any{ + map[string]any{ + "name": "Use secret", + "run": "curl -H 'Authorization: ${{ secrets.API_TOKEN }}' https://example.com", + }, + }, + }, + strictMode: true, + expectError: true, + errorMsg: "strict mode: secrets expressions detected in 'steps' section", + }, + { + name: "steps with secret in env field in strict mode fails", + frontmatter: map[string]any{ + "steps": []any{ + map[string]any{ + "name": "Use secret", + "run": "echo hi", + "env": map[string]any{ + "API_KEY": "${{ secrets.API_KEY }}", + }, + }, + }, + }, + strictMode: true, + expectError: true, + errorMsg: "strict mode: secrets expressions detected in 'steps' section", + }, + { + name: "steps with secret in with field in strict mode fails", + frontmatter: map[string]any{ + "steps": []any{ + map[string]any{ + "uses": "some/action@v1", + "with": map[string]any{ + "token": "${{ secrets.MY_API_TOKEN }}", + }, + }, + }, + }, + strictMode: true, + expectError: true, + errorMsg: "strict mode: secrets expressions detected in 'steps' section", + }, + { + name: "post-steps with secret in strict mode fails", + frontmatter: map[string]any{ + "post-steps": []any{ + map[string]any{ + "name": "Notify", + "run": "echo ${{ secrets.SLACK_TOKEN }}", + }, + }, + }, + strictMode: true, + expectError: true, + errorMsg: "strict mode: secrets expressions detected in 'post-steps' section", + }, + { + name: "steps with secret in non-strict mode emits warning but no error", + frontmatter: map[string]any{ + "steps": []any{ + map[string]any{ + "name": "Use secret", + "run": "echo ${{ secrets.API_KEY }}", + }, + }, + }, + strictMode: false, + expectError: false, + }, + { + name: "post-steps with secret in non-strict mode emits warning but no error", + frontmatter: map[string]any{ + "post-steps": []any{ + map[string]any{ + "name": "Notify", + "run": "echo ${{ secrets.SLACK_TOKEN }}", + }, + }, + }, + strictMode: false, + expectError: false, + }, + { + name: "steps section that is not a list is skipped", + frontmatter: map[string]any{ + "steps": "not-a-list", + }, + strictMode: true, + expectError: false, + }, + { + name: "multiple secrets in steps are all reported", + frontmatter: map[string]any{ + "steps": []any{ + map[string]any{ + "name": "Step 1", + "env": map[string]any{ + "KEY1": "${{ secrets.KEY1 }}", + "KEY2": "${{ secrets.KEY2 }}", + }, + }, + }, + }, + strictMode: true, + expectError: true, + errorMsg: "strict mode: secrets expressions detected in 'steps' section", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + compiler := NewCompiler() + compiler.strictMode = tt.strictMode + + err := compiler.validateStepsSecrets(tt.frontmatter) + + if tt.expectError { + require.Error(t, err, "expected an error but got none") + assert.Contains(t, err.Error(), tt.errorMsg, + "error %q should contain %q", err.Error(), tt.errorMsg) + } else { + assert.NoError(t, err, "expected no error") + } + }) + } +} + +func TestExtractSecretsFromStepValue(t *testing.T) { + tests := []struct { + name string + input any + expected []string + }{ + { + name: "nil value returns empty", + input: nil, + expected: nil, + }, + { + name: "plain string without secrets returns empty", + input: "echo hello", + expected: nil, + }, + { + name: "string with secret expression returns it", + input: "${{ secrets.TOKEN }}", + expected: []string{"${{ secrets.TOKEN }}"}, + }, + { + name: "string with secret in larger expression returns it", + input: "curl -H 'Authorization: ${{ secrets.TOKEN }}'", + expected: []string{"${{ secrets.TOKEN }}"}, + }, + { + name: "map with nested secret returns it", + input: map[string]any{ + "token": "${{ secrets.GH_TOKEN }}", + "plain": "hello", + }, + expected: []string{"${{ secrets.GH_TOKEN }}"}, + }, + { + name: "slice with secret returns it", + input: []any{ + "no secret here", + "${{ secrets.MY_SECRET }}", + }, + expected: []string{"${{ secrets.MY_SECRET }}"}, + }, + { + name: "deeply nested secret is found", + input: map[string]any{ + "env": map[string]any{ + "API_KEY": "${{ secrets.API_KEY }}", + }, + }, + expected: []string{"${{ secrets.API_KEY }}"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := extractSecretsFromStepValue(tt.input) + if len(tt.expected) == 0 { + assert.Empty(t, result, "expected no secrets") + } else { + assert.Len(t, result, len(tt.expected), "unexpected number of secrets extracted") + for _, expected := range tt.expected { + assert.Contains(t, result, expected, "expected %q to be in results", expected) + } + } + }) + } +} + +func TestDeduplicateStringSlice(t *testing.T) { + tests := []struct { + name string + input []string + expected []string + }{ + { + name: "empty slice returns empty", + input: []string{}, + expected: []string{}, + }, + { + name: "no duplicates returns same", + input: []string{"a", "b", "c"}, + expected: []string{"a", "b", "c"}, + }, + { + name: "duplicates are removed preserving order", + input: []string{"a", "b", "a", "c", "b"}, + expected: []string{"a", "b", "c"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := deduplicateStringSlice(tt.input) + assert.Equal(t, tt.expected, result, "unexpected deduplication result") + }) + } +}