From 26e075333c84f947fef4198551584a0ad80401d2 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Thu, 19 Mar 2026 16:44:32 +0100 Subject: [PATCH 1/8] acceptance: add -subset flag to run one EnvMatrix variant per test Adds a -subset flag that reduces each EnvMatrix variable to a single value using consistent hashing, cutting test runtime when full coverage is not needed. DATABRICKS_BUNDLE_ENGINE is biased 90% toward "direct" unless the script references the variable, in which case all variants are kept. The flag is automatically enabled during -update mode. Co-Authored-By: Claude Sonnet 4.6 --- acceptance/acceptance_test.go | 15 +++++- acceptance/internal/config.go | 61 +++++++++++++++++++++ acceptance/internal/config_test.go | 86 ++++++++++++++++++++++++++++++ 3 files changed, 161 insertions(+), 1 deletion(-) diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go index e293388dc6..24cc19406f 100644 --- a/acceptance/acceptance_test.go +++ b/acceptance/acceptance_test.go @@ -47,6 +47,7 @@ var ( UseVersion string WorkspaceTmpDir bool OnlyOutTestToml bool + Subset bool ) // In order to debug CLI running under acceptance test, search for TestInprocessMode and update @@ -78,6 +79,7 @@ func init() { // to simulate an identical environment. flag.BoolVar(&WorkspaceTmpDir, "workspace-tmp-dir", false, "Run tests on the workspace file system (For DBR testing).") flag.BoolVar(&OnlyOutTestToml, "only-out-test-toml", false, "Only regenerate out.test.toml files without running tests") + flag.BoolVar(&Subset, "subset", false, "Select a subset of EnvMatrix variants using consistent hashing") } const ( @@ -157,6 +159,10 @@ func setReplsForTestEnvVars(t *testing.T, repls *testdiff.ReplacementsContext) { } func testAccept(t *testing.T, inprocessMode bool, singleTest string) int { + if testdiff.OverwriteMode { + Subset = true + } + repls := testdiff.ReplacementsContext{} cwd, err := os.Getwd() require.NoError(t, err) @@ -365,7 +371,14 @@ func testAccept(t *testing.T, inprocessMode bool, singleTest string) int { extraVars = append(extraVars, "CONFIG_Cloud=true") } - expanded := internal.ExpandEnvMatrix(config.EnvMatrix, config.EnvMatrixExclude, extraVars) + envMatrix := config.EnvMatrix + if Subset { + scriptContent, err := os.ReadFile(filepath.Join(dir, EntryPointScript)) + scriptUsesEngine := err == nil && strings.Contains(string(scriptContent), "$DATABRICKS_BUNDLE_ENGINE") + envMatrix = internal.SubsetEnvMatrix(envMatrix, dir, scriptUsesEngine) + } + + expanded := internal.ExpandEnvMatrix(envMatrix, config.EnvMatrixExclude, extraVars) for ind, envset := range expanded { envname := strings.Join(envset, "/") diff --git a/acceptance/internal/config.go b/acceptance/internal/config.go index 23981e7093..7584a10017 100644 --- a/acceptance/internal/config.go +++ b/acceptance/internal/config.go @@ -1,6 +1,7 @@ package internal import ( + "hash/fnv" "os" "path/filepath" "reflect" @@ -427,6 +428,66 @@ func filterExcludedEnvSets(envSets [][]string, exclude map[string][]string) [][] return filtered } +// SubsetEnvMatrix reduces each EnvMatrix variable to a single value using consistent hashing. +// For DATABRICKS_BUNDLE_ENGINE: if the script references $DATABRICKS_BUNDLE_ENGINE, both variants +// are kept; otherwise "direct" is selected with 90% probability and the other value with 10%. +// For all other variables with multiple values, one value is selected based on hash(testDir + varName). +func SubsetEnvMatrix(matrix map[string][]string, testDir string, scriptUsesEngine bool) map[string][]string { + if len(matrix) == 0 { + return matrix + } + + result := make(map[string][]string, len(matrix)) + for key, values := range matrix { + if len(values) <= 1 { + result[key] = values + continue + } + + if key == "DATABRICKS_BUNDLE_ENGINE" { + if scriptUsesEngine { + // Script references the variable — keep all variants. + result[key] = values + } else { + // Select "direct" with 90% probability, otherwise the other value. + h := fnv.New64a() + h.Write([]byte(testDir)) + h.Write([]byte(key)) + pct := h.Sum64() % 100 + directIdx := slices.Index(values, "direct") + if directIdx >= 0 && pct < 90 { + result[key] = []string{"direct"} + } else if directIdx >= 0 { + // Pick the other value (non-direct). + for _, v := range values { + if v != "direct" { + result[key] = []string{v} + break + } + } + } else { + // No "direct" value — fall through to hash selection. + idx := consistentSelect(testDir, key, len(values)) + result[key] = []string{values[idx]} + } + } + } else { + idx := consistentSelect(testDir, key, len(values)) + result[key] = []string{values[idx]} + } + } + + return result +} + +// consistentSelect returns a deterministic index in [0, n) based on hash(testDir + varName). +func consistentSelect(testDir, varName string, n int) int { + h := fnv.New64a() + h.Write([]byte(testDir)) + h.Write([]byte(varName)) + return int(h.Sum64() % uint64(n)) +} + // matchesExclusionRule returns true if envSet contains all KEY=value pairs from excludeRule. func matchesExclusionRule(envSet, excludeRule []string) bool { for _, excludePair := range excludeRule { diff --git a/acceptance/internal/config_test.go b/acceptance/internal/config_test.go index d21c4bcb6c..1353aaf395 100644 --- a/acceptance/internal/config_test.go +++ b/acceptance/internal/config_test.go @@ -1,6 +1,7 @@ package internal import ( + "fmt" "os" "path/filepath" "testing" @@ -201,6 +202,91 @@ func TestExpandEnvMatrix(t *testing.T) { } } +func TestSubsetEnvMatrix_SingleValues(t *testing.T) { + // Single-value variables are kept as-is. + matrix := map[string][]string{ + "KEY1": {"A"}, + "KEY2": {"B"}, + } + result := SubsetEnvMatrix(matrix, "some/test", false) + assert.Equal(t, map[string][]string{"KEY1": {"A"}, "KEY2": {"B"}}, result) +} + +func TestSubsetEnvMatrix_EmptyMatrix(t *testing.T) { + result := SubsetEnvMatrix(nil, "test", false) + assert.Nil(t, result) +} + +func TestSubsetEnvMatrix_NonEngineMultipleValues(t *testing.T) { + // For non-engine variables with multiple values, exactly one is selected. + matrix := map[string][]string{ + "FOO": {"a", "b", "c"}, + } + result := SubsetEnvMatrix(matrix, "test/dir", false) + require.Len(t, result["FOO"], 1) + assert.Contains(t, []string{"a", "b", "c"}, result["FOO"][0]) +} + +func TestSubsetEnvMatrix_NonEngineDeterministic(t *testing.T) { + // Same inputs produce same output. + matrix := map[string][]string{ + "FOO": {"a", "b", "c"}, + } + r1 := SubsetEnvMatrix(matrix, "test/dir", false) + r2 := SubsetEnvMatrix(matrix, "test/dir", false) + assert.Equal(t, r1, r2) +} + +func TestSubsetEnvMatrix_NonEngineDifferentDirs(t *testing.T) { + // Different test dirs may select different values (not guaranteed but likely with enough dirs). + matrix := map[string][]string{ + "FOO": {"a", "b", "c", "d", "e"}, + } + seen := map[string]bool{} + for i := range 100 { + dir := fmt.Sprintf("dir%d", i) + r := SubsetEnvMatrix(matrix, dir, false) + seen[r["FOO"][0]] = true + } + assert.Greater(t, len(seen), 1, "expected different dirs to select different values") +} + +func TestSubsetEnvMatrix_EngineScriptUsesEngine(t *testing.T) { + // When script uses $DATABRICKS_BUNDLE_ENGINE, both variants are kept. + matrix := map[string][]string{ + "DATABRICKS_BUNDLE_ENGINE": {"terraform", "direct"}, + } + result := SubsetEnvMatrix(matrix, "test/dir", true) + assert.Equal(t, []string{"terraform", "direct"}, result["DATABRICKS_BUNDLE_ENGINE"]) +} + +func TestSubsetEnvMatrix_EngineScriptDoesNotUseEngine(t *testing.T) { + // When script doesn't use $DATABRICKS_BUNDLE_ENGINE, exactly one variant is selected. + matrix := map[string][]string{ + "DATABRICKS_BUNDLE_ENGINE": {"terraform", "direct"}, + } + result := SubsetEnvMatrix(matrix, "test/dir", false) + require.Len(t, result["DATABRICKS_BUNDLE_ENGINE"], 1) +} + +func TestSubsetEnvMatrix_EngineDirectBias(t *testing.T) { + // Across many test dirs, "direct" should be selected ~90% of the time. + matrix := map[string][]string{ + "DATABRICKS_BUNDLE_ENGINE": {"terraform", "direct"}, + } + directCount := 0 + total := 1000 + for i := range total { + dir := fmt.Sprintf("test/dir%d", i) + r := SubsetEnvMatrix(matrix, dir, false) + if r["DATABRICKS_BUNDLE_ENGINE"][0] == "direct" { + directCount++ + } + } + ratio := float64(directCount) / float64(total) + assert.InDelta(t, 0.9, ratio, 0.05, "expected ~90%% direct, got %.1f%%", ratio*100) +} + func TestLoadConfigPhaseIsNotInherited(t *testing.T) { tests := []struct { name string From fb4458e5a2321b69d9cfe7552f9d520c132a7a7f Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Thu, 19 Mar 2026 17:08:41 +0100 Subject: [PATCH 2/8] acceptance: subset by sampling from expanded+excluded combos with weights Replace per-variable SubsetEnvMatrix with SubsetExpanded which operates on the already-expanded and exclusion-filtered combo list. This ensures EnvMatrixExclude rules are respected before selection. DATABRICKS_BUNDLE_ENGINE=direct gets weight 10; all other variants weight 1, giving ~10/11 probability of selecting a direct variant. Co-Authored-By: Claude Sonnet 4.6 --- acceptance/acceptance_test.go | 8 +-- acceptance/internal/config.go | 67 ++++++------------------ acceptance/internal/config_test.go | 83 +++--------------------------- 3 files changed, 25 insertions(+), 133 deletions(-) diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go index 24cc19406f..eb177d851f 100644 --- a/acceptance/acceptance_test.go +++ b/acceptance/acceptance_test.go @@ -371,15 +371,11 @@ func testAccept(t *testing.T, inprocessMode bool, singleTest string) int { extraVars = append(extraVars, "CONFIG_Cloud=true") } - envMatrix := config.EnvMatrix + expanded := internal.ExpandEnvMatrix(config.EnvMatrix, config.EnvMatrixExclude, extraVars) if Subset { - scriptContent, err := os.ReadFile(filepath.Join(dir, EntryPointScript)) - scriptUsesEngine := err == nil && strings.Contains(string(scriptContent), "$DATABRICKS_BUNDLE_ENGINE") - envMatrix = internal.SubsetEnvMatrix(envMatrix, dir, scriptUsesEngine) + expanded = internal.SubsetExpanded(expanded, dir) } - expanded := internal.ExpandEnvMatrix(envMatrix, config.EnvMatrixExclude, extraVars) - for ind, envset := range expanded { envname := strings.Join(envset, "/") t.Run(envname, func(t *testing.T) { diff --git a/acceptance/internal/config.go b/acceptance/internal/config.go index 7584a10017..4f22df4755 100644 --- a/acceptance/internal/config.go +++ b/acceptance/internal/config.go @@ -428,64 +428,27 @@ func filterExcludedEnvSets(envSets [][]string, exclude map[string][]string) [][] return filtered } -// SubsetEnvMatrix reduces each EnvMatrix variable to a single value using consistent hashing. -// For DATABRICKS_BUNDLE_ENGINE: if the script references $DATABRICKS_BUNDLE_ENGINE, both variants -// are kept; otherwise "direct" is selected with 90% probability and the other value with 10%. -// For all other variables with multiple values, one value is selected based on hash(testDir + varName). -func SubsetEnvMatrix(matrix map[string][]string, testDir string, scriptUsesEngine bool) map[string][]string { - if len(matrix) == 0 { - return matrix +// SubsetExpanded selects one variant from an already-expanded and exclusion-filtered list +// using weighted consistent hashing. DATABRICKS_BUNDLE_ENGINE=direct has weight 10; +// all other variants have weight 1. +func SubsetExpanded(expanded [][]string, testDir string) [][]string { + if len(expanded) <= 1 { + return expanded } - - result := make(map[string][]string, len(matrix)) - for key, values := range matrix { - if len(values) <= 1 { - result[key] = values - continue + // Build weighted list: direct variants appear 10 times, others once. + var weighted [][]string + for _, envset := range expanded { + weight := 1 + if slices.Contains(envset, "DATABRICKS_BUNDLE_ENGINE=direct") { + weight = 10 } - - if key == "DATABRICKS_BUNDLE_ENGINE" { - if scriptUsesEngine { - // Script references the variable — keep all variants. - result[key] = values - } else { - // Select "direct" with 90% probability, otherwise the other value. - h := fnv.New64a() - h.Write([]byte(testDir)) - h.Write([]byte(key)) - pct := h.Sum64() % 100 - directIdx := slices.Index(values, "direct") - if directIdx >= 0 && pct < 90 { - result[key] = []string{"direct"} - } else if directIdx >= 0 { - // Pick the other value (non-direct). - for _, v := range values { - if v != "direct" { - result[key] = []string{v} - break - } - } - } else { - // No "direct" value — fall through to hash selection. - idx := consistentSelect(testDir, key, len(values)) - result[key] = []string{values[idx]} - } - } - } else { - idx := consistentSelect(testDir, key, len(values)) - result[key] = []string{values[idx]} + for range weight { + weighted = append(weighted, envset) } } - - return result -} - -// consistentSelect returns a deterministic index in [0, n) based on hash(testDir + varName). -func consistentSelect(testDir, varName string, n int) int { h := fnv.New64a() h.Write([]byte(testDir)) - h.Write([]byte(varName)) - return int(h.Sum64() % uint64(n)) + return [][]string{weighted[h.Sum64()%uint64(len(weighted))]} } // matchesExclusionRule returns true if envSet contains all KEY=value pairs from excludeRule. diff --git a/acceptance/internal/config_test.go b/acceptance/internal/config_test.go index 1353aaf395..c4b41ce0fc 100644 --- a/acceptance/internal/config_test.go +++ b/acceptance/internal/config_test.go @@ -202,89 +202,22 @@ func TestExpandEnvMatrix(t *testing.T) { } } -func TestSubsetEnvMatrix_SingleValues(t *testing.T) { - // Single-value variables are kept as-is. - matrix := map[string][]string{ - "KEY1": {"A"}, - "KEY2": {"B"}, - } - result := SubsetEnvMatrix(matrix, "some/test", false) - assert.Equal(t, map[string][]string{"KEY1": {"A"}, "KEY2": {"B"}}, result) -} - -func TestSubsetEnvMatrix_EmptyMatrix(t *testing.T) { - result := SubsetEnvMatrix(nil, "test", false) - assert.Nil(t, result) -} - -func TestSubsetEnvMatrix_NonEngineMultipleValues(t *testing.T) { - // For non-engine variables with multiple values, exactly one is selected. - matrix := map[string][]string{ - "FOO": {"a", "b", "c"}, - } - result := SubsetEnvMatrix(matrix, "test/dir", false) - require.Len(t, result["FOO"], 1) - assert.Contains(t, []string{"a", "b", "c"}, result["FOO"][0]) -} - -func TestSubsetEnvMatrix_NonEngineDeterministic(t *testing.T) { - // Same inputs produce same output. - matrix := map[string][]string{ - "FOO": {"a", "b", "c"}, - } - r1 := SubsetEnvMatrix(matrix, "test/dir", false) - r2 := SubsetEnvMatrix(matrix, "test/dir", false) - assert.Equal(t, r1, r2) -} - -func TestSubsetEnvMatrix_NonEngineDifferentDirs(t *testing.T) { - // Different test dirs may select different values (not guaranteed but likely with enough dirs). - matrix := map[string][]string{ - "FOO": {"a", "b", "c", "d", "e"}, - } - seen := map[string]bool{} - for i := range 100 { - dir := fmt.Sprintf("dir%d", i) - r := SubsetEnvMatrix(matrix, dir, false) - seen[r["FOO"][0]] = true - } - assert.Greater(t, len(seen), 1, "expected different dirs to select different values") -} - -func TestSubsetEnvMatrix_EngineScriptUsesEngine(t *testing.T) { - // When script uses $DATABRICKS_BUNDLE_ENGINE, both variants are kept. - matrix := map[string][]string{ - "DATABRICKS_BUNDLE_ENGINE": {"terraform", "direct"}, - } - result := SubsetEnvMatrix(matrix, "test/dir", true) - assert.Equal(t, []string{"terraform", "direct"}, result["DATABRICKS_BUNDLE_ENGINE"]) -} - -func TestSubsetEnvMatrix_EngineScriptDoesNotUseEngine(t *testing.T) { - // When script doesn't use $DATABRICKS_BUNDLE_ENGINE, exactly one variant is selected. - matrix := map[string][]string{ - "DATABRICKS_BUNDLE_ENGINE": {"terraform", "direct"}, - } - result := SubsetEnvMatrix(matrix, "test/dir", false) - require.Len(t, result["DATABRICKS_BUNDLE_ENGINE"], 1) -} - -func TestSubsetEnvMatrix_EngineDirectBias(t *testing.T) { - // Across many test dirs, "direct" should be selected ~90% of the time. - matrix := map[string][]string{ - "DATABRICKS_BUNDLE_ENGINE": {"terraform", "direct"}, +func TestSubsetExpanded_DirectBias(t *testing.T) { + // Across many test dirs, DATABRICKS_BUNDLE_ENGINE=direct should be selected ~10/11 of the time. + expanded := [][]string{ + {"DATABRICKS_BUNDLE_ENGINE=terraform"}, + {"DATABRICKS_BUNDLE_ENGINE=direct"}, } directCount := 0 total := 1000 for i := range total { - dir := fmt.Sprintf("test/dir%d", i) - r := SubsetEnvMatrix(matrix, dir, false) - if r["DATABRICKS_BUNDLE_ENGINE"][0] == "direct" { + r := SubsetExpanded(expanded, fmt.Sprintf("test/dir%d", i)) + if r[0][0] == "DATABRICKS_BUNDLE_ENGINE=direct" { directCount++ } } ratio := float64(directCount) / float64(total) - assert.InDelta(t, 0.9, ratio, 0.05, "expected ~90%% direct, got %.1f%%", ratio*100) + assert.InDelta(t, float64(10)/11, ratio, 0.05, "expected ~10/11 direct, got %.1f%%", ratio*100) } func TestLoadConfigPhaseIsNotInherited(t *testing.T) { From 255d9c5d4fe866f48f3ed387f9e83a5a8b052c67 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Thu, 19 Mar 2026 17:20:03 +0100 Subject: [PATCH 3/8] acceptance: keep all engine variants when script uses \$DATABRICKS_BUNDLE_ENGINE When the script references \$DATABRICKS_BUNDLE_ENGINE (e.g. in output filenames), SubsetExpanded groups combos by engine value and picks one per group, ensuring both terraform and direct variants run. Otherwise it picks one combo total with direct weighted 10x over terraform. Co-Authored-By: Claude Sonnet 4.6 --- acceptance/acceptance_test.go | 4 ++- acceptance/internal/config.go | 47 ++++++++++++++++++++---------- acceptance/internal/config_test.go | 24 ++++++++++++++- 3 files changed, 58 insertions(+), 17 deletions(-) diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go index eb177d851f..829151d42c 100644 --- a/acceptance/acceptance_test.go +++ b/acceptance/acceptance_test.go @@ -373,7 +373,9 @@ func testAccept(t *testing.T, inprocessMode bool, singleTest string) int { expanded := internal.ExpandEnvMatrix(config.EnvMatrix, config.EnvMatrixExclude, extraVars) if Subset { - expanded = internal.SubsetExpanded(expanded, dir) + scriptContent, _ := os.ReadFile(filepath.Join(dir, EntryPointScript)) + scriptUsesEngine := strings.Contains(string(scriptContent), "$DATABRICKS_BUNDLE_ENGINE") + expanded = internal.SubsetExpanded(expanded, dir, scriptUsesEngine) } for ind, envset := range expanded { diff --git a/acceptance/internal/config.go b/acceptance/internal/config.go index 4f22df4755..3b38e19740 100644 --- a/acceptance/internal/config.go +++ b/acceptance/internal/config.go @@ -428,16 +428,40 @@ func filterExcludedEnvSets(envSets [][]string, exclude map[string][]string) [][] return filtered } -// SubsetExpanded selects one variant from an already-expanded and exclusion-filtered list -// using weighted consistent hashing. DATABRICKS_BUNDLE_ENGINE=direct has weight 10; -// all other variants have weight 1. -func SubsetExpanded(expanded [][]string, testDir string) [][]string { +// SubsetExpanded selects one variant per DATABRICKS_BUNDLE_ENGINE value (if scriptUsesEngine) +// or one variant total from an already-expanded and exclusion-filtered list. +// DATABRICKS_BUNDLE_ENGINE=direct has weight 10; all other variants have weight 1. +func SubsetExpanded(expanded [][]string, testDir string, scriptUsesEngine bool) [][]string { if len(expanded) <= 1 { return expanded } - // Build weighted list: direct variants appear 10 times, others once. + if scriptUsesEngine { + // Group by engine value and pick one combo per group. + groups := make(map[string][][]string) + for _, envset := range expanded { + engine := "" + for _, kv := range envset { + if v, ok := strings.CutPrefix(kv, "DATABRICKS_BUNDLE_ENGINE="); ok { + engine = v + break + } + } + groups[engine] = append(groups[engine], envset) + } + var result [][]string + for _, group := range groups { + result = append(result, weightedSelect(group, testDir)) + } + return result + } + return [][]string{weightedSelect(expanded, testDir)} +} + +// weightedSelect picks one envset using weighted consistent hashing. +// DATABRICKS_BUNDLE_ENGINE=direct has weight 10; all other envsets have weight 1. +func weightedSelect(envsets [][]string, testDir string) []string { var weighted [][]string - for _, envset := range expanded { + for _, envset := range envsets { weight := 1 if slices.Contains(envset, "DATABRICKS_BUNDLE_ENGINE=direct") { weight = 10 @@ -448,20 +472,13 @@ func SubsetExpanded(expanded [][]string, testDir string) [][]string { } h := fnv.New64a() h.Write([]byte(testDir)) - return [][]string{weighted[h.Sum64()%uint64(len(weighted))]} + return weighted[h.Sum64()%uint64(len(weighted))] } // matchesExclusionRule returns true if envSet contains all KEY=value pairs from excludeRule. func matchesExclusionRule(envSet, excludeRule []string) bool { for _, excludePair := range excludeRule { - found := false - for _, envPair := range envSet { - if envPair == excludePair { - found = true - break - } - } - if !found { + if !slices.Contains(envSet, excludePair) { return false } } diff --git a/acceptance/internal/config_test.go b/acceptance/internal/config_test.go index c4b41ce0fc..a81345ed57 100644 --- a/acceptance/internal/config_test.go +++ b/acceptance/internal/config_test.go @@ -4,6 +4,7 @@ import ( "fmt" "os" "path/filepath" + "strings" "testing" "github.com/stretchr/testify/assert" @@ -211,7 +212,7 @@ func TestSubsetExpanded_DirectBias(t *testing.T) { directCount := 0 total := 1000 for i := range total { - r := SubsetExpanded(expanded, fmt.Sprintf("test/dir%d", i)) + r := SubsetExpanded(expanded, fmt.Sprintf("test/dir%d", i), false) if r[0][0] == "DATABRICKS_BUNDLE_ENGINE=direct" { directCount++ } @@ -220,6 +221,27 @@ func TestSubsetExpanded_DirectBias(t *testing.T) { assert.InDelta(t, float64(10)/11, ratio, 0.05, "expected ~10/11 direct, got %.1f%%", ratio*100) } +func TestSubsetExpanded_ScriptUsesEngine(t *testing.T) { + // When script uses $DATABRICKS_BUNDLE_ENGINE, one combo per engine value is returned. + expanded := [][]string{ + {"DATABRICKS_BUNDLE_ENGINE=terraform", "READPLAN="}, + {"DATABRICKS_BUNDLE_ENGINE=direct", "READPLAN="}, + {"DATABRICKS_BUNDLE_ENGINE=direct", "READPLAN=1"}, + } + result := SubsetExpanded(expanded, "test/dir", true) + require.Len(t, result, 2) + engines := make(map[string]bool) + for _, envset := range result { + for _, kv := range envset { + if strings.HasPrefix(kv, "DATABRICKS_BUNDLE_ENGINE=") { + engines[kv] = true + } + } + } + assert.True(t, engines["DATABRICKS_BUNDLE_ENGINE=terraform"]) + assert.True(t, engines["DATABRICKS_BUNDLE_ENGINE=direct"]) +} + func TestLoadConfigPhaseIsNotInherited(t *testing.T) { tests := []struct { name string From 8c9fba203ddae0fddbfcbad27c45f2af3400e939 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Thu, 19 Mar 2026 17:50:29 +0100 Subject: [PATCH 4/8] acceptance: also check parent script.prepare files for \$DATABRICKS_BUNDLE_ENGINE When determining whether to keep all engine variants, walk parent directories checking script.prepare files in addition to the test's own script. Results are cached via sync.Map since parent prepare scripts are shared across many tests. Co-Authored-By: Claude Sonnet 4.6 --- acceptance/acceptance_test.go | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go index 829151d42c..8c1656df14 100644 --- a/acceptance/acceptance_test.go +++ b/acceptance/acceptance_test.go @@ -158,6 +158,27 @@ func setReplsForTestEnvVars(t *testing.T, repls *testdiff.ReplacementsContext) { } } +// prepareScriptUsesEngineCache caches whether a script.prepare in a given directory +// (or any of its ancestors) references $DATABRICKS_BUNDLE_ENGINE. +// Since parent script.prepare files are shared across many tests, caching avoids redundant reads. +var prepareScriptUsesEngineCache sync.Map + +// anyPrepareScriptUsesEngine returns true if any script.prepare in dir or its ancestors +// contains $DATABRICKS_BUNDLE_ENGINE. +func anyPrepareScriptUsesEngine(dir string) bool { + if dir == "" || dir == "." { + return false + } + if v, ok := prepareScriptUsesEngineCache.Load(dir); ok { + return v.(bool) + } + content, err := os.ReadFile(filepath.Join(dir, PrepareScript)) + result := (err == nil && strings.Contains(string(content), "$DATABRICKS_BUNDLE_ENGINE")) || + anyPrepareScriptUsesEngine(filepath.Dir(dir)) + prepareScriptUsesEngineCache.Store(dir, result) + return result +} + func testAccept(t *testing.T, inprocessMode bool, singleTest string) int { if testdiff.OverwriteMode { Subset = true @@ -374,7 +395,8 @@ func testAccept(t *testing.T, inprocessMode bool, singleTest string) int { expanded := internal.ExpandEnvMatrix(config.EnvMatrix, config.EnvMatrixExclude, extraVars) if Subset { scriptContent, _ := os.ReadFile(filepath.Join(dir, EntryPointScript)) - scriptUsesEngine := strings.Contains(string(scriptContent), "$DATABRICKS_BUNDLE_ENGINE") + scriptUsesEngine := strings.Contains(string(scriptContent), "$DATABRICKS_BUNDLE_ENGINE") || + anyPrepareScriptUsesEngine(dir) expanded = internal.SubsetExpanded(expanded, dir, scriptUsesEngine) } From f65f3d65759834dac1b12208545c07eb5d61a536 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Thu, 19 Mar 2026 17:56:32 +0100 Subject: [PATCH 5/8] acceptance: check _script helpers in parent dirs for \$DATABRICKS_BUNDLE_ENGINE _script files are bash helpers sourced by test scripts via 'source \$TESTDIR/../_script'. Check these (not script.prepare) when determining whether to keep all engine variants during subset selection. Co-Authored-By: Claude Sonnet 4.6 --- acceptance/acceptance_test.go | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go index 8c1656df14..a07a9af8d0 100644 --- a/acceptance/acceptance_test.go +++ b/acceptance/acceptance_test.go @@ -158,24 +158,24 @@ func setReplsForTestEnvVars(t *testing.T, repls *testdiff.ReplacementsContext) { } } -// prepareScriptUsesEngineCache caches whether a script.prepare in a given directory +// helperScriptUsesEngineCache caches whether a _script helper in a given directory // (or any of its ancestors) references $DATABRICKS_BUNDLE_ENGINE. -// Since parent script.prepare files are shared across many tests, caching avoids redundant reads. -var prepareScriptUsesEngineCache sync.Map +// Since _script helpers are shared across many tests, caching avoids redundant reads. +var helperScriptUsesEngineCache sync.Map -// anyPrepareScriptUsesEngine returns true if any script.prepare in dir or its ancestors +// anyHelperScriptUsesEngine returns true if any _script helper in dir or its ancestors // contains $DATABRICKS_BUNDLE_ENGINE. -func anyPrepareScriptUsesEngine(dir string) bool { +func anyHelperScriptUsesEngine(dir string) bool { if dir == "" || dir == "." { return false } - if v, ok := prepareScriptUsesEngineCache.Load(dir); ok { + if v, ok := helperScriptUsesEngineCache.Load(dir); ok { return v.(bool) } - content, err := os.ReadFile(filepath.Join(dir, PrepareScript)) + content, err := os.ReadFile(filepath.Join(dir, "_script")) result := (err == nil && strings.Contains(string(content), "$DATABRICKS_BUNDLE_ENGINE")) || - anyPrepareScriptUsesEngine(filepath.Dir(dir)) - prepareScriptUsesEngineCache.Store(dir, result) + anyHelperScriptUsesEngine(filepath.Dir(dir)) + helperScriptUsesEngineCache.Store(dir, result) return result } @@ -396,7 +396,7 @@ func testAccept(t *testing.T, inprocessMode bool, singleTest string) int { if Subset { scriptContent, _ := os.ReadFile(filepath.Join(dir, EntryPointScript)) scriptUsesEngine := strings.Contains(string(scriptContent), "$DATABRICKS_BUNDLE_ENGINE") || - anyPrepareScriptUsesEngine(dir) + anyHelperScriptUsesEngine(dir) expanded = internal.SubsetExpanded(expanded, dir, scriptUsesEngine) } From 7dbc97db43206df36ca7c0f4d51f53b6a7d57dad Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Fri, 20 Mar 2026 13:41:44 +0100 Subject: [PATCH 6/8] update comment --- acceptance/acceptance_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go index a07a9af8d0..060cf1cae5 100644 --- a/acceptance/acceptance_test.go +++ b/acceptance/acceptance_test.go @@ -79,7 +79,7 @@ func init() { // to simulate an identical environment. flag.BoolVar(&WorkspaceTmpDir, "workspace-tmp-dir", false, "Run tests on the workspace file system (For DBR testing).") flag.BoolVar(&OnlyOutTestToml, "only-out-test-toml", false, "Only regenerate out.test.toml files without running tests") - flag.BoolVar(&Subset, "subset", false, "Select a subset of EnvMatrix variants using consistent hashing") + flag.BoolVar(&Subset, "subset", false, "Select a subset of EnvMatrix variants that cover all output files. Auto-enabled on -update.") } const ( From 22304e1d686934ff669bf8bbc23006570f8bb646 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Mon, 23 Mar 2026 10:04:42 +0100 Subject: [PATCH 7/8] add selftest --- acceptance/selftest/subset_ancestor_engine/_script | 1 + .../selftest/subset_ancestor_engine/child/out.direct.txt | 1 + .../selftest/subset_ancestor_engine/child/out.terraform.txt | 1 + .../selftest/subset_ancestor_engine/child/out.test.toml | 5 +++++ acceptance/selftest/subset_ancestor_engine/child/output.txt | 0 acceptance/selftest/subset_ancestor_engine/child/script | 4 ++++ acceptance/selftest/subset_ancestor_engine/child/test.toml | 1 + 7 files changed, 13 insertions(+) create mode 100644 acceptance/selftest/subset_ancestor_engine/_script create mode 100644 acceptance/selftest/subset_ancestor_engine/child/out.direct.txt create mode 100644 acceptance/selftest/subset_ancestor_engine/child/out.terraform.txt create mode 100644 acceptance/selftest/subset_ancestor_engine/child/out.test.toml create mode 100644 acceptance/selftest/subset_ancestor_engine/child/output.txt create mode 100644 acceptance/selftest/subset_ancestor_engine/child/script create mode 100644 acceptance/selftest/subset_ancestor_engine/child/test.toml diff --git a/acceptance/selftest/subset_ancestor_engine/_script b/acceptance/selftest/subset_ancestor_engine/_script new file mode 100644 index 0000000000..0836732d6e --- /dev/null +++ b/acceptance/selftest/subset_ancestor_engine/_script @@ -0,0 +1 @@ +echo "engine=$DATABRICKS_BUNDLE_ENGINE" > "out.$DATABRICKS_BUNDLE_ENGINE.txt" diff --git a/acceptance/selftest/subset_ancestor_engine/child/out.direct.txt b/acceptance/selftest/subset_ancestor_engine/child/out.direct.txt new file mode 100644 index 0000000000..163862caec --- /dev/null +++ b/acceptance/selftest/subset_ancestor_engine/child/out.direct.txt @@ -0,0 +1 @@ +engine=direct diff --git a/acceptance/selftest/subset_ancestor_engine/child/out.terraform.txt b/acceptance/selftest/subset_ancestor_engine/child/out.terraform.txt new file mode 100644 index 0000000000..2618d93f70 --- /dev/null +++ b/acceptance/selftest/subset_ancestor_engine/child/out.terraform.txt @@ -0,0 +1 @@ +engine=terraform diff --git a/acceptance/selftest/subset_ancestor_engine/child/out.test.toml b/acceptance/selftest/subset_ancestor_engine/child/out.test.toml new file mode 100644 index 0000000000..d560f1de04 --- /dev/null +++ b/acceptance/selftest/subset_ancestor_engine/child/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/selftest/subset_ancestor_engine/child/output.txt b/acceptance/selftest/subset_ancestor_engine/child/output.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/acceptance/selftest/subset_ancestor_engine/child/script b/acceptance/selftest/subset_ancestor_engine/child/script new file mode 100644 index 0000000000..a3c2588f2d --- /dev/null +++ b/acceptance/selftest/subset_ancestor_engine/child/script @@ -0,0 +1,4 @@ +# This script does not directly reference $DATABRICKS_BUNDLE_ENGINE. +# Engine detection happens via the ancestor _script helper, +# exercising the anyHelperScriptUsesEngine() path in subset selection. +source "$TESTDIR/../_script" diff --git a/acceptance/selftest/subset_ancestor_engine/child/test.toml b/acceptance/selftest/subset_ancestor_engine/child/test.toml new file mode 100644 index 0000000000..9b64b4605a --- /dev/null +++ b/acceptance/selftest/subset_ancestor_engine/child/test.toml @@ -0,0 +1 @@ +EnvVaryOutput = "DATABRICKS_BUNDLE_ENGINE" From 569a2a66a38a64d009f51f128691fd7740cbb73c Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Mon, 23 Mar 2026 10:13:32 +0100 Subject: [PATCH 8/8] acceptance: refactor SubsetExpanded to use key->index map instead of map->slice conversion Replaces map[string][][]string (engine -> group) with a parallel pair of slices (result, groups) indexed by first-seen insertion order, using a keyToIdx map[string]int as the side structure for deduplication. This eliminates the non-deterministic map iteration that determined result order. Co-authored-by: Isaac --- acceptance/internal/config.go | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/acceptance/internal/config.go b/acceptance/internal/config.go index 3b38e19740..e14bddae68 100644 --- a/acceptance/internal/config.go +++ b/acceptance/internal/config.go @@ -436,8 +436,11 @@ func SubsetExpanded(expanded [][]string, testDir string, scriptUsesEngine bool) return expanded } if scriptUsesEngine { - // Group by engine value and pick one combo per group. - groups := make(map[string][][]string) + // Collect candidates per engine key, preserving first-seen order. + // keyToIdx maps engine value -> index in result/groups slices. + var result [][]string + var groups [][][]string + keyToIdx := make(map[string]int) for _, envset := range expanded { engine := "" for _, kv := range envset { @@ -446,11 +449,17 @@ func SubsetExpanded(expanded [][]string, testDir string, scriptUsesEngine bool) break } } - groups[engine] = append(groups[engine], envset) + idx, ok := keyToIdx[engine] + if !ok { + idx = len(result) + keyToIdx[engine] = idx + result = append(result, nil) + groups = append(groups, nil) + } + groups[idx] = append(groups[idx], envset) } - var result [][]string - for _, group := range groups { - result = append(result, weightedSelect(group, testDir)) + for i, group := range groups { + result[i] = weightedSelect(group, testDir) } return result }