From bff7f62e318477fbc05152664d6d2b7c73306094 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 16 Mar 2026 00:23:56 +0100 Subject: [PATCH 1/8] Replace regex-based interpolation with character scanner - Move interpolation parser to libs/interpolation/ (independent of dyn) - Use \$ -> $ and \\ -> \ escape sequences (Bash convention) - Reject nested ${} constructs as errors - Replace strings.Replace interpolation with token-based concatenation - Add WarnMalformedReferences mutator for early parse error warnings - Add NewRefWithDiagnostics for surfacing parse errors as diagnostics Co-authored-by: Isaac --- .../bad_syntax/out.deploy.direct.txt | 3 + .../bad_syntax/out.plan.direct.txt | 3 + .../bad_syntax/out.plan.terraform.txt | 3 + .../resource_deps/bad_syntax/output.txt | 3 + .../malformed_reference/databricks.yml | 6 + .../malformed_reference/out.test.toml | 5 + .../variables/malformed_reference/output.txt | 12 + .../variables/malformed_reference/script | 1 + .../mutator/warn_malformed_references.go | 47 +++ bundle/phases/initialize.go | 4 + design/interpolation-parser.md | 72 +++++ libs/dyn/dynvar/ref.go | 125 +++++--- libs/dyn/dynvar/ref_test.go | 18 ++ libs/dyn/dynvar/resolve.go | 50 ++-- libs/dyn/dynvar/resolve_test.go | 13 + libs/interpolation/parse.go | 213 ++++++++++++++ libs/interpolation/parse_test.go | 267 ++++++++++++++++++ 17 files changed, 790 insertions(+), 55 deletions(-) create mode 100644 acceptance/bundle/variables/malformed_reference/databricks.yml create mode 100644 acceptance/bundle/variables/malformed_reference/out.test.toml create mode 100644 acceptance/bundle/variables/malformed_reference/output.txt create mode 100644 acceptance/bundle/variables/malformed_reference/script create mode 100644 bundle/config/mutator/warn_malformed_references.go create mode 100644 design/interpolation-parser.md create mode 100644 libs/interpolation/parse.go create mode 100644 libs/interpolation/parse_test.go diff --git a/acceptance/bundle/resource_deps/bad_syntax/out.deploy.direct.txt b/acceptance/bundle/resource_deps/bad_syntax/out.deploy.direct.txt index 6bed0296b3..a985c8415e 100644 --- a/acceptance/bundle/resource_deps/bad_syntax/out.deploy.direct.txt +++ b/acceptance/bundle/resource_deps/bad_syntax/out.deploy.direct.txt @@ -1,3 +1,6 @@ +Warning: invalid variable reference ${resources.volumes.bar.bad..syntax}: invalid path + in databricks.yml:11:21 + Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files... Deploying resources... Updating deployment state... diff --git a/acceptance/bundle/resource_deps/bad_syntax/out.plan.direct.txt b/acceptance/bundle/resource_deps/bad_syntax/out.plan.direct.txt index 1a40fdbaa3..a53d32c081 100644 --- a/acceptance/bundle/resource_deps/bad_syntax/out.plan.direct.txt +++ b/acceptance/bundle/resource_deps/bad_syntax/out.plan.direct.txt @@ -1,3 +1,6 @@ +Warning: invalid variable reference ${resources.volumes.bar.bad..syntax}: invalid path + in databricks.yml:11:21 + create volumes.bar create volumes.foo diff --git a/acceptance/bundle/resource_deps/bad_syntax/out.plan.terraform.txt b/acceptance/bundle/resource_deps/bad_syntax/out.plan.terraform.txt index b8f8078aba..ed957b8518 100644 --- a/acceptance/bundle/resource_deps/bad_syntax/out.plan.terraform.txt +++ b/acceptance/bundle/resource_deps/bad_syntax/out.plan.terraform.txt @@ -1,3 +1,6 @@ +Warning: invalid variable reference ${resources.volumes.bar.bad..syntax}: invalid path + in databricks.yml:11:21 + Error: exit status 1 Error: Invalid attribute name diff --git a/acceptance/bundle/resource_deps/bad_syntax/output.txt b/acceptance/bundle/resource_deps/bad_syntax/output.txt index 0e9d83b643..968ebf652a 100644 --- a/acceptance/bundle/resource_deps/bad_syntax/output.txt +++ b/acceptance/bundle/resource_deps/bad_syntax/output.txt @@ -1,5 +1,8 @@ >>> [CLI] bundle validate -o json +Warning: invalid variable reference ${resources.volumes.bar.bad..syntax}: invalid path + in databricks.yml:11:21 + { "volumes": { "bar": { diff --git a/acceptance/bundle/variables/malformed_reference/databricks.yml b/acceptance/bundle/variables/malformed_reference/databricks.yml new file mode 100644 index 0000000000..63a0282395 --- /dev/null +++ b/acceptance/bundle/variables/malformed_reference/databricks.yml @@ -0,0 +1,6 @@ +bundle: + name: "${foo.bar-}" + +variables: + a: + default: hello diff --git a/acceptance/bundle/variables/malformed_reference/out.test.toml b/acceptance/bundle/variables/malformed_reference/out.test.toml new file mode 100644 index 0000000000..d560f1de04 --- /dev/null +++ b/acceptance/bundle/variables/malformed_reference/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/bundle/variables/malformed_reference/output.txt b/acceptance/bundle/variables/malformed_reference/output.txt new file mode 100644 index 0000000000..fba4d19be6 --- /dev/null +++ b/acceptance/bundle/variables/malformed_reference/output.txt @@ -0,0 +1,12 @@ + +>>> [CLI] bundle validate +Warning: invalid variable reference ${foo.bar-}: invalid key "bar-" + in databricks.yml:2:9 + +Name: ${foo.bar-} +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/${foo.bar-}/default + +Found 1 warning diff --git a/acceptance/bundle/variables/malformed_reference/script b/acceptance/bundle/variables/malformed_reference/script new file mode 100644 index 0000000000..5350876150 --- /dev/null +++ b/acceptance/bundle/variables/malformed_reference/script @@ -0,0 +1 @@ +trace $CLI bundle validate diff --git a/bundle/config/mutator/warn_malformed_references.go b/bundle/config/mutator/warn_malformed_references.go new file mode 100644 index 0000000000..cf6934c7b7 --- /dev/null +++ b/bundle/config/mutator/warn_malformed_references.go @@ -0,0 +1,47 @@ +package mutator + +import ( + "context" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/dynvar" +) + +type warnMalformedReferences struct{} + +// WarnMalformedReferences returns a mutator that emits warnings for strings +// containing malformed variable references (e.g. "${foo.bar-}"). +func WarnMalformedReferences() bundle.Mutator { + return &warnMalformedReferences{} +} + +func (*warnMalformedReferences) Name() string { + return "WarnMalformedReferences" +} + +func (*warnMalformedReferences) Validate(ctx context.Context, b *bundle.Bundle) error { + return nil +} + +func (*warnMalformedReferences) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + var diags diag.Diagnostics + err := b.Config.Mutate(func(root dyn.Value) (dyn.Value, error) { + _, err := dyn.Walk(root, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { + // Only check values with source locations to avoid false positives + // from synthesized/computed values. + if len(v.Locations()) == 0 { + return v, nil + } + _, _, refDiags := dynvar.NewRefWithDiagnostics(v) + diags = diags.Extend(refDiags) + return v, nil + }) + return root, err + }) + if err != nil { + diags = diags.Extend(diag.FromErr(err)) + } + return diags +} diff --git a/bundle/phases/initialize.go b/bundle/phases/initialize.go index 761714b48e..02da7a8f15 100644 --- a/bundle/phases/initialize.go +++ b/bundle/phases/initialize.go @@ -94,6 +94,10 @@ func Initialize(ctx context.Context, b *bundle.Bundle) { // searches for strings with variable references in them. mutator.RewriteWorkspacePrefix(), + // Walks the config tree and emits warnings for malformed variable references + // (e.g. "${foo.bar-}") before variable resolution occurs. + mutator.WarnMalformedReferences(), + // Reads (dynamic): variables.* (checks if there's a value assigned to variable already or if it has lookup reference) // Updates (dynamic): variables.*.value (sets values from environment variables, variable files, or defaults) // Resolves and sets values for bundle variables in the following order: from environment variables, from variable files and then defaults diff --git a/design/interpolation-parser.md b/design/interpolation-parser.md new file mode 100644 index 0000000000..991a006278 --- /dev/null +++ b/design/interpolation-parser.md @@ -0,0 +1,72 @@ +# Variable Interpolation: Character Scanner Parser + +Author: Shreyas Goenka +Date: 12 March 2026 + +## Motivation + +DABs variable interpolation (`${...}`) was regex-based. This caused: + +1. **Silent failures** — `${foo.bar-}` silently treated as literal text with no warning. +2. **No suggestions** — `${bundle.nme}` produces "reference does not exist" with no hint. +3. **No escape mechanism** — no way to produce a literal `${` in output. +4. **No extensibility** — cannot support structured path features like key-value references `tasks[task_key="x"]` that exist in `libs/structs/structpath`. + +## Background: How Other Systems Parse `${...}` + +| System | Strategy | Escape | Error Quality | +|--------|----------|--------|---------------| +| Go `text/template` | State-function lexer | None | Line + template name | +| HCL2 (Terraform) | Ragel FSM + recursive descent | `$${` → literal `${` | Source range + suggestions | +| Python f-strings | Mode-stack tokenizer | `{{` → `{` | Line/column | +| Rust `format!` | Iterator-based descent | `{{`/`}}` | Spans + suggestions | +| Bash | Char-by-char + depth tracking | `\$` | Line number | + +For a syntax as simple as `${path.to.var[0]}` (no nesting, no functions, no +operators), a full recursive descent parser is overkill. A **two-mode character +scanner** — the same core pattern used by Go's `text/template` and HCL — gives +proper error reporting and escape support without the complexity. + +## Design Decisions + +### Two-mode character scanner + +A two-mode scanner (TEXT / REFERENCE) that produces a flat list of tokens. +No AST, no recursive descent. Easy to port to the Python implementation. + +See `libs/interpolation/parse.go`. + +### Nested `${` rejection + +Nested `${...}` inside a reference (e.g., `${var.foo_${var.tail}}`) is +rejected as an error. This construct is ambiguous and was never intentionally +supported — the old regex happened to match only the innermost pair by +coincidence. + +### `\$` escape sequence + +`\$` produces a literal `$`, and `\\` produces a literal `\`. This follows +the same convention used by Bash for escaping `$` and is the least +surprising option for users working in shell environments. + +A standalone `\` before any character other than `$` or `\` is passed +through as a literal backslash, so existing configurations that happen to +contain backslashes are not affected. + +### Malformed reference warnings + +A standalone `WarnMalformedReferences` mutator walks the config tree once +before variable resolution. It only checks values with source locations +(`len(v.Locations()) > 0`) to avoid false positives from synthesized values +(e.g., normalized/computed paths). + +### Token-based resolution + +The resolver's string interpolation changed from `strings.Replace` (with +count=1 to avoid double-replacing duplicate refs) to a token concatenation +loop. Each `TokenRef` maps 1:1 to a resolved value, eliminating the ambiguity. + +## Python sync + +The Python regex in `python/databricks/bundles/core/_transform.py` needs a +corresponding update in a follow-up PR. diff --git a/libs/dyn/dynvar/ref.go b/libs/dyn/dynvar/ref.go index a048c80cb8..4025521096 100644 --- a/libs/dyn/dynvar/ref.go +++ b/libs/dyn/dynvar/ref.go @@ -1,23 +1,13 @@ package dynvar import ( - "fmt" - "regexp" - + "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/dyn" -) - -var ( - // !!! Should be in sync with _variable_regex in Python code. - // !!! - // !!! See python/databricks/bundles/core/_transform.py - baseVarDef = `[a-zA-Z]+([-_]*[a-zA-Z0-9]+)*` - re = regexp.MustCompile(fmt.Sprintf(`\$\{(%s(\.%s(\[[0-9]+\])*)*(\[[0-9]+\])*)\}`, baseVarDef, baseVarDef)) + "github.com/databricks/cli/libs/interpolation" ) // Ref represents a variable reference. // It is a string [dyn.Value] contained in a larger [dyn.Value]. -// Its path within the containing [dyn.Value] is also stored. type Ref struct { // Original value. Value dyn.Value @@ -25,13 +15,13 @@ type Ref struct { // String value in the original [dyn.Value]. Str string - // Matches of the variable reference in the string. - Matches [][]string + // Parsed tokens from the interpolation parser. + Tokens []interpolation.Token } // NewRef returns a new Ref if the given [dyn.Value] contains a string // with one or more variable references. It returns false if the given -// [dyn.Value] does not contain variable references. +// [dyn.Value] does not contain variable references or if parsing fails. // // Examples of a valid variable references: // - "${a.b}" @@ -39,53 +29,118 @@ type Ref struct { // - "${a.b[0].c}" // - "${a} ${b} ${c}" func NewRef(v dyn.Value) (Ref, bool) { + ref, ok, _ := newRef(v) + return ref, ok +} + +// NewRefWithDiagnostics returns a new Ref along with any diagnostics. +// Parse errors for malformed references (e.g. "${foo.bar-}") are returned +// as warnings. The second return value is false when no valid references +// are found (either no references at all, or a parse error occurred). +func NewRefWithDiagnostics(v dyn.Value) (Ref, bool, diag.Diagnostics) { + return newRef(v) +} + +func newRef(v dyn.Value) (Ref, bool, diag.Diagnostics) { s, ok := v.AsString() if !ok { - return Ref{}, false + return Ref{}, false, nil + } + + tokens, err := interpolation.Parse(s) + if err != nil { + // Return parse error as a warning diagnostic. + return Ref{}, false, diag.Diagnostics{{ + Severity: diag.Warning, + Summary: err.Error(), + Locations: v.Locations(), + }} + } + + // Check if any token is a variable reference or if escape sequences + // were processed (the reconstructed string differs from the original). + hasRef := false + for _, t := range tokens { + if t.Kind == interpolation.TokenRef { + hasRef = true + break + } } - // Check if the string contains any variable references. - m := re.FindAllStringSubmatch(s, -1) - if len(m) == 0 { - return Ref{}, false + if !hasRef { + // Even without references, if escape sequences were processed we need + // to return a Ref so the resolver can reconstruct the literal string. + if !hasEscapes(s, tokens) { + return Ref{}, false, nil + } } return Ref{ - Value: v, - Str: s, - Matches: m, - }, true + Value: v, + Str: s, + Tokens: tokens, + }, true, nil } // IsPure returns true if the variable reference contains a single // variable reference and nothing more. We need this so we can // interpolate values of non-string types (i.e. it can be substituted). func (v Ref) IsPure() bool { - // Need single match, equal to the incoming string. - if len(v.Matches) == 0 || len(v.Matches[0]) == 0 { - panic("invalid variable reference; expect at least one match") - } - return v.Matches[0][0] == v.Str + return len(v.Tokens) == 1 && v.Tokens[0].Kind == interpolation.TokenRef } +// References returns the path strings of all variable references. func (v Ref) References() []string { var out []string - for _, m := range v.Matches { - out = append(out, m[1]) + for _, t := range v.Tokens { + if t.Kind == interpolation.TokenRef { + out = append(out, t.Value) + } } return out } +// hasEscapes checks whether escape sequences were processed during parsing. +// Escape processing shortens the output (e.g., `\$` becomes `$`), so if the +// sum of token value lengths is less than the original string length, escapes +// were present. +func hasEscapes(original string, tokens []interpolation.Token) bool { + n := 0 + for _, t := range tokens { + n += len(t.Value) + } + return n < len(original) +} + +// IsPureVariableReference returns true if s is a single variable reference +// with no surrounding text. func IsPureVariableReference(s string) bool { - return len(s) > 0 && re.FindString(s) == s + if len(s) == 0 { + return false + } + tokens, err := interpolation.Parse(s) + if err != nil { + return false + } + return len(tokens) == 1 && tokens[0].Kind == interpolation.TokenRef } +// ContainsVariableReference returns true if s contains at least one variable reference. func ContainsVariableReference(s string) bool { - return re.MatchString(s) + tokens, err := interpolation.Parse(s) + if err != nil { + return false + } + for _, t := range tokens { + if t.Kind == interpolation.TokenRef { + return true + } + } + return false } -// If s is a pure variable reference, this function returns the corresponding -// dyn.Path. Otherwise, it returns false. +// PureReferenceToPath returns the corresponding [dyn.Path] if s is a pure +// variable reference. Otherwise, it returns false. func PureReferenceToPath(s string) (dyn.Path, bool) { ref, ok := NewRef(dyn.V(s)) if !ok { diff --git a/libs/dyn/dynvar/ref_test.go b/libs/dyn/dynvar/ref_test.go index d59d80cba1..3c98e1e474 100644 --- a/libs/dyn/dynvar/ref_test.go +++ b/libs/dyn/dynvar/ref_test.go @@ -3,6 +3,7 @@ package dynvar import ( "testing" + "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/dyn" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -47,6 +48,23 @@ func TestNewRefInvalidPattern(t *testing.T) { } } +func TestNewRefWithDiagnosticsMalformed(t *testing.T) { + v := dyn.NewValue("${foo.bar-}", []dyn.Location{{File: "test.yml", Line: 1, Column: 1}}) + _, ok, diags := NewRefWithDiagnostics(v) + assert.False(t, ok) + require.Len(t, diags, 1) + assert.Equal(t, diag.Warning, diags[0].Severity) + assert.Contains(t, diags[0].Summary, "invalid") +} + +func TestNewRefWithDiagnosticsValid(t *testing.T) { + v := dyn.V("${foo.bar}") + ref, ok, diags := NewRefWithDiagnostics(v) + assert.True(t, ok) + assert.Empty(t, diags) + assert.Equal(t, []string{"foo.bar"}, ref.References()) +} + func TestIsPureVariableReference(t *testing.T) { assert.False(t, IsPureVariableReference("")) assert.False(t, IsPureVariableReference("${foo.bar} suffix")) diff --git a/libs/dyn/dynvar/resolve.go b/libs/dyn/dynvar/resolve.go index b1366d93bb..f811f25394 100644 --- a/libs/dyn/dynvar/resolve.go +++ b/libs/dyn/dynvar/resolve.go @@ -7,6 +7,7 @@ import ( "strings" "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/interpolation" "github.com/databricks/cli/libs/utils" ) @@ -156,30 +157,39 @@ func (r *resolver) resolveRef(ref Ref, seen []string) (dyn.Value, error) { return dyn.NewValue(resolved[0].Value(), ref.Value.Locations()), nil } - // Not pure; perform string interpolation. - for j := range ref.Matches { - // The value is invalid if resolution returned [ErrSkipResolution]. - // We must skip those and leave the original variable reference in place. - if !resolved[j].IsValid() { - continue - } - - // Try to turn the resolved value into a string. - s, ok := resolved[j].AsString() - if !ok { - // Only allow primitive types to be converted to string. - switch resolved[j].Kind() { - case dyn.KindString, dyn.KindBool, dyn.KindInt, dyn.KindFloat, dyn.KindTime, dyn.KindNil: - s = fmt.Sprint(resolved[j].AsAny()) - default: - return dyn.InvalidValue, fmt.Errorf("cannot interpolate non-primitive value of type %s into string", resolved[j].Kind()) + // Not pure; perform token-based string interpolation. + var buf strings.Builder + refIdx := 0 + for _, tok := range ref.Tokens { + switch tok.Kind { + case interpolation.TokenLiteral: + buf.WriteString(tok.Value) + case interpolation.TokenRef: + // The value is invalid if resolution returned [ErrSkipResolution]. + // We must skip those and leave the original variable reference in place. + if !resolved[refIdx].IsValid() { + buf.WriteString("${") + buf.WriteString(tok.Value) + buf.WriteByte('}') + } else { + // Try to turn the resolved value into a string. + s, ok := resolved[refIdx].AsString() + if !ok { + // Only allow primitive types to be converted to string. + switch resolved[refIdx].Kind() { + case dyn.KindString, dyn.KindBool, dyn.KindInt, dyn.KindFloat, dyn.KindTime, dyn.KindNil: + s = fmt.Sprint(resolved[refIdx].AsAny()) + default: + return dyn.InvalidValue, fmt.Errorf("cannot interpolate non-primitive value of type %s into string", resolved[refIdx].Kind()) + } + } + buf.WriteString(s) } + refIdx++ } - - ref.Str = strings.Replace(ref.Str, ref.Matches[j][0], s, 1) } - return dyn.NewValue(ref.Str, ref.Value.Locations()), nil + return dyn.NewValue(buf.String(), ref.Value.Locations()), nil } func (r *resolver) resolveKey(key string, seen []string) (dyn.Value, error) { diff --git a/libs/dyn/dynvar/resolve_test.go b/libs/dyn/dynvar/resolve_test.go index 5b64029bae..598c78852b 100644 --- a/libs/dyn/dynvar/resolve_test.go +++ b/libs/dyn/dynvar/resolve_test.go @@ -393,3 +393,16 @@ func TestResolveSequenceVariable(t *testing.T) { assert.Equal(t, "value1", seq[0].MustString()) assert.Equal(t, "value2", seq[1].MustString()) } + +func TestResolveEscapedRef(t *testing.T) { + in := dyn.V(map[string]dyn.Value{ + "a": dyn.V("a"), + "b": dyn.V(`\${a}`), + }) + + out, err := dynvar.Resolve(in, dynvar.DefaultLookup(in)) + require.NoError(t, err) + + // Escaped reference should produce literal ${a}. + assert.Equal(t, "${a}", getByPath(t, out, "b").MustString()) +} diff --git a/libs/interpolation/parse.go b/libs/interpolation/parse.go new file mode 100644 index 0000000000..48518c8c32 --- /dev/null +++ b/libs/interpolation/parse.go @@ -0,0 +1,213 @@ +package interpolation + +import ( + "errors" + "fmt" + "regexp" + "strings" +) + +// TokenKind represents the type of a parsed token. +type TokenKind int + +const ( + TokenLiteral TokenKind = iota // Literal text (no interpolation) + TokenRef // Variable reference: content between ${ and } +) + +// Token represents a parsed segment of an interpolation string. +type Token struct { + Kind TokenKind + Value string // For TokenLiteral: the literal text; For TokenRef: the path string (without ${}) + Start int // Start position in original string + End int // End position in original string (exclusive) +} + +const ( + dollarChar = '$' + openBrace = '{' + closeBrace = '}' + backslashChar = '\\' +) + +// keyPattern validates a single key segment in a variable path. +// Matches: [a-zA-Z]+([-_]*[a-zA-Z0-9]+)* +// Examples: "foo", "my-job", "a_b_c", "abc123" +var keyPattern = regexp.MustCompile(`^[a-zA-Z]+([-_]*[a-zA-Z0-9]+)*$`) + +// indexPattern matches one or more [N] index suffixes. +var indexPattern = regexp.MustCompile(`^(\[[0-9]+\])+$`) + +// Parse parses a string that may contain ${...} variable references. +// It returns a slice of tokens representing literal text and variable references. +// +// Escape sequences: +// - "\$" produces a literal "$" +// - "\\" produces a literal "\" +// +// Examples: +// - "hello" -> [Literal("hello")] +// - "${a.b}" -> [Ref("a.b")] +// - "pre ${a.b} post" -> [Literal("pre "), Ref("a.b"), Literal(" post")] +// - "\${a.b}" -> [Literal("${a.b}")] +func Parse(s string) ([]Token, error) { + if len(s) == 0 { + return nil, nil + } + + var tokens []Token + i := 0 + var buf strings.Builder + litStart := 0 // tracks the start position in the original string for the current literal + + flushLiteral := func(end int) { + if buf.Len() > 0 { + tokens = append(tokens, Token{ + Kind: TokenLiteral, + Value: buf.String(), + Start: litStart, + End: end, + }) + buf.Reset() + } + } + + for i < len(s) { + switch s[i] { + case backslashChar: + // Handle escape sequences: \$ -> $, \\ -> \ + if buf.Len() == 0 { + litStart = i + } + if i+1 < len(s) { + switch s[i+1] { + case dollarChar: + buf.WriteByte(dollarChar) + i += 2 + case backslashChar: + buf.WriteByte(backslashChar) + i += 2 + default: + // Not a recognized escape; treat backslash as literal. + buf.WriteByte(backslashChar) + i++ + } + } else { + // Trailing backslash at end of string: treat as literal. + buf.WriteByte(backslashChar) + i++ + } + + case dollarChar: + // We see '$'. Look ahead. + if i+1 >= len(s) { + // Trailing '$' at end of string: treat as literal. + if buf.Len() == 0 { + litStart = i + } + buf.WriteByte(dollarChar) + i++ + continue + } + + if s[i+1] != openBrace { + // '$' not followed by '{': treat as literal. + if buf.Len() == 0 { + litStart = i + } + buf.WriteByte(dollarChar) + i++ + continue + } + + // Start of variable reference "${". + refStart := i + j := i + 2 // skip "${" + + // Scan until closing '}', rejecting nested '${'. + pathStart := j + for j < len(s) && s[j] != closeBrace { + if s[j] == dollarChar && j+1 < len(s) && s[j+1] == openBrace { + return nil, fmt.Errorf( + "nested variable references are not supported (at position %d)", refStart, + ) + } + j++ + } + + if j >= len(s) { + return nil, fmt.Errorf( + "unterminated variable reference at position %d", refStart, + ) + } + + path := s[pathStart:j] + j++ // skip '}' + + if path == "" { + return nil, fmt.Errorf( + "empty variable reference at position %d", refStart, + ) + } + + if err := validatePath(path); err != nil { + return nil, fmt.Errorf( + "invalid variable reference ${%s}: %w", path, err, + ) + } + + flushLiteral(i) + tokens = append(tokens, Token{ + Kind: TokenRef, + Value: path, + Start: refStart, + End: j, + }) + i = j + + default: + if buf.Len() == 0 { + litStart = i + } + buf.WriteByte(s[i]) + i++ + } + } + + flushLiteral(i) + return tokens, nil +} + +// validatePath validates the path inside a ${...} reference by splitting on +// '.' and validating each segment individually. +func validatePath(path string) error { + segments := strings.Split(path, ".") + for _, seg := range segments { + if seg == "" { + return errors.New("invalid path") + } + + // Strip trailing [N] index suffixes to get the key part. + key, idx := splitKeyAndIndex(seg) + + if key == "" { + return fmt.Errorf("invalid key %q", seg) + } + if !keyPattern.MatchString(key) { + return fmt.Errorf("invalid key %q", key) + } + if idx != "" && !indexPattern.MatchString(idx) { + return fmt.Errorf("invalid index in %q", seg) + } + } + return nil +} + +// splitKeyAndIndex splits "foo[0][1]" into ("foo", "[0][1]"). +func splitKeyAndIndex(seg string) (string, string) { + i := strings.IndexByte(seg, '[') + if i < 0 { + return seg, "" + } + return seg[:i], seg[i:] +} diff --git a/libs/interpolation/parse_test.go b/libs/interpolation/parse_test.go new file mode 100644 index 0000000000..565f13b2e1 --- /dev/null +++ b/libs/interpolation/parse_test.go @@ -0,0 +1,267 @@ +package interpolation + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestParseEmpty(t *testing.T) { + tokens, err := Parse("") + require.NoError(t, err) + assert.Nil(t, tokens) +} + +func TestParseLiteralOnly(t *testing.T) { + tokens, err := Parse("hello world") + require.NoError(t, err) + assert.Equal(t, []Token{ + {Kind: TokenLiteral, Value: "hello world", Start: 0, End: 11}, + }, tokens) +} + +func TestParseSingleRef(t *testing.T) { + tokens, err := Parse("${a.b}") + require.NoError(t, err) + assert.Equal(t, []Token{ + {Kind: TokenRef, Value: "a.b", Start: 0, End: 6}, + }, tokens) +} + +func TestParseMultipleRefs(t *testing.T) { + tokens, err := Parse("${a} ${b}") + require.NoError(t, err) + assert.Equal(t, []Token{ + {Kind: TokenRef, Value: "a", Start: 0, End: 4}, + {Kind: TokenLiteral, Value: " ", Start: 4, End: 5}, + {Kind: TokenRef, Value: "b", Start: 5, End: 9}, + }, tokens) +} + +func TestParseMixedLiteralAndRef(t *testing.T) { + tokens, err := Parse("pre ${a.b} post") + require.NoError(t, err) + assert.Equal(t, []Token{ + {Kind: TokenLiteral, Value: "pre ", Start: 0, End: 4}, + {Kind: TokenRef, Value: "a.b", Start: 4, End: 10}, + {Kind: TokenLiteral, Value: " post", Start: 10, End: 15}, + }, tokens) +} + +func TestParseValidPaths(t *testing.T) { + tests := []struct { + input string + path string + }{ + {"${a}", "a"}, + {"${abc}", "abc"}, + {"${a.b.c}", "a.b.c"}, + {"${a.b[0]}", "a.b[0]"}, + {"${a[0]}", "a[0]"}, + {"${a.b[0][1]}", "a.b[0][1]"}, + {"${a.b-c}", "a.b-c"}, + {"${a.b_c}", "a.b_c"}, + {"${a.b-c-d}", "a.b-c-d"}, + {"${a.b_c_d}", "a.b_c_d"}, + {"${abc.def.ghi}", "abc.def.ghi"}, + {"${a.b123}", "a.b123"}, + {"${resources.jobs.my-job.id}", "resources.jobs.my-job.id"}, + {"${var.my_var}", "var.my_var"}, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + tokens, err := Parse(tt.input) + require.NoError(t, err) + require.Len(t, tokens, 1) + assert.Equal(t, TokenRef, tokens[0].Kind) + assert.Equal(t, tt.path, tokens[0].Value) + }) + } +} + +func TestParseEscapeSequences(t *testing.T) { + tests := []struct { + name string + input string + tokens []Token + }{ + { + "escaped_dollar", + `\$`, + []Token{{Kind: TokenLiteral, Value: "$", Start: 0, End: 2}}, + }, + { + "escaped_ref", + `\${a}`, + []Token{{Kind: TokenLiteral, Value: "${a}", Start: 0, End: 5}}, + }, + { + "escaped_backslash", + `\\`, + []Token{{Kind: TokenLiteral, Value: `\`, Start: 0, End: 2}}, + }, + { + "double_escaped_backslash", + `\\\\`, + []Token{{Kind: TokenLiteral, Value: `\\`, Start: 0, End: 4}}, + }, + { + "escaped_backslash_then_ref", + `\\${a.b}`, + []Token{ + {Kind: TokenLiteral, Value: `\`, Start: 0, End: 2}, + {Kind: TokenRef, Value: "a.b", Start: 2, End: 8}, + }, + }, + { + "backslash_before_non_special", + `\n`, + []Token{{Kind: TokenLiteral, Value: `\n`, Start: 0, End: 2}}, + }, + { + "escaped_dollar_then_ref", + `\$\$${a.b}`, + []Token{ + {Kind: TokenLiteral, Value: "$$", Start: 0, End: 4}, + {Kind: TokenRef, Value: "a.b", Start: 4, End: 10}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tokens, err := Parse(tt.input) + require.NoError(t, err) + assert.Equal(t, tt.tokens, tokens) + }) + } +} + +func TestParseDollarAtEnd(t *testing.T) { + tokens, err := Parse("abc$") + require.NoError(t, err) + assert.Equal(t, []Token{ + {Kind: TokenLiteral, Value: "abc$", Start: 0, End: 4}, + }, tokens) +} + +func TestParseDollarBeforeNonBrace(t *testing.T) { + tokens, err := Parse("$x") + require.NoError(t, err) + assert.Equal(t, []Token{ + {Kind: TokenLiteral, Value: "$x", Start: 0, End: 2}, + }, tokens) +} + +func TestParseBackslashAtEnd(t *testing.T) { + tokens, err := Parse(`abc\`) + require.NoError(t, err) + assert.Equal(t, []Token{ + {Kind: TokenLiteral, Value: `abc\`, Start: 0, End: 4}, + }, tokens) +} + +func TestParseNestedReferenceReturnsError(t *testing.T) { + _, err := Parse("${var.foo_${var.tail}}") + require.Error(t, err) + assert.Contains(t, err.Error(), "nested variable references are not supported") +} + +func TestParseUnterminatedRef(t *testing.T) { + _, err := Parse("${a.b") + require.Error(t, err) + assert.Contains(t, err.Error(), "unterminated") +} + +func TestParseEmptyRef(t *testing.T) { + _, err := Parse("${}") + require.Error(t, err) + assert.Contains(t, err.Error(), "empty") +} + +func TestParseInvalidPaths(t *testing.T) { + tests := []struct { + name string + input string + }{ + {"trailing_hyphen", "${foo.bar-}"}, + {"double_dot", "${foo..bar}"}, + {"leading_digit", "${0foo}"}, + {"hyphen_start_segment", "${foo.-bar}"}, + {"trailing_dot", "${foo.}"}, + {"leading_dot", "${.foo}"}, + {"space_in_path", "${foo. bar}"}, + {"special_char", "${foo.bar!}"}, + {"just_digits", "${123}"}, + {"trailing_underscore", "${foo.bar_}"}, + {"underscore_start_segment", "${foo._bar}"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, err := Parse(tt.input) + require.Error(t, err) + assert.Contains(t, err.Error(), "invalid") + }) + } +} + +func TestParsePositions(t *testing.T) { + tests := []struct { + name string + input string + tokens []Token + }{ + { + "single_ref", + "${a.b}", + []Token{{Kind: TokenRef, Value: "a.b", Start: 0, End: 6}}, + }, + { + "literal_ref_literal", + "pre ${a.b} post", + []Token{ + {Kind: TokenLiteral, Value: "pre ", Start: 0, End: 4}, + {Kind: TokenRef, Value: "a.b", Start: 4, End: 10}, + {Kind: TokenLiteral, Value: " post", Start: 10, End: 15}, + }, + }, + { + "escaped_ref", + `\${a}`, + []Token{{Kind: TokenLiteral, Value: "${a}", Start: 0, End: 5}}, + }, + { + "adjacent_refs", + "${a}${b}", + []Token{ + {Kind: TokenRef, Value: "a", Start: 0, End: 4}, + {Kind: TokenRef, Value: "b", Start: 4, End: 8}, + }, + }, + { + "dollar_sign_mid_literal", + "a$b", + []Token{{Kind: TokenLiteral, Value: "a$b", Start: 0, End: 3}}, + }, + { + "escape_between_refs", + `${a}\$${b}`, + []Token{ + {Kind: TokenRef, Value: "a", Start: 0, End: 4}, + {Kind: TokenLiteral, Value: "$", Start: 4, End: 6}, + {Kind: TokenRef, Value: "b", Start: 6, End: 10}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tokens, err := Parse(tt.input) + require.NoError(t, err) + assert.Equal(t, tt.tokens, tokens) + }) + } +} From 523e287ac8c865212fe567551af8bd5f51148cba Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 16 Mar 2026 00:54:46 +0100 Subject: [PATCH 2/8] Update var_in_var test for nested ${} rejection The var_in_var test used ${var.foo_${var.tail}} which relied on undocumented nested reference behavior (the test itself noted: "not officially supported... might start to reject this in the future"). The new parser now rejects nested ${} constructs as intended. Co-authored-by: Isaac --- acceptance/bundle/variables/var_in_var/output.txt | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/acceptance/bundle/variables/var_in_var/output.txt b/acceptance/bundle/variables/var_in_var/output.txt index 27ec6e6ab1..2d284c76cc 100644 --- a/acceptance/bundle/variables/var_in_var/output.txt +++ b/acceptance/bundle/variables/var_in_var/output.txt @@ -1,9 +1,12 @@ >>> [CLI] bundle validate -o json -t target_x +Warning: nested variable references are not supported (at position 0) + in databricks.yml:14:14 + { "final": { - "default": "hello from foo x", - "value": "hello from foo x" + "default": "${var.foo_${var.tail}}", + "value": "${var.foo_${var.tail}}" }, "foo_x": { "default": "hello from foo x", @@ -20,10 +23,13 @@ } >>> [CLI] bundle validate -o json -t target_y +Warning: nested variable references are not supported (at position 0) + in databricks.yml:14:14 + { "final": { - "default": "hi from foo y", - "value": "hi from foo y" + "default": "${var.foo_${var.tail}}", + "value": "${var.foo_${var.tail}}" }, "foo_x": { "default": "hello from foo x", From 29301f6e49298b5fa26d05bfe373e06cb169daf9 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 19 Mar 2026 16:53:44 +0100 Subject: [PATCH 3/8] Add acceptance tests and consolidate unit tests for interpolation parser Add acceptance tests for escape sequences, unterminated refs, empty refs, leading digit keys, and dollar-before-non-brace. Consolidate standalone unit tests into table-driven TestParse and TestParseErrors. Add breaking change entry to NEXT_CHANGELOG for nested variable reference rejection. Co-authored-by: Isaac --- NEXT_CHANGELOG.md | 2 + .../dollar_before_non_brace/databricks.yml | 6 + .../dollar_before_non_brace/out.test.toml | 5 + .../dollar_before_non_brace/output.txt | 9 + .../variables/dollar_before_non_brace/script | 1 + .../bundle/variables/empty_ref/databricks.yml | 6 + .../bundle/variables/empty_ref/out.test.toml | 5 + .../bundle/variables/empty_ref/output.txt | 12 ++ acceptance/bundle/variables/empty_ref/script | 1 + .../variables/escape_sequences/databricks.yml | 6 + .../variables/escape_sequences/out.test.toml | 5 + .../variables/escape_sequences/output.txt | 16 ++ .../bundle/variables/escape_sequences/script | 1 + .../leading_digit_ref/databricks.yml | 6 + .../variables/leading_digit_ref/out.test.toml | 5 + .../variables/leading_digit_ref/output.txt | 12 ++ .../bundle/variables/leading_digit_ref/script | 1 + .../variables/unterminated_ref/databricks.yml | 6 + .../variables/unterminated_ref/out.test.toml | 5 + .../variables/unterminated_ref/output.txt | 12 ++ .../bundle/variables/unterminated_ref/script | 1 + libs/interpolation/parse_test.go | 190 +++++++----------- 22 files changed, 195 insertions(+), 118 deletions(-) create mode 100644 acceptance/bundle/variables/dollar_before_non_brace/databricks.yml create mode 100644 acceptance/bundle/variables/dollar_before_non_brace/out.test.toml create mode 100644 acceptance/bundle/variables/dollar_before_non_brace/output.txt create mode 100644 acceptance/bundle/variables/dollar_before_non_brace/script create mode 100644 acceptance/bundle/variables/empty_ref/databricks.yml create mode 100644 acceptance/bundle/variables/empty_ref/out.test.toml create mode 100644 acceptance/bundle/variables/empty_ref/output.txt create mode 100644 acceptance/bundle/variables/empty_ref/script create mode 100644 acceptance/bundle/variables/escape_sequences/databricks.yml create mode 100644 acceptance/bundle/variables/escape_sequences/out.test.toml create mode 100644 acceptance/bundle/variables/escape_sequences/output.txt create mode 100644 acceptance/bundle/variables/escape_sequences/script create mode 100644 acceptance/bundle/variables/leading_digit_ref/databricks.yml create mode 100644 acceptance/bundle/variables/leading_digit_ref/out.test.toml create mode 100644 acceptance/bundle/variables/leading_digit_ref/output.txt create mode 100644 acceptance/bundle/variables/leading_digit_ref/script create mode 100644 acceptance/bundle/variables/unterminated_ref/databricks.yml create mode 100644 acceptance/bundle/variables/unterminated_ref/out.test.toml create mode 100644 acceptance/bundle/variables/unterminated_ref/output.txt create mode 100644 acceptance/bundle/variables/unterminated_ref/script diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index d5a0e34975..a8faf00fbb 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -6,6 +6,8 @@ ### Bundles +* **Breaking**: Nested variable references like `${var.foo_${var.tail}}` are now rejected with a warning and left unresolved. Previously the regex-based parser matched only the innermost `${var.tail}` by coincidence, which silently produced incorrect results. If you rely on dynamic variable name construction, use separate variables or target overrides instead ([#4747](https://github.com/databricks/cli/pull/4747)). + ### Dependency updates ### API Changes diff --git a/acceptance/bundle/variables/dollar_before_non_brace/databricks.yml b/acceptance/bundle/variables/dollar_before_non_brace/databricks.yml new file mode 100644 index 0000000000..62fc346040 --- /dev/null +++ b/acceptance/bundle/variables/dollar_before_non_brace/databricks.yml @@ -0,0 +1,6 @@ +bundle: + name: '$foo is not a reference' + +variables: + a: + default: hello diff --git a/acceptance/bundle/variables/dollar_before_non_brace/out.test.toml b/acceptance/bundle/variables/dollar_before_non_brace/out.test.toml new file mode 100644 index 0000000000..d560f1de04 --- /dev/null +++ b/acceptance/bundle/variables/dollar_before_non_brace/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/bundle/variables/dollar_before_non_brace/output.txt b/acceptance/bundle/variables/dollar_before_non_brace/output.txt new file mode 100644 index 0000000000..e97599047b --- /dev/null +++ b/acceptance/bundle/variables/dollar_before_non_brace/output.txt @@ -0,0 +1,9 @@ + +>>> [CLI] bundle validate +Name: $foo is not a reference +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/$foo is not a reference/default + +Validation OK! diff --git a/acceptance/bundle/variables/dollar_before_non_brace/script b/acceptance/bundle/variables/dollar_before_non_brace/script new file mode 100644 index 0000000000..5350876150 --- /dev/null +++ b/acceptance/bundle/variables/dollar_before_non_brace/script @@ -0,0 +1 @@ +trace $CLI bundle validate diff --git a/acceptance/bundle/variables/empty_ref/databricks.yml b/acceptance/bundle/variables/empty_ref/databricks.yml new file mode 100644 index 0000000000..c6254777bd --- /dev/null +++ b/acceptance/bundle/variables/empty_ref/databricks.yml @@ -0,0 +1,6 @@ +bundle: + name: '${}' + +variables: + a: + default: hello diff --git a/acceptance/bundle/variables/empty_ref/out.test.toml b/acceptance/bundle/variables/empty_ref/out.test.toml new file mode 100644 index 0000000000..d560f1de04 --- /dev/null +++ b/acceptance/bundle/variables/empty_ref/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/bundle/variables/empty_ref/output.txt b/acceptance/bundle/variables/empty_ref/output.txt new file mode 100644 index 0000000000..2e2f56b493 --- /dev/null +++ b/acceptance/bundle/variables/empty_ref/output.txt @@ -0,0 +1,12 @@ + +>>> [CLI] bundle validate +Warning: empty variable reference at position 0 + in databricks.yml:2:9 + +Name: ${} +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/${}/default + +Found 1 warning diff --git a/acceptance/bundle/variables/empty_ref/script b/acceptance/bundle/variables/empty_ref/script new file mode 100644 index 0000000000..5350876150 --- /dev/null +++ b/acceptance/bundle/variables/empty_ref/script @@ -0,0 +1 @@ +trace $CLI bundle validate diff --git a/acceptance/bundle/variables/escape_sequences/databricks.yml b/acceptance/bundle/variables/escape_sequences/databricks.yml new file mode 100644 index 0000000000..4e1e732b0f --- /dev/null +++ b/acceptance/bundle/variables/escape_sequences/databricks.yml @@ -0,0 +1,6 @@ +bundle: + name: 'literal \${not_a_ref} and ${var.greeting}' + +variables: + greeting: + default: hello diff --git a/acceptance/bundle/variables/escape_sequences/out.test.toml b/acceptance/bundle/variables/escape_sequences/out.test.toml new file mode 100644 index 0000000000..d560f1de04 --- /dev/null +++ b/acceptance/bundle/variables/escape_sequences/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/bundle/variables/escape_sequences/output.txt b/acceptance/bundle/variables/escape_sequences/output.txt new file mode 100644 index 0000000000..827a328329 --- /dev/null +++ b/acceptance/bundle/variables/escape_sequences/output.txt @@ -0,0 +1,16 @@ + +>>> [CLI] bundle validate +Warning: Please do not use variable interpolation in the name of your bundle. The name of your bundle +is a part of the path at which your bundle state is stored at by default. Parameterizing it at +runtime can have unexpected consequences like duplicate deployments or resources not being +cleaned up during bundle destroy. + at bundle.name + in databricks.yml:2:9 + +Name: literal ${not_a_ref} and hello +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/literal ${not_a_ref} and hello/default + +Found 1 warning diff --git a/acceptance/bundle/variables/escape_sequences/script b/acceptance/bundle/variables/escape_sequences/script new file mode 100644 index 0000000000..5350876150 --- /dev/null +++ b/acceptance/bundle/variables/escape_sequences/script @@ -0,0 +1 @@ +trace $CLI bundle validate diff --git a/acceptance/bundle/variables/leading_digit_ref/databricks.yml b/acceptance/bundle/variables/leading_digit_ref/databricks.yml new file mode 100644 index 0000000000..bdd2e44a4b --- /dev/null +++ b/acceptance/bundle/variables/leading_digit_ref/databricks.yml @@ -0,0 +1,6 @@ +bundle: + name: '${0foo}' + +variables: + a: + default: hello diff --git a/acceptance/bundle/variables/leading_digit_ref/out.test.toml b/acceptance/bundle/variables/leading_digit_ref/out.test.toml new file mode 100644 index 0000000000..d560f1de04 --- /dev/null +++ b/acceptance/bundle/variables/leading_digit_ref/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/bundle/variables/leading_digit_ref/output.txt b/acceptance/bundle/variables/leading_digit_ref/output.txt new file mode 100644 index 0000000000..9b2f8e4eef --- /dev/null +++ b/acceptance/bundle/variables/leading_digit_ref/output.txt @@ -0,0 +1,12 @@ + +>>> [CLI] bundle validate +Warning: invalid variable reference ${0foo}: invalid key "0foo" + in databricks.yml:2:9 + +Name: ${0foo} +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/${0foo}/default + +Found 1 warning diff --git a/acceptance/bundle/variables/leading_digit_ref/script b/acceptance/bundle/variables/leading_digit_ref/script new file mode 100644 index 0000000000..5350876150 --- /dev/null +++ b/acceptance/bundle/variables/leading_digit_ref/script @@ -0,0 +1 @@ +trace $CLI bundle validate diff --git a/acceptance/bundle/variables/unterminated_ref/databricks.yml b/acceptance/bundle/variables/unterminated_ref/databricks.yml new file mode 100644 index 0000000000..3ca0d01099 --- /dev/null +++ b/acceptance/bundle/variables/unterminated_ref/databricks.yml @@ -0,0 +1,6 @@ +bundle: + name: '${foo.bar' + +variables: + a: + default: hello diff --git a/acceptance/bundle/variables/unterminated_ref/out.test.toml b/acceptance/bundle/variables/unterminated_ref/out.test.toml new file mode 100644 index 0000000000..d560f1de04 --- /dev/null +++ b/acceptance/bundle/variables/unterminated_ref/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/bundle/variables/unterminated_ref/output.txt b/acceptance/bundle/variables/unterminated_ref/output.txt new file mode 100644 index 0000000000..2dd4178d15 --- /dev/null +++ b/acceptance/bundle/variables/unterminated_ref/output.txt @@ -0,0 +1,12 @@ + +>>> [CLI] bundle validate +Warning: unterminated variable reference at position 0 + in databricks.yml:2:9 + +Name: ${foo.bar +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/${foo.bar/default + +Found 1 warning diff --git a/acceptance/bundle/variables/unterminated_ref/script b/acceptance/bundle/variables/unterminated_ref/script new file mode 100644 index 0000000000..5350876150 --- /dev/null +++ b/acceptance/bundle/variables/unterminated_ref/script @@ -0,0 +1 @@ +trace $CLI bundle validate diff --git a/libs/interpolation/parse_test.go b/libs/interpolation/parse_test.go index 565f13b2e1..060d2b6d5f 100644 --- a/libs/interpolation/parse_test.go +++ b/libs/interpolation/parse_test.go @@ -7,48 +7,6 @@ import ( "github.com/stretchr/testify/require" ) -func TestParseEmpty(t *testing.T) { - tokens, err := Parse("") - require.NoError(t, err) - assert.Nil(t, tokens) -} - -func TestParseLiteralOnly(t *testing.T) { - tokens, err := Parse("hello world") - require.NoError(t, err) - assert.Equal(t, []Token{ - {Kind: TokenLiteral, Value: "hello world", Start: 0, End: 11}, - }, tokens) -} - -func TestParseSingleRef(t *testing.T) { - tokens, err := Parse("${a.b}") - require.NoError(t, err) - assert.Equal(t, []Token{ - {Kind: TokenRef, Value: "a.b", Start: 0, End: 6}, - }, tokens) -} - -func TestParseMultipleRefs(t *testing.T) { - tokens, err := Parse("${a} ${b}") - require.NoError(t, err) - assert.Equal(t, []Token{ - {Kind: TokenRef, Value: "a", Start: 0, End: 4}, - {Kind: TokenLiteral, Value: " ", Start: 4, End: 5}, - {Kind: TokenRef, Value: "b", Start: 5, End: 9}, - }, tokens) -} - -func TestParseMixedLiteralAndRef(t *testing.T) { - tokens, err := Parse("pre ${a.b} post") - require.NoError(t, err) - assert.Equal(t, []Token{ - {Kind: TokenLiteral, Value: "pre ", Start: 0, End: 4}, - {Kind: TokenRef, Value: "a.b", Start: 4, End: 10}, - {Kind: TokenLiteral, Value: " post", Start: 10, End: 15}, - }, tokens) -} - func TestParseValidPaths(t *testing.T) { tests := []struct { input string @@ -139,86 +97,36 @@ func TestParseEscapeSequences(t *testing.T) { } } -func TestParseDollarAtEnd(t *testing.T) { - tokens, err := Parse("abc$") - require.NoError(t, err) - assert.Equal(t, []Token{ - {Kind: TokenLiteral, Value: "abc$", Start: 0, End: 4}, - }, tokens) -} - -func TestParseDollarBeforeNonBrace(t *testing.T) { - tokens, err := Parse("$x") - require.NoError(t, err) - assert.Equal(t, []Token{ - {Kind: TokenLiteral, Value: "$x", Start: 0, End: 2}, - }, tokens) -} - -func TestParseBackslashAtEnd(t *testing.T) { - tokens, err := Parse(`abc\`) - require.NoError(t, err) - assert.Equal(t, []Token{ - {Kind: TokenLiteral, Value: `abc\`, Start: 0, End: 4}, - }, tokens) -} - -func TestParseNestedReferenceReturnsError(t *testing.T) { - _, err := Parse("${var.foo_${var.tail}}") - require.Error(t, err) - assert.Contains(t, err.Error(), "nested variable references are not supported") -} - -func TestParseUnterminatedRef(t *testing.T) { - _, err := Parse("${a.b") - require.Error(t, err) - assert.Contains(t, err.Error(), "unterminated") -} - -func TestParseEmptyRef(t *testing.T) { - _, err := Parse("${}") - require.Error(t, err) - assert.Contains(t, err.Error(), "empty") -} - -func TestParseInvalidPaths(t *testing.T) { - tests := []struct { - name string - input string - }{ - {"trailing_hyphen", "${foo.bar-}"}, - {"double_dot", "${foo..bar}"}, - {"leading_digit", "${0foo}"}, - {"hyphen_start_segment", "${foo.-bar}"}, - {"trailing_dot", "${foo.}"}, - {"leading_dot", "${.foo}"}, - {"space_in_path", "${foo. bar}"}, - {"special_char", "${foo.bar!}"}, - {"just_digits", "${123}"}, - {"trailing_underscore", "${foo.bar_}"}, - {"underscore_start_segment", "${foo._bar}"}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - _, err := Parse(tt.input) - require.Error(t, err) - assert.Contains(t, err.Error(), "invalid") - }) - } -} - -func TestParsePositions(t *testing.T) { +func TestParse(t *testing.T) { tests := []struct { name string input string tokens []Token }{ + { + "empty", + "", + nil, + }, + { + "literal_only", + "hello world", + []Token{{Kind: TokenLiteral, Value: "hello world", Start: 0, End: 11}}, + }, { "single_ref", "${a.b}", []Token{{Kind: TokenRef, Value: "a.b", Start: 0, End: 6}}, }, + { + "multiple_refs", + "${a} ${b}", + []Token{ + {Kind: TokenRef, Value: "a", Start: 0, End: 4}, + {Kind: TokenLiteral, Value: " ", Start: 4, End: 5}, + {Kind: TokenRef, Value: "b", Start: 5, End: 9}, + }, + }, { "literal_ref_literal", "pre ${a.b} post", @@ -228,11 +136,6 @@ func TestParsePositions(t *testing.T) { {Kind: TokenLiteral, Value: " post", Start: 10, End: 15}, }, }, - { - "escaped_ref", - `\${a}`, - []Token{{Kind: TokenLiteral, Value: "${a}", Start: 0, End: 5}}, - }, { "adjacent_refs", "${a}${b}", @@ -242,10 +145,30 @@ func TestParsePositions(t *testing.T) { }, }, { - "dollar_sign_mid_literal", + "dollar_at_end", + "abc$", + []Token{{Kind: TokenLiteral, Value: "abc$", Start: 0, End: 4}}, + }, + { + "dollar_before_non_brace", + "$x", + []Token{{Kind: TokenLiteral, Value: "$x", Start: 0, End: 2}}, + }, + { + "dollar_mid_literal", "a$b", []Token{{Kind: TokenLiteral, Value: "a$b", Start: 0, End: 3}}, }, + { + "backslash_at_end", + `abc\`, + []Token{{Kind: TokenLiteral, Value: `abc\`, Start: 0, End: 4}}, + }, + { + "escaped_ref", + `\${a}`, + []Token{{Kind: TokenLiteral, Value: "${a}", Start: 0, End: 5}}, + }, { "escape_between_refs", `${a}\$${b}`, @@ -265,3 +188,34 @@ func TestParsePositions(t *testing.T) { }) } } + +func TestParseErrors(t *testing.T) { + tests := []struct { + name string + input string + errContains string + }{ + {"nested_reference", "${var.foo_${var.tail}}", "nested variable references are not supported"}, + {"unterminated_ref", "${a.b", "unterminated"}, + {"empty_ref", "${}", "empty"}, + {"trailing_hyphen", "${foo.bar-}", "invalid"}, + {"double_dot", "${foo..bar}", "invalid"}, + {"leading_digit", "${0foo}", "invalid"}, + {"hyphen_start_segment", "${foo.-bar}", "invalid"}, + {"trailing_dot", "${foo.}", "invalid"}, + {"leading_dot", "${.foo}", "invalid"}, + {"space_in_path", "${foo. bar}", "invalid"}, + {"special_char", "${foo.bar!}", "invalid"}, + {"just_digits", "${123}", "invalid"}, + {"trailing_underscore", "${foo.bar_}", "invalid"}, + {"underscore_start_segment", "${foo._bar}", "invalid"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, err := Parse(tt.input) + require.Error(t, err) + assert.Contains(t, err.Error(), tt.errContains) + }) + } +} From 8fabf5687fbdc610dfeb20e32d9983eae74da0df Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Wed, 25 Mar 2026 14:25:17 +0100 Subject: [PATCH 4/8] Improve malformed reference warnings with config path and structured errors Add ParseError type to interpolation package so callers can access the position offset separately from the message. Update WarnMalformedReferences to include the config path (e.g. bundle.name) in diagnostics and incorporate the position into the column location. Remove redundant Validate method. Co-authored-by: Isaac --- .../bad_syntax/out.deploy.direct.txt | 1 + .../bad_syntax/out.plan.direct.txt | 1 + .../bad_syntax/out.plan.terraform.txt | 1 + .../resource_deps/bad_syntax/output.txt | 1 + .../bundle/variables/empty_ref/output.txt | 3 +- .../variables/leading_digit_ref/output.txt | 1 + .../variables/malformed_reference/output.txt | 1 + .../variables/unterminated_ref/output.txt | 3 +- .../bundle/variables/var_in_var/output.txt | 6 ++- .../mutator/warn_malformed_references.go | 39 ++++++++++++++---- libs/interpolation/parse.go | 40 +++++++++++++------ 11 files changed, 74 insertions(+), 23 deletions(-) diff --git a/acceptance/bundle/resource_deps/bad_syntax/out.deploy.direct.txt b/acceptance/bundle/resource_deps/bad_syntax/out.deploy.direct.txt index a985c8415e..d9790d5c72 100644 --- a/acceptance/bundle/resource_deps/bad_syntax/out.deploy.direct.txt +++ b/acceptance/bundle/resource_deps/bad_syntax/out.deploy.direct.txt @@ -1,4 +1,5 @@ Warning: invalid variable reference ${resources.volumes.bar.bad..syntax}: invalid path + at resources.volumes.foo.schema_name in databricks.yml:11:21 Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files... diff --git a/acceptance/bundle/resource_deps/bad_syntax/out.plan.direct.txt b/acceptance/bundle/resource_deps/bad_syntax/out.plan.direct.txt index a53d32c081..e00391b668 100644 --- a/acceptance/bundle/resource_deps/bad_syntax/out.plan.direct.txt +++ b/acceptance/bundle/resource_deps/bad_syntax/out.plan.direct.txt @@ -1,4 +1,5 @@ Warning: invalid variable reference ${resources.volumes.bar.bad..syntax}: invalid path + at resources.volumes.foo.schema_name in databricks.yml:11:21 create volumes.bar diff --git a/acceptance/bundle/resource_deps/bad_syntax/out.plan.terraform.txt b/acceptance/bundle/resource_deps/bad_syntax/out.plan.terraform.txt index ed957b8518..ec5521761d 100644 --- a/acceptance/bundle/resource_deps/bad_syntax/out.plan.terraform.txt +++ b/acceptance/bundle/resource_deps/bad_syntax/out.plan.terraform.txt @@ -1,4 +1,5 @@ Warning: invalid variable reference ${resources.volumes.bar.bad..syntax}: invalid path + at resources.volumes.foo.schema_name in databricks.yml:11:21 Error: exit status 1 diff --git a/acceptance/bundle/resource_deps/bad_syntax/output.txt b/acceptance/bundle/resource_deps/bad_syntax/output.txt index 968ebf652a..7dedf83402 100644 --- a/acceptance/bundle/resource_deps/bad_syntax/output.txt +++ b/acceptance/bundle/resource_deps/bad_syntax/output.txt @@ -1,6 +1,7 @@ >>> [CLI] bundle validate -o json Warning: invalid variable reference ${resources.volumes.bar.bad..syntax}: invalid path + at resources.volumes.foo.schema_name in databricks.yml:11:21 { diff --git a/acceptance/bundle/variables/empty_ref/output.txt b/acceptance/bundle/variables/empty_ref/output.txt index 2e2f56b493..beafe20df5 100644 --- a/acceptance/bundle/variables/empty_ref/output.txt +++ b/acceptance/bundle/variables/empty_ref/output.txt @@ -1,6 +1,7 @@ >>> [CLI] bundle validate -Warning: empty variable reference at position 0 +Warning: empty variable reference + at bundle.name in databricks.yml:2:9 Name: ${} diff --git a/acceptance/bundle/variables/leading_digit_ref/output.txt b/acceptance/bundle/variables/leading_digit_ref/output.txt index 9b2f8e4eef..d43d228eeb 100644 --- a/acceptance/bundle/variables/leading_digit_ref/output.txt +++ b/acceptance/bundle/variables/leading_digit_ref/output.txt @@ -1,6 +1,7 @@ >>> [CLI] bundle validate Warning: invalid variable reference ${0foo}: invalid key "0foo" + at bundle.name in databricks.yml:2:9 Name: ${0foo} diff --git a/acceptance/bundle/variables/malformed_reference/output.txt b/acceptance/bundle/variables/malformed_reference/output.txt index fba4d19be6..d5b772a731 100644 --- a/acceptance/bundle/variables/malformed_reference/output.txt +++ b/acceptance/bundle/variables/malformed_reference/output.txt @@ -1,6 +1,7 @@ >>> [CLI] bundle validate Warning: invalid variable reference ${foo.bar-}: invalid key "bar-" + at bundle.name in databricks.yml:2:9 Name: ${foo.bar-} diff --git a/acceptance/bundle/variables/unterminated_ref/output.txt b/acceptance/bundle/variables/unterminated_ref/output.txt index 2dd4178d15..cb5953f539 100644 --- a/acceptance/bundle/variables/unterminated_ref/output.txt +++ b/acceptance/bundle/variables/unterminated_ref/output.txt @@ -1,6 +1,7 @@ >>> [CLI] bundle validate -Warning: unterminated variable reference at position 0 +Warning: unterminated variable reference + at bundle.name in databricks.yml:2:9 Name: ${foo.bar diff --git a/acceptance/bundle/variables/var_in_var/output.txt b/acceptance/bundle/variables/var_in_var/output.txt index 2d284c76cc..9c9872ff3d 100644 --- a/acceptance/bundle/variables/var_in_var/output.txt +++ b/acceptance/bundle/variables/var_in_var/output.txt @@ -1,6 +1,7 @@ >>> [CLI] bundle validate -o json -t target_x -Warning: nested variable references are not supported (at position 0) +Warning: nested variable references are not supported + at variables.final.default in databricks.yml:14:14 { @@ -23,7 +24,8 @@ Warning: nested variable references are not supported (at position 0) } >>> [CLI] bundle validate -o json -t target_y -Warning: nested variable references are not supported (at position 0) +Warning: nested variable references are not supported + at variables.final.default in databricks.yml:14:14 { diff --git a/bundle/config/mutator/warn_malformed_references.go b/bundle/config/mutator/warn_malformed_references.go index cf6934c7b7..d13952b64b 100644 --- a/bundle/config/mutator/warn_malformed_references.go +++ b/bundle/config/mutator/warn_malformed_references.go @@ -2,11 +2,13 @@ package mutator import ( "context" + "errors" + "slices" "github.com/databricks/cli/bundle" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/dyn" - "github.com/databricks/cli/libs/dyn/dynvar" + "github.com/databricks/cli/libs/interpolation" ) type warnMalformedReferences struct{} @@ -21,10 +23,6 @@ func (*warnMalformedReferences) Name() string { return "WarnMalformedReferences" } -func (*warnMalformedReferences) Validate(ctx context.Context, b *bundle.Bundle) error { - return nil -} - func (*warnMalformedReferences) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { var diags diag.Diagnostics err := b.Config.Mutate(func(root dyn.Value) (dyn.Value, error) { @@ -34,8 +32,35 @@ func (*warnMalformedReferences) Apply(ctx context.Context, b *bundle.Bundle) dia if len(v.Locations()) == 0 { return v, nil } - _, _, refDiags := dynvar.NewRefWithDiagnostics(v) - diags = diags.Extend(refDiags) + + s, ok := v.AsString() + if !ok { + return v, nil + } + + _, parseErr := interpolation.Parse(s) + if parseErr == nil { + return v, nil + } + + var pe *interpolation.ParseError + if !errors.As(parseErr, &pe) { + return v, nil + } + + // Clone locations and adjust column with the position offset + // so the diagnostic points to the problematic reference. + locs := slices.Clone(v.Locations()) + if len(locs) > 0 { + locs[0].Column += pe.Pos + } + + diags = append(diags, diag.Diagnostic{ + Severity: diag.Warning, + Summary: pe.Msg, + Locations: locs, + Paths: []dyn.Path{p}, + }) return v, nil }) return root, err diff --git a/libs/interpolation/parse.go b/libs/interpolation/parse.go index 48518c8c32..595cce42c7 100644 --- a/libs/interpolation/parse.go +++ b/libs/interpolation/parse.go @@ -7,6 +7,18 @@ import ( "strings" ) +// ParseError is returned by [Parse] when the input contains a malformed +// variable reference. Pos is the byte offset in the original string where +// the problematic reference starts. +type ParseError struct { + Msg string + Pos int +} + +func (e *ParseError) Error() string { + return e.Msg +} + // TokenKind represents the type of a parsed token. type TokenKind int @@ -128,32 +140,36 @@ func Parse(s string) ([]Token, error) { pathStart := j for j < len(s) && s[j] != closeBrace { if s[j] == dollarChar && j+1 < len(s) && s[j+1] == openBrace { - return nil, fmt.Errorf( - "nested variable references are not supported (at position %d)", refStart, - ) + return nil, &ParseError{ + Msg: "nested variable references are not supported", + Pos: refStart, + } } j++ } if j >= len(s) { - return nil, fmt.Errorf( - "unterminated variable reference at position %d", refStart, - ) + return nil, &ParseError{ + Msg: "unterminated variable reference", + Pos: refStart, + } } path := s[pathStart:j] j++ // skip '}' if path == "" { - return nil, fmt.Errorf( - "empty variable reference at position %d", refStart, - ) + return nil, &ParseError{ + Msg: "empty variable reference", + Pos: refStart, + } } if err := validatePath(path); err != nil { - return nil, fmt.Errorf( - "invalid variable reference ${%s}: %w", path, err, - ) + return nil, &ParseError{ + Msg: fmt.Sprintf("invalid variable reference ${%s}: %s", path, err), + Pos: refStart, + } } flushLiteral(i) From fdba4f120558e7910db25ab3199b278f5990c00b Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 26 Mar 2026 15:15:35 +0100 Subject: [PATCH 5/8] Support nested variable references like ${var.foo_${var.tail}} The parser now treats outer ${...} prefixes as literal text when a nested ${ is encountered, allowing inner references to be resolved first. Multi-round resolution progressively resolves from inside out. Co-authored-by: Isaac --- NEXT_CHANGELOG.md | 2 +- .../bundle/variables/var_in_var/output.txt | 16 +++------- .../var_in_var_3level/databricks.yml | 12 +++++++ .../variables/var_in_var_3level/out.test.toml | 5 +++ .../variables/var_in_var_3level/output.txt | 20 ++++++++++++ .../bundle/variables/var_in_var_3level/script | 1 + libs/dyn/dynvar/resolve_test.go | 29 +++++++++++++++++ libs/interpolation/parse.go | 27 +++++++++++++--- libs/interpolation/parse_test.go | 32 +++++++++++++++++-- 9 files changed, 123 insertions(+), 21 deletions(-) create mode 100644 acceptance/bundle/variables/var_in_var_3level/databricks.yml create mode 100644 acceptance/bundle/variables/var_in_var_3level/out.test.toml create mode 100644 acceptance/bundle/variables/var_in_var_3level/output.txt create mode 100644 acceptance/bundle/variables/var_in_var_3level/script diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index b3e381dd16..1c9dd9a4ae 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -12,7 +12,7 @@ * Deduplicate grant entries with duplicate principals or privileges during initialization ([#4801](https://github.com/databricks/cli/pull/4801)) * engine/direct: Fix unwanted recreation of secret scopes when scope_backend_type is not set ([#4834](https://github.com/databricks/cli/pull/4834)) -* **Breaking**: Nested variable references like `${var.foo_${var.tail}}` are now rejected with a warning and left unresolved. Previously the regex-based parser matched only the innermost `${var.tail}` by coincidence, which silently produced incorrect results. If you rely on dynamic variable name construction, use separate variables or target overrides instead ([#4747](https://github.com/databricks/cli/pull/4747)). +* Replace regex-based variable interpolation with a character scanner. Escape sequences `\$` and `\\` are now supported ([#4747](https://github.com/databricks/cli/pull/4747)). ### Dependency updates diff --git a/acceptance/bundle/variables/var_in_var/output.txt b/acceptance/bundle/variables/var_in_var/output.txt index 9c9872ff3d..27ec6e6ab1 100644 --- a/acceptance/bundle/variables/var_in_var/output.txt +++ b/acceptance/bundle/variables/var_in_var/output.txt @@ -1,13 +1,9 @@ >>> [CLI] bundle validate -o json -t target_x -Warning: nested variable references are not supported - at variables.final.default - in databricks.yml:14:14 - { "final": { - "default": "${var.foo_${var.tail}}", - "value": "${var.foo_${var.tail}}" + "default": "hello from foo x", + "value": "hello from foo x" }, "foo_x": { "default": "hello from foo x", @@ -24,14 +20,10 @@ Warning: nested variable references are not supported } >>> [CLI] bundle validate -o json -t target_y -Warning: nested variable references are not supported - at variables.final.default - in databricks.yml:14:14 - { "final": { - "default": "${var.foo_${var.tail}}", - "value": "${var.foo_${var.tail}}" + "default": "hi from foo y", + "value": "hi from foo y" }, "foo_x": { "default": "hello from foo x", diff --git a/acceptance/bundle/variables/var_in_var_3level/databricks.yml b/acceptance/bundle/variables/var_in_var_3level/databricks.yml new file mode 100644 index 0000000000..af94bf7d0d --- /dev/null +++ b/acceptance/bundle/variables/var_in_var_3level/databricks.yml @@ -0,0 +1,12 @@ +bundle: + name: test-bundle + +variables: + env: + default: prod + region_prod: + default: us + endpoint_us: + default: https://us.example.com + result: + default: ${var.endpoint_${var.region_${var.env}}} diff --git a/acceptance/bundle/variables/var_in_var_3level/out.test.toml b/acceptance/bundle/variables/var_in_var_3level/out.test.toml new file mode 100644 index 0000000000..d560f1de04 --- /dev/null +++ b/acceptance/bundle/variables/var_in_var_3level/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/bundle/variables/var_in_var_3level/output.txt b/acceptance/bundle/variables/var_in_var_3level/output.txt new file mode 100644 index 0000000000..8ddd6e951a --- /dev/null +++ b/acceptance/bundle/variables/var_in_var_3level/output.txt @@ -0,0 +1,20 @@ + +>>> [CLI] bundle validate -o json +{ + "endpoint_us": { + "default": "https://us.example.com", + "value": "https://us.example.com" + }, + "env": { + "default": "prod", + "value": "prod" + }, + "region_prod": { + "default": "us", + "value": "us" + }, + "result": { + "default": "https://us.example.com", + "value": "https://us.example.com" + } +} diff --git a/acceptance/bundle/variables/var_in_var_3level/script b/acceptance/bundle/variables/var_in_var_3level/script new file mode 100644 index 0000000000..1551014050 --- /dev/null +++ b/acceptance/bundle/variables/var_in_var_3level/script @@ -0,0 +1 @@ +trace $CLI bundle validate -o json | jq .variables diff --git a/libs/dyn/dynvar/resolve_test.go b/libs/dyn/dynvar/resolve_test.go index 598c78852b..748204ef90 100644 --- a/libs/dyn/dynvar/resolve_test.go +++ b/libs/dyn/dynvar/resolve_test.go @@ -394,6 +394,35 @@ func TestResolveSequenceVariable(t *testing.T) { assert.Equal(t, "value2", seq[1].MustString()) } +func TestResolveNestedVariableReference(t *testing.T) { + in := dyn.V(map[string]dyn.Value{ + "tail": dyn.V("x"), + "foo_x": dyn.V("hello"), + "final": dyn.V("${foo_${tail}}"), + }) + + // First pass resolves ${tail} -> "x", producing "${foo_x}" for final. + out, err := dynvar.Resolve(in, dynvar.DefaultLookup(in)) + require.NoError(t, err) + + // After one pass, the inner ref is resolved but the outer is not yet. + assert.Equal(t, "${foo_x}", getByPath(t, out, "final").MustString()) +} + +func TestResolveThreeLevelNestedVariableReference(t *testing.T) { + in := dyn.V(map[string]dyn.Value{ + "c": dyn.V("z"), + "b_z": dyn.V("y"), + "a_y": dyn.V("hello"), + "final": dyn.V("${a_${b_${c}}}"), + }) + + // First pass resolves ${c} -> "z", producing "${a_${b_z}}" for final. + out, err := dynvar.Resolve(in, dynvar.DefaultLookup(in)) + require.NoError(t, err) + assert.Equal(t, "${a_${b_z}}", getByPath(t, out, "final").MustString()) +} + func TestResolveEscapedRef(t *testing.T) { in := dyn.V(map[string]dyn.Value{ "a": dyn.V("a"), diff --git a/libs/interpolation/parse.go b/libs/interpolation/parse.go index 595cce42c7..ffc0e12fd2 100644 --- a/libs/interpolation/parse.go +++ b/libs/interpolation/parse.go @@ -57,11 +57,16 @@ var indexPattern = regexp.MustCompile(`^(\[[0-9]+\])+$`) // - "\$" produces a literal "$" // - "\\" produces a literal "\" // +// Nested references like "${a.${b}}" are supported by treating the outer +// "${a." as literal text so that inner references are resolved first. +// After resolution the resulting string (e.g. "${a.x}") is re-parsed. +// // Examples: // - "hello" -> [Literal("hello")] // - "${a.b}" -> [Ref("a.b")] // - "pre ${a.b} post" -> [Literal("pre "), Ref("a.b"), Literal(" post")] // - "\${a.b}" -> [Literal("${a.b}")] +// - "${a.${b}}" -> [Literal("${a."), Ref("b"), Literal("}")] func Parse(s string) ([]Token, error) { if len(s) == 0 { return nil, nil @@ -136,18 +141,30 @@ func Parse(s string) ([]Token, error) { refStart := i j := i + 2 // skip "${" - // Scan until closing '}', rejecting nested '${'. + // Scan until closing '}', handling nested '${'. pathStart := j + nested := false for j < len(s) && s[j] != closeBrace { if s[j] == dollarChar && j+1 < len(s) && s[j+1] == openBrace { - return nil, &ParseError{ - Msg: "nested variable references are not supported", - Pos: refStart, - } + // Nested '${' found. Treat the outer "${..." prefix as + // literal so inner references get resolved first. + // E.g. "${a.${b}}" produces: + // [Literal("${a."), Ref("b"), Literal("}")] + nested = true + break } j++ } + if nested { + if buf.Len() == 0 { + litStart = refStart + } + buf.WriteString(s[refStart:j]) + i = j + continue + } + if j >= len(s) { return nil, &ParseError{ Msg: "unterminated variable reference", diff --git a/libs/interpolation/parse_test.go b/libs/interpolation/parse_test.go index 060d2b6d5f..3da65054f3 100644 --- a/libs/interpolation/parse_test.go +++ b/libs/interpolation/parse_test.go @@ -178,6 +178,33 @@ func TestParse(t *testing.T) { {Kind: TokenRef, Value: "b", Start: 6, End: 10}, }, }, + { + "nested_ref", + "${var.foo_${var.tail}}", + []Token{ + {Kind: TokenLiteral, Value: "${var.foo_", Start: 0, End: 10}, + {Kind: TokenRef, Value: "var.tail", Start: 10, End: 21}, + {Kind: TokenLiteral, Value: "}", Start: 21, End: 22}, + }, + }, + { + "three_level_nested_ref", + "${a_${b_${c}}}", + []Token{ + {Kind: TokenLiteral, Value: "${a_${b_", Start: 0, End: 8}, + {Kind: TokenRef, Value: "c", Start: 8, End: 12}, + {Kind: TokenLiteral, Value: "}}", Start: 12, End: 14}, + }, + }, + { + "nested_ref_mid_path", + "${a.${b.c}.d}", + []Token{ + {Kind: TokenLiteral, Value: "${a.", Start: 0, End: 4}, + {Kind: TokenRef, Value: "b.c", Start: 4, End: 10}, + {Kind: TokenLiteral, Value: ".d}", Start: 10, End: 13}, + }, + }, } for _, tt := range tests { @@ -191,11 +218,10 @@ func TestParse(t *testing.T) { func TestParseErrors(t *testing.T) { tests := []struct { - name string - input string + name string + input string errContains string }{ - {"nested_reference", "${var.foo_${var.tail}}", "nested variable references are not supported"}, {"unterminated_ref", "${a.b", "unterminated"}, {"empty_ref", "${}", "empty"}, {"trailing_hyphen", "${foo.bar-}", "invalid"}, From 8cddafa5c6bfeecb0984d58634606e7e2e5a9ff8 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 26 Mar 2026 19:12:14 +0100 Subject: [PATCH 6/8] Add cross-language tests for variable reference parsing Shared test cases in libs/interpolation/testdata/variable_references.json are consumed by both the Go parser (TestParsePureVariableReferences) and the Python regex (test_pure_variable_reference) to verify they agree on which strings are pure variable references. When modifying the parser, add test cases to the JSON file so both languages are validated. Co-authored-by: Isaac --- libs/interpolation/parse.go | 7 + libs/interpolation/parse_test.go | 39 +++++ .../testdata/variable_references.json | 142 ++++++++++++++++++ python/databricks/bundles/core/_transform.py | 9 +- .../core/test_variable_references.py | 45 ++++++ 5 files changed, 240 insertions(+), 2 deletions(-) create mode 100644 libs/interpolation/testdata/variable_references.json create mode 100644 python/databricks_tests/core/test_variable_references.py diff --git a/libs/interpolation/parse.go b/libs/interpolation/parse.go index ffc0e12fd2..0f2004359e 100644 --- a/libs/interpolation/parse.go +++ b/libs/interpolation/parse.go @@ -45,6 +45,13 @@ const ( // keyPattern validates a single key segment in a variable path. // Matches: [a-zA-Z]+([-_]*[a-zA-Z0-9]+)* // Examples: "foo", "my-job", "a_b_c", "abc123" +// +// PyDABs uses a duplicate regex to detect pure variable references +// (python/databricks/bundles/core/_transform.py). The patterns must stay in +// sync. Cross-language test cases live in testdata/variable_references.json +// and are run by both Go (TestParsePureVariableReferences) and Python +// (test_pure_variable_reference). When changing key/index/path validation, +// add cases to that file so both languages are tested. var keyPattern = regexp.MustCompile(`^[a-zA-Z]+([-_]*[a-zA-Z0-9]+)*$`) // indexPattern matches one or more [N] index suffixes. diff --git a/libs/interpolation/parse_test.go b/libs/interpolation/parse_test.go index 3da65054f3..6021e3b541 100644 --- a/libs/interpolation/parse_test.go +++ b/libs/interpolation/parse_test.go @@ -1,6 +1,8 @@ package interpolation import ( + "encoding/json" + "os" "testing" "github.com/stretchr/testify/assert" @@ -245,3 +247,40 @@ func TestParseErrors(t *testing.T) { }) } } + +// TestParsePureVariableReferences loads shared test cases from +// testdata/variable_references.json and verifies the Go parser agrees +// on which strings are pure variable references. +// +// The same JSON file is consumed by the Python test suite +// (python/databricks_tests/core/test_variable_references.py) to +// verify that the Python regex stays in sync with the Go parser. +// +// When modifying the parser (e.g. adding new key patterns, escape +// sequences, or reference syntax), add test cases to the JSON file +// so both Go and Python are validated. +func TestParsePureVariableReferences(t *testing.T) { + data, err := os.ReadFile("testdata/variable_references.json") + require.NoError(t, err) + + var cases []struct { + Input string `json:"input"` + IsPureRef bool `json:"is_pure_ref"` + Path *string `json:"path,omitempty"` + Comment string `json:"comment"` + } + require.NoError(t, json.Unmarshal(data, &cases)) + + for _, tc := range cases { + t.Run(tc.Comment, func(t *testing.T) { + tokens, parseErr := Parse(tc.Input) + + isPure := parseErr == nil && len(tokens) == 1 && tokens[0].Kind == TokenRef + assert.Equal(t, tc.IsPureRef, isPure, "input: %s", tc.Input) + + if tc.IsPureRef && tc.Path != nil { + assert.Equal(t, *tc.Path, tokens[0].Value) + } + }) + } +} diff --git a/libs/interpolation/testdata/variable_references.json b/libs/interpolation/testdata/variable_references.json new file mode 100644 index 0000000000..748f7ef969 --- /dev/null +++ b/libs/interpolation/testdata/variable_references.json @@ -0,0 +1,142 @@ +[ + { + "input": "${a.b}", + "is_pure_ref": true, + "path": "a.b", + "comment": "simple two-segment path" + }, + { + "input": "${a.b.c}", + "is_pure_ref": true, + "path": "a.b.c", + "comment": "three-segment path" + }, + { + "input": "${a.b[0].c}", + "is_pure_ref": true, + "path": "a.b[0].c", + "comment": "path with index" + }, + { + "input": "${a[0]}", + "is_pure_ref": true, + "path": "a[0]", + "comment": "single key with index" + }, + { + "input": "${a.b[0][1]}", + "is_pure_ref": true, + "path": "a.b[0][1]", + "comment": "path with multiple indices" + }, + { + "input": "${a.b-c}", + "is_pure_ref": true, + "path": "a.b-c", + "comment": "hyphen in key" + }, + { + "input": "${a.b_c}", + "is_pure_ref": true, + "path": "a.b_c", + "comment": "underscore in key" + }, + { + "input": "${resources.jobs.my-job.id}", + "is_pure_ref": true, + "path": "resources.jobs.my-job.id", + "comment": "typical resource reference" + }, + { + "input": "${var.my_var}", + "is_pure_ref": true, + "path": "var.my_var", + "comment": "typical variable reference" + }, + { + "input": "${a}", + "is_pure_ref": true, + "path": "a", + "comment": "single key" + }, + { + "input": "hello", + "is_pure_ref": false, + "comment": "plain string, no reference" + }, + { + "input": "${a} ${b}", + "is_pure_ref": false, + "comment": "multiple references, not pure" + }, + { + "input": "pre ${a.b} post", + "is_pure_ref": false, + "comment": "reference with surrounding text" + }, + { + "input": "${a}${b}", + "is_pure_ref": false, + "comment": "adjacent references, not pure" + }, + { + "input": "", + "is_pure_ref": false, + "comment": "empty string" + }, + { + "input": "${}", + "is_pure_ref": false, + "comment": "empty reference" + }, + { + "input": "${a.b", + "is_pure_ref": false, + "comment": "unterminated reference" + }, + { + "input": "${foo.bar-}", + "is_pure_ref": false, + "comment": "trailing hyphen in key" + }, + { + "input": "${foo..bar}", + "is_pure_ref": false, + "comment": "double dot in path" + }, + { + "input": "${0foo}", + "is_pure_ref": false, + "comment": "leading digit in key" + }, + { + "input": "${foo. bar}", + "is_pure_ref": false, + "comment": "space in path" + }, + { + "input": "${foo.bar!}", + "is_pure_ref": false, + "comment": "special char in key" + }, + { + "input": "${foo.bar_}", + "is_pure_ref": false, + "comment": "trailing underscore in key" + }, + { + "input": "${foo._bar}", + "is_pure_ref": false, + "comment": "underscore-prefixed segment" + }, + { + "input": "$x", + "is_pure_ref": false, + "comment": "dollar without brace" + }, + { + "input": "abc$", + "is_pure_ref": false, + "comment": "trailing dollar" + } +] diff --git a/python/databricks/bundles/core/_transform.py b/python/databricks/bundles/core/_transform.py index 8cf639c2c1..05f98417fb 100644 --- a/python/databricks/bundles/core/_transform.py +++ b/python/databricks/bundles/core/_transform.py @@ -272,9 +272,14 @@ def _unwrap_variable(tpe: type) -> Optional[type]: return None -# Regex for string corresponding to variables. +# Regex for detecting pure variable references (entire string is a single ${...}). # -# The source of truth is regex in libs/dyn/dynvar/ref.go +# This regex must stay in sync with the Go parser in libs/interpolation/parse.go. +# The Go parser is the source of truth for interpolation; this regex only needs +# to recognize pure references so PyDABs can wrap them as Variable objects. +# +# Cross-language tests in libs/interpolation/testdata/variable_references.json +# verify that this regex and the Go parser agree. # # Example: # - "${a.b}" diff --git a/python/databricks_tests/core/test_variable_references.py b/python/databricks_tests/core/test_variable_references.py new file mode 100644 index 0000000000..21c41e8db2 --- /dev/null +++ b/python/databricks_tests/core/test_variable_references.py @@ -0,0 +1,45 @@ +"""Cross-language test for variable reference detection. + +Loads shared test cases from libs/interpolation/testdata/variable_references.json +and verifies the Python regex agrees with the Go parser on which strings are pure +variable references. + +The same JSON file is consumed by the Go test suite +(libs/interpolation/parse_test.go:TestParsePureVariableReferences). + +When modifying the Go parser (e.g. adding new key patterns, escape sequences, +or reference syntax), add test cases to the JSON file so both Go and Python +are validated. +""" + +import json +from pathlib import Path + +import pytest + +from databricks.bundles.core._transform import _unwrap_variable_path + +_testdata = ( + Path(__file__).resolve().parents[3] + / "libs" + / "interpolation" + / "testdata" + / "variable_references.json" +) +_cases = json.loads(_testdata.read_text()) + + +@pytest.mark.parametrize( + "tc", + _cases, + ids=[tc["comment"] for tc in _cases], +) +def test_pure_variable_reference(tc): + result = _unwrap_variable_path(tc["input"]) + + if tc["is_pure_ref"]: + assert result == tc.get("path"), ( + f"expected pure ref with path={tc.get('path')!r}, got {result!r}" + ) + else: + assert result is None, f"expected None for non-pure ref, got {result!r}" From dd3b6f90838b28cb4154246f34cc72e8b69d9b15 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 27 Mar 2026 17:53:48 +0100 Subject: [PATCH 7/8] Remove escape sequence support from interpolation parser Escape sequences (\$ and \\) are incompatible with multi-round variable resolution: escapes consumed in round N produce bare ${...} text that round N+1 incorrectly resolves as a real variable reference. Remove escape support entirely and defer to a follow-up if needed. Co-authored-by: Isaac --- NEXT_CHANGELOG.md | 2 +- .../variables/escape_sequences/databricks.yml | 6 -- .../variables/escape_sequences/out.test.toml | 5 -- .../variables/escape_sequences/output.txt | 16 ---- .../bundle/variables/escape_sequences/script | 1 - design/interpolation-parser.md | 45 ++++++----- libs/dyn/dynvar/ref.go | 21 +---- libs/dyn/dynvar/resolve_test.go | 12 --- libs/interpolation/parse.go | 40 ++-------- libs/interpolation/parse_test.go | 78 +------------------ 10 files changed, 37 insertions(+), 189 deletions(-) delete mode 100644 acceptance/bundle/variables/escape_sequences/databricks.yml delete mode 100644 acceptance/bundle/variables/escape_sequences/out.test.toml delete mode 100644 acceptance/bundle/variables/escape_sequences/output.txt delete mode 100644 acceptance/bundle/variables/escape_sequences/script diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index 1c9dd9a4ae..e4e33987a9 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -12,7 +12,7 @@ * Deduplicate grant entries with duplicate principals or privileges during initialization ([#4801](https://github.com/databricks/cli/pull/4801)) * engine/direct: Fix unwanted recreation of secret scopes when scope_backend_type is not set ([#4834](https://github.com/databricks/cli/pull/4834)) -* Replace regex-based variable interpolation with a character scanner. Escape sequences `\$` and `\\` are now supported ([#4747](https://github.com/databricks/cli/pull/4747)). +* Replace regex-based variable interpolation with a character scanner ([#4747](https://github.com/databricks/cli/pull/4747)). ### Dependency updates diff --git a/acceptance/bundle/variables/escape_sequences/databricks.yml b/acceptance/bundle/variables/escape_sequences/databricks.yml deleted file mode 100644 index 4e1e732b0f..0000000000 --- a/acceptance/bundle/variables/escape_sequences/databricks.yml +++ /dev/null @@ -1,6 +0,0 @@ -bundle: - name: 'literal \${not_a_ref} and ${var.greeting}' - -variables: - greeting: - default: hello diff --git a/acceptance/bundle/variables/escape_sequences/out.test.toml b/acceptance/bundle/variables/escape_sequences/out.test.toml deleted file mode 100644 index d560f1de04..0000000000 --- a/acceptance/bundle/variables/escape_sequences/out.test.toml +++ /dev/null @@ -1,5 +0,0 @@ -Local = true -Cloud = false - -[EnvMatrix] - DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/bundle/variables/escape_sequences/output.txt b/acceptance/bundle/variables/escape_sequences/output.txt deleted file mode 100644 index 827a328329..0000000000 --- a/acceptance/bundle/variables/escape_sequences/output.txt +++ /dev/null @@ -1,16 +0,0 @@ - ->>> [CLI] bundle validate -Warning: Please do not use variable interpolation in the name of your bundle. The name of your bundle -is a part of the path at which your bundle state is stored at by default. Parameterizing it at -runtime can have unexpected consequences like duplicate deployments or resources not being -cleaned up during bundle destroy. - at bundle.name - in databricks.yml:2:9 - -Name: literal ${not_a_ref} and hello -Target: default -Workspace: - User: [USERNAME] - Path: /Workspace/Users/[USERNAME]/.bundle/literal ${not_a_ref} and hello/default - -Found 1 warning diff --git a/acceptance/bundle/variables/escape_sequences/script b/acceptance/bundle/variables/escape_sequences/script deleted file mode 100644 index 5350876150..0000000000 --- a/acceptance/bundle/variables/escape_sequences/script +++ /dev/null @@ -1 +0,0 @@ -trace $CLI bundle validate diff --git a/design/interpolation-parser.md b/design/interpolation-parser.md index 991a006278..a45cc30206 100644 --- a/design/interpolation-parser.md +++ b/design/interpolation-parser.md @@ -9,8 +9,7 @@ DABs variable interpolation (`${...}`) was regex-based. This caused: 1. **Silent failures** — `${foo.bar-}` silently treated as literal text with no warning. 2. **No suggestions** — `${bundle.nme}` produces "reference does not exist" with no hint. -3. **No escape mechanism** — no way to produce a literal `${` in output. -4. **No extensibility** — cannot support structured path features like key-value references `tasks[task_key="x"]` that exist in `libs/structs/structpath`. +3. **No extensibility** — cannot support structured path features like key-value references `tasks[task_key="x"]` that exist in `libs/structs/structpath`. ## Background: How Other Systems Parse `${...}` @@ -25,7 +24,7 @@ DABs variable interpolation (`${...}`) was regex-based. This caused: For a syntax as simple as `${path.to.var[0]}` (no nesting, no functions, no operators), a full recursive descent parser is overkill. A **two-mode character scanner** — the same core pattern used by Go's `text/template` and HCL — gives -proper error reporting and escape support without the complexity. +proper error reporting without the complexity. ## Design Decisions @@ -36,22 +35,26 @@ No AST, no recursive descent. Easy to port to the Python implementation. See `libs/interpolation/parse.go`. -### Nested `${` rejection +### Nested `${` support Nested `${...}` inside a reference (e.g., `${var.foo_${var.tail}}`) is -rejected as an error. This construct is ambiguous and was never intentionally -supported — the old regex happened to match only the innermost pair by -coincidence. - -### `\$` escape sequence - -`\$` produces a literal `$`, and `\\` produces a literal `\`. This follows -the same convention used by Bash for escaping `$` and is the least -surprising option for users working in shell environments. - -A standalone `\` before any character other than `$` or `\` is passed -through as a literal backslash, so existing configurations that happen to -contain backslashes are not affected. +supported. When the parser encounters a nested `${` inside an outer +reference, it treats the outer `${...` prefix as literal text so the inner +reference is resolved first. Multi-round resolution (up to 11 rounds in +`resolve_variable_references.go`) then progressively resolves from inside +out. For example, `${a_${b_${c}}}` resolves in 3 rounds. + +### No escape sequences + +Escape sequences (e.g. `\$` → `$`) are intentionally omitted. Variable +resolution uses multi-round processing (up to 11 rounds) where each round +re-parses all string values. Escape sequences consumed in round N would +produce bare `${...}` text that round N+1 would incorrectly resolve as a +real variable reference. A safe escape mechanism would require either +deferred consumption (escapes survive all rounds and are consumed in a +final pass) or sentinel characters, adding significant complexity. Since +there is no existing user demand for literal `${` in output, we defer +escape support to a follow-up if needed. ### Malformed reference warnings @@ -68,5 +71,9 @@ loop. Each `TokenRef` maps 1:1 to a resolved value, eliminating the ambiguity. ## Python sync -The Python regex in `python/databricks/bundles/core/_transform.py` needs a -corresponding update in a follow-up PR. +PyDABs uses a regex in `python/databricks/bundles/core/_transform.py` to +detect pure variable references (entire string is a single `${...}`). This +regex must stay in sync with the Go parser's key/path validation. Shared +test cases in `libs/interpolation/testdata/variable_references.json` are +consumed by both Go (`TestParsePureVariableReferences`) and Python +(`test_pure_variable_reference`) to verify agreement. diff --git a/libs/dyn/dynvar/ref.go b/libs/dyn/dynvar/ref.go index 4025521096..d52d1fa772 100644 --- a/libs/dyn/dynvar/ref.go +++ b/libs/dyn/dynvar/ref.go @@ -57,8 +57,7 @@ func newRef(v dyn.Value) (Ref, bool, diag.Diagnostics) { }} } - // Check if any token is a variable reference or if escape sequences - // were processed (the reconstructed string differs from the original). + // Check if any token is a variable reference. hasRef := false for _, t := range tokens { if t.Kind == interpolation.TokenRef { @@ -68,11 +67,7 @@ func newRef(v dyn.Value) (Ref, bool, diag.Diagnostics) { } if !hasRef { - // Even without references, if escape sequences were processed we need - // to return a Ref so the resolver can reconstruct the literal string. - if !hasEscapes(s, tokens) { - return Ref{}, false, nil - } + return Ref{}, false, nil } return Ref{ @@ -100,18 +95,6 @@ func (v Ref) References() []string { return out } -// hasEscapes checks whether escape sequences were processed during parsing. -// Escape processing shortens the output (e.g., `\$` becomes `$`), so if the -// sum of token value lengths is less than the original string length, escapes -// were present. -func hasEscapes(original string, tokens []interpolation.Token) bool { - n := 0 - for _, t := range tokens { - n += len(t.Value) - } - return n < len(original) -} - // IsPureVariableReference returns true if s is a single variable reference // with no surrounding text. func IsPureVariableReference(s string) bool { diff --git a/libs/dyn/dynvar/resolve_test.go b/libs/dyn/dynvar/resolve_test.go index 748204ef90..959cf3344f 100644 --- a/libs/dyn/dynvar/resolve_test.go +++ b/libs/dyn/dynvar/resolve_test.go @@ -423,15 +423,3 @@ func TestResolveThreeLevelNestedVariableReference(t *testing.T) { assert.Equal(t, "${a_${b_z}}", getByPath(t, out, "final").MustString()) } -func TestResolveEscapedRef(t *testing.T) { - in := dyn.V(map[string]dyn.Value{ - "a": dyn.V("a"), - "b": dyn.V(`\${a}`), - }) - - out, err := dynvar.Resolve(in, dynvar.DefaultLookup(in)) - require.NoError(t, err) - - // Escaped reference should produce literal ${a}. - assert.Equal(t, "${a}", getByPath(t, out, "b").MustString()) -} diff --git a/libs/interpolation/parse.go b/libs/interpolation/parse.go index 0f2004359e..ee0bb92ce9 100644 --- a/libs/interpolation/parse.go +++ b/libs/interpolation/parse.go @@ -36,10 +36,9 @@ type Token struct { } const ( - dollarChar = '$' - openBrace = '{' - closeBrace = '}' - backslashChar = '\\' + dollarChar = '$' + openBrace = '{' + closeBrace = '}' ) // keyPattern validates a single key segment in a variable path. @@ -50,8 +49,8 @@ const ( // (python/databricks/bundles/core/_transform.py). The patterns must stay in // sync. Cross-language test cases live in testdata/variable_references.json // and are run by both Go (TestParsePureVariableReferences) and Python -// (test_pure_variable_reference). When changing key/index/path validation, -// add cases to that file so both languages are tested. +// (test_pure_variable_reference). When changing key/index/path validation +// or reference syntax, add cases to that file so both languages are tested. var keyPattern = regexp.MustCompile(`^[a-zA-Z]+([-_]*[a-zA-Z0-9]+)*$`) // indexPattern matches one or more [N] index suffixes. @@ -60,10 +59,6 @@ var indexPattern = regexp.MustCompile(`^(\[[0-9]+\])+$`) // Parse parses a string that may contain ${...} variable references. // It returns a slice of tokens representing literal text and variable references. // -// Escape sequences: -// - "\$" produces a literal "$" -// - "\\" produces a literal "\" -// // Nested references like "${a.${b}}" are supported by treating the outer // "${a." as literal text so that inner references are resolved first. // After resolution the resulting string (e.g. "${a.x}") is re-parsed. @@ -72,7 +67,6 @@ var indexPattern = regexp.MustCompile(`^(\[[0-9]+\])+$`) // - "hello" -> [Literal("hello")] // - "${a.b}" -> [Ref("a.b")] // - "pre ${a.b} post" -> [Literal("pre "), Ref("a.b"), Literal(" post")] -// - "\${a.b}" -> [Literal("${a.b}")] // - "${a.${b}}" -> [Literal("${a."), Ref("b"), Literal("}")] func Parse(s string) ([]Token, error) { if len(s) == 0 { @@ -98,30 +92,6 @@ func Parse(s string) ([]Token, error) { for i < len(s) { switch s[i] { - case backslashChar: - // Handle escape sequences: \$ -> $, \\ -> \ - if buf.Len() == 0 { - litStart = i - } - if i+1 < len(s) { - switch s[i+1] { - case dollarChar: - buf.WriteByte(dollarChar) - i += 2 - case backslashChar: - buf.WriteByte(backslashChar) - i += 2 - default: - // Not a recognized escape; treat backslash as literal. - buf.WriteByte(backslashChar) - i++ - } - } else { - // Trailing backslash at end of string: treat as literal. - buf.WriteByte(backslashChar) - i++ - } - case dollarChar: // We see '$'. Look ahead. if i+1 >= len(s) { diff --git a/libs/interpolation/parse_test.go b/libs/interpolation/parse_test.go index 6021e3b541..3c23eb6529 100644 --- a/libs/interpolation/parse_test.go +++ b/libs/interpolation/parse_test.go @@ -41,64 +41,6 @@ func TestParseValidPaths(t *testing.T) { } } -func TestParseEscapeSequences(t *testing.T) { - tests := []struct { - name string - input string - tokens []Token - }{ - { - "escaped_dollar", - `\$`, - []Token{{Kind: TokenLiteral, Value: "$", Start: 0, End: 2}}, - }, - { - "escaped_ref", - `\${a}`, - []Token{{Kind: TokenLiteral, Value: "${a}", Start: 0, End: 5}}, - }, - { - "escaped_backslash", - `\\`, - []Token{{Kind: TokenLiteral, Value: `\`, Start: 0, End: 2}}, - }, - { - "double_escaped_backslash", - `\\\\`, - []Token{{Kind: TokenLiteral, Value: `\\`, Start: 0, End: 4}}, - }, - { - "escaped_backslash_then_ref", - `\\${a.b}`, - []Token{ - {Kind: TokenLiteral, Value: `\`, Start: 0, End: 2}, - {Kind: TokenRef, Value: "a.b", Start: 2, End: 8}, - }, - }, - { - "backslash_before_non_special", - `\n`, - []Token{{Kind: TokenLiteral, Value: `\n`, Start: 0, End: 2}}, - }, - { - "escaped_dollar_then_ref", - `\$\$${a.b}`, - []Token{ - {Kind: TokenLiteral, Value: "$$", Start: 0, End: 4}, - {Kind: TokenRef, Value: "a.b", Start: 4, End: 10}, - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - tokens, err := Parse(tt.input) - require.NoError(t, err) - assert.Equal(t, tt.tokens, tokens) - }) - } -} - func TestParse(t *testing.T) { tests := []struct { name string @@ -166,20 +108,6 @@ func TestParse(t *testing.T) { `abc\`, []Token{{Kind: TokenLiteral, Value: `abc\`, Start: 0, End: 4}}, }, - { - "escaped_ref", - `\${a}`, - []Token{{Kind: TokenLiteral, Value: "${a}", Start: 0, End: 5}}, - }, - { - "escape_between_refs", - `${a}\$${b}`, - []Token{ - {Kind: TokenRef, Value: "a", Start: 0, End: 4}, - {Kind: TokenLiteral, Value: "$", Start: 4, End: 6}, - {Kind: TokenRef, Value: "b", Start: 6, End: 10}, - }, - }, { "nested_ref", "${var.foo_${var.tail}}", @@ -256,9 +184,9 @@ func TestParseErrors(t *testing.T) { // (python/databricks_tests/core/test_variable_references.py) to // verify that the Python regex stays in sync with the Go parser. // -// When modifying the parser (e.g. adding new key patterns, escape -// sequences, or reference syntax), add test cases to the JSON file -// so both Go and Python are validated. +// When modifying the parser (e.g. adding new key patterns or +// reference syntax), add test cases to the JSON file so both Go +// and Python are validated. func TestParsePureVariableReferences(t *testing.T) { data, err := os.ReadFile("testdata/variable_references.json") require.NoError(t, err) From 033b1a2b11c33ec6be8944c8ae59a93339ceb4d6 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 27 Mar 2026 18:07:41 +0100 Subject: [PATCH 8/8] Remove design doc from PR Co-authored-by: Isaac --- design/interpolation-parser.md | 79 ---------------------------------- 1 file changed, 79 deletions(-) delete mode 100644 design/interpolation-parser.md diff --git a/design/interpolation-parser.md b/design/interpolation-parser.md deleted file mode 100644 index a45cc30206..0000000000 --- a/design/interpolation-parser.md +++ /dev/null @@ -1,79 +0,0 @@ -# Variable Interpolation: Character Scanner Parser - -Author: Shreyas Goenka -Date: 12 March 2026 - -## Motivation - -DABs variable interpolation (`${...}`) was regex-based. This caused: - -1. **Silent failures** — `${foo.bar-}` silently treated as literal text with no warning. -2. **No suggestions** — `${bundle.nme}` produces "reference does not exist" with no hint. -3. **No extensibility** — cannot support structured path features like key-value references `tasks[task_key="x"]` that exist in `libs/structs/structpath`. - -## Background: How Other Systems Parse `${...}` - -| System | Strategy | Escape | Error Quality | -|--------|----------|--------|---------------| -| Go `text/template` | State-function lexer | None | Line + template name | -| HCL2 (Terraform) | Ragel FSM + recursive descent | `$${` → literal `${` | Source range + suggestions | -| Python f-strings | Mode-stack tokenizer | `{{` → `{` | Line/column | -| Rust `format!` | Iterator-based descent | `{{`/`}}` | Spans + suggestions | -| Bash | Char-by-char + depth tracking | `\$` | Line number | - -For a syntax as simple as `${path.to.var[0]}` (no nesting, no functions, no -operators), a full recursive descent parser is overkill. A **two-mode character -scanner** — the same core pattern used by Go's `text/template` and HCL — gives -proper error reporting without the complexity. - -## Design Decisions - -### Two-mode character scanner - -A two-mode scanner (TEXT / REFERENCE) that produces a flat list of tokens. -No AST, no recursive descent. Easy to port to the Python implementation. - -See `libs/interpolation/parse.go`. - -### Nested `${` support - -Nested `${...}` inside a reference (e.g., `${var.foo_${var.tail}}`) is -supported. When the parser encounters a nested `${` inside an outer -reference, it treats the outer `${...` prefix as literal text so the inner -reference is resolved first. Multi-round resolution (up to 11 rounds in -`resolve_variable_references.go`) then progressively resolves from inside -out. For example, `${a_${b_${c}}}` resolves in 3 rounds. - -### No escape sequences - -Escape sequences (e.g. `\$` → `$`) are intentionally omitted. Variable -resolution uses multi-round processing (up to 11 rounds) where each round -re-parses all string values. Escape sequences consumed in round N would -produce bare `${...}` text that round N+1 would incorrectly resolve as a -real variable reference. A safe escape mechanism would require either -deferred consumption (escapes survive all rounds and are consumed in a -final pass) or sentinel characters, adding significant complexity. Since -there is no existing user demand for literal `${` in output, we defer -escape support to a follow-up if needed. - -### Malformed reference warnings - -A standalone `WarnMalformedReferences` mutator walks the config tree once -before variable resolution. It only checks values with source locations -(`len(v.Locations()) > 0`) to avoid false positives from synthesized values -(e.g., normalized/computed paths). - -### Token-based resolution - -The resolver's string interpolation changed from `strings.Replace` (with -count=1 to avoid double-replacing duplicate refs) to a token concatenation -loop. Each `TokenRef` maps 1:1 to a resolved value, eliminating the ambiguity. - -## Python sync - -PyDABs uses a regex in `python/databricks/bundles/core/_transform.py` to -detect pure variable references (entire string is a single `${...}`). This -regex must stay in sync with the Go parser's key/path validation. Shared -test cases in `libs/interpolation/testdata/variable_references.json` are -consumed by both Go (`TestParsePureVariableReferences`) and Python -(`test_pure_variable_reference`) to verify agreement.