Skip to content

Commit 74ff2e2

Browse files
Add cross-language tests for variable reference parsing
Shared test cases in libs/interpolation/testdata/variable_references.json are consumed by both the Go parser (TestParsePureVariableReferences) and the Python regex (test_pure_variable_reference) to verify they agree on which strings are pure variable references. When modifying the parser, add test cases to the JSON file so both languages are validated. Co-authored-by: Isaac
1 parent fdba4f1 commit 74ff2e2

File tree

4 files changed

+236
-2
lines changed

4 files changed

+236
-2
lines changed

libs/interpolation/parse_test.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package interpolation
22

33
import (
4+
"encoding/json"
5+
"os"
46
"testing"
57

68
"github.com/stretchr/testify/assert"
@@ -245,3 +247,40 @@ func TestParseErrors(t *testing.T) {
245247
})
246248
}
247249
}
250+
251+
// TestParsePureVariableReferences loads shared test cases from
252+
// testdata/variable_references.json and verifies the Go parser agrees
253+
// on which strings are pure variable references.
254+
//
255+
// The same JSON file is consumed by the Python test suite
256+
// (python/databricks_tests/core/test_variable_references.py) to
257+
// verify that the Python regex stays in sync with the Go parser.
258+
//
259+
// When modifying the parser (e.g. adding new key patterns, escape
260+
// sequences, or reference syntax), add test cases to the JSON file
261+
// so both Go and Python are validated.
262+
func TestParsePureVariableReferences(t *testing.T) {
263+
data, err := os.ReadFile("testdata/variable_references.json")
264+
require.NoError(t, err)
265+
266+
var cases []struct {
267+
Input string `json:"input"`
268+
IsPureRef bool `json:"is_pure_ref"`
269+
Path *string `json:"path,omitempty"`
270+
Comment string `json:"comment"`
271+
}
272+
require.NoError(t, json.Unmarshal(data, &cases))
273+
274+
for _, tc := range cases {
275+
t.Run(tc.Comment, func(t *testing.T) {
276+
tokens, parseErr := Parse(tc.Input)
277+
278+
isPure := parseErr == nil && len(tokens) == 1 && tokens[0].Kind == TokenRef
279+
assert.Equal(t, tc.IsPureRef, isPure, "input: %s", tc.Input)
280+
281+
if tc.IsPureRef && tc.Path != nil {
282+
assert.Equal(t, *tc.Path, tokens[0].Value)
283+
}
284+
})
285+
}
286+
}
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
[
2+
{
3+
"input": "${a.b}",
4+
"is_pure_ref": true,
5+
"path": "a.b",
6+
"comment": "simple two-segment path"
7+
},
8+
{
9+
"input": "${a.b.c}",
10+
"is_pure_ref": true,
11+
"path": "a.b.c",
12+
"comment": "three-segment path"
13+
},
14+
{
15+
"input": "${a.b[0].c}",
16+
"is_pure_ref": true,
17+
"path": "a.b[0].c",
18+
"comment": "path with index"
19+
},
20+
{
21+
"input": "${a[0]}",
22+
"is_pure_ref": true,
23+
"path": "a[0]",
24+
"comment": "single key with index"
25+
},
26+
{
27+
"input": "${a.b[0][1]}",
28+
"is_pure_ref": true,
29+
"path": "a.b[0][1]",
30+
"comment": "path with multiple indices"
31+
},
32+
{
33+
"input": "${a.b-c}",
34+
"is_pure_ref": true,
35+
"path": "a.b-c",
36+
"comment": "hyphen in key"
37+
},
38+
{
39+
"input": "${a.b_c}",
40+
"is_pure_ref": true,
41+
"path": "a.b_c",
42+
"comment": "underscore in key"
43+
},
44+
{
45+
"input": "${resources.jobs.my-job.id}",
46+
"is_pure_ref": true,
47+
"path": "resources.jobs.my-job.id",
48+
"comment": "typical resource reference"
49+
},
50+
{
51+
"input": "${var.my_var}",
52+
"is_pure_ref": true,
53+
"path": "var.my_var",
54+
"comment": "typical variable reference"
55+
},
56+
{
57+
"input": "${a}",
58+
"is_pure_ref": true,
59+
"path": "a",
60+
"comment": "single key"
61+
},
62+
{
63+
"input": "hello",
64+
"is_pure_ref": false,
65+
"comment": "plain string, no reference"
66+
},
67+
{
68+
"input": "${a} ${b}",
69+
"is_pure_ref": false,
70+
"comment": "multiple references, not pure"
71+
},
72+
{
73+
"input": "pre ${a.b} post",
74+
"is_pure_ref": false,
75+
"comment": "reference with surrounding text"
76+
},
77+
{
78+
"input": "${a}${b}",
79+
"is_pure_ref": false,
80+
"comment": "adjacent references, not pure"
81+
},
82+
{
83+
"input": "",
84+
"is_pure_ref": false,
85+
"comment": "empty string"
86+
},
87+
{
88+
"input": "${}",
89+
"is_pure_ref": false,
90+
"comment": "empty reference"
91+
},
92+
{
93+
"input": "${a.b",
94+
"is_pure_ref": false,
95+
"comment": "unterminated reference"
96+
},
97+
{
98+
"input": "${foo.bar-}",
99+
"is_pure_ref": false,
100+
"comment": "trailing hyphen in key"
101+
},
102+
{
103+
"input": "${foo..bar}",
104+
"is_pure_ref": false,
105+
"comment": "double dot in path"
106+
},
107+
{
108+
"input": "${0foo}",
109+
"is_pure_ref": false,
110+
"comment": "leading digit in key"
111+
},
112+
{
113+
"input": "${foo. bar}",
114+
"is_pure_ref": false,
115+
"comment": "space in path"
116+
},
117+
{
118+
"input": "${foo.bar!}",
119+
"is_pure_ref": false,
120+
"comment": "special char in key"
121+
},
122+
{
123+
"input": "${foo.bar_}",
124+
"is_pure_ref": false,
125+
"comment": "trailing underscore in key"
126+
},
127+
{
128+
"input": "${foo._bar}",
129+
"is_pure_ref": false,
130+
"comment": "underscore-prefixed segment"
131+
},
132+
{
133+
"input": "$x",
134+
"is_pure_ref": false,
135+
"comment": "dollar without brace"
136+
},
137+
{
138+
"input": "abc$",
139+
"is_pure_ref": false,
140+
"comment": "trailing dollar"
141+
}
142+
]

python/databricks/bundles/core/_transform.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -272,9 +272,17 @@ def _unwrap_variable(tpe: type) -> Optional[type]:
272272
return None
273273

274274

275-
# Regex for string corresponding to variables.
275+
# Regex for detecting pure variable references (entire string is a single ${...}).
276276
#
277-
# The source of truth is regex in libs/dyn/dynvar/ref.go
277+
# The base key pattern (_base_var_def) and the overall reference structure
278+
# (dot-separated keys with optional [N] indices) must stay in sync with
279+
# keyPattern, indexPattern, and validatePath in libs/interpolation/parse.go.
280+
# The Go parser is the source of truth for interpolation; this regex only needs
281+
# to recognize pure references so PyDABs can wrap them as Variable objects.
282+
#
283+
# Cross-language tests in libs/interpolation/testdata/variable_references.json
284+
# verify that this regex and the Go parser agree. When modifying the Go parser,
285+
# add cases there so both languages are validated.
278286
#
279287
# Example:
280288
# - "${a.b}"
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
"""Cross-language test for variable reference detection.
2+
3+
Loads shared test cases from libs/interpolation/testdata/variable_references.json
4+
and verifies the Python regex agrees with the Go parser on which strings are pure
5+
variable references.
6+
7+
The same JSON file is consumed by the Go test suite
8+
(libs/interpolation/parse_test.go:TestParsePureVariableReferences).
9+
10+
When modifying the Go parser (e.g. adding new key patterns, escape sequences,
11+
or reference syntax), add test cases to the JSON file so both Go and Python
12+
are validated.
13+
"""
14+
15+
import json
16+
from pathlib import Path
17+
18+
import pytest
19+
20+
from databricks.bundles.core._transform import _unwrap_variable_path
21+
22+
# Shared cross-language fixture: four directory levels above this file, then
# libs/interpolation/testdata/variable_references.json.
_testdata = (
    Path(__file__).resolve().parents[3]
    / "libs"
    / "interpolation"
    / "testdata"
    / "variable_references.json"
)
# JSON is UTF-8; pin the encoding so the result does not depend on the
# platform's locale default.
_cases = json.loads(_testdata.read_text(encoding="utf-8"))
30+
31+
32+
@pytest.mark.parametrize(
    "tc",
    _cases,
    ids=[tc["comment"] for tc in _cases],
)
def test_pure_variable_reference(tc):
    """Check one shared case against the Python pure-reference detection.

    For cases with ``is_pure_ref`` false, ``_unwrap_variable_path`` must
    return ``None``. For cases with ``is_pure_ref`` true it must return a
    non-``None`` value, and if the case carries a ``path`` key the returned
    value must equal it. ``path`` is optional, mirroring the Go test that
    reads the same JSON file.
    """
    result = _unwrap_variable_path(tc["input"])

    if not tc["is_pure_ref"]:
        assert result is None, f"expected None for non-pure ref, got {result!r}"
        return

    # Assert detection separately from the path comparison: with a missing
    # "path" key, comparing against tc.get("path") would compare against None
    # and pass exactly when the reference was NOT detected.
    assert result is not None, (
        f"expected pure ref for input={tc['input']!r}, got None"
    )

    if "path" in tc:
        assert result == tc["path"], (
            f"expected pure ref with path={tc['path']!r}, got {result!r}"
        )

0 commit comments

Comments
 (0)