Skip to content

Commit e5c1351

Browse files
Replace JSON schema validation in CI with Go test (#4848)
## Summary

- Remove the `ajv-cli` npm dependency from the CI workflow.
- Add a Go unit test (`bundle/schema/validate_test.go`) using `google/jsonschema-go` that validates the generated bundle schema against the existing pass/fail test data files.
- The nested `$defs` structure (mirroring Go import paths) is flattened into flat keys, with `$ref` values rewritten to use JSON Pointer `~1` escaping, for compatibility with the Google library.

## Test plan

- [x] `go test ./bundle/schema/ -v` passes all tests (12 pass cases, 14 fail cases)

This pull request was AI-assisted by Isaac.
1 parent 457cfdc commit e5c1351

File tree

5 files changed

+219
-31
lines changed

5 files changed

+219
-31
lines changed

.github/workflows/push.yml

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -306,37 +306,6 @@ jobs:
306306
exit 1
307307
fi
308308
309-
# Github repo: https://github.com/ajv-validator/ajv-cli
310-
- name: Install ajv-cli
311-
run: npm install -g ajv-cli@5.0.0
312-
313-
# Assert that the generated bundle schema is a valid JSON schema by using
314-
# ajv-cli to validate it against bundle configuration files.
315-
# By default the ajv-cli runs in strict mode which will fail if the schema
316-
# itself is not valid. Strict mode is more strict than the JSON schema
317-
# specification. See for details: https://ajv.js.org/options.html#strict-mode-options
318-
# The ajv-cli is configured to use the markdownDescription keyword which is not part of the JSON schema specification,
319-
# but is used in editors like VSCode to render markdown in the description field
320-
- name: Validate bundle schema
321-
run: |
322-
go run main.go bundle schema > schema.json
323-
324-
# Add markdownDescription keyword to ajv
325-
echo "module.exports = function(a) {
326-
a.addKeyword('deprecationMessage');
327-
a.addKeyword('doNotSuggest');
328-
a.addKeyword('markdownDescription');
329-
a.addKeyword('x-databricks-preview');
330-
}" >> keywords.js
331-
332-
for file in ./bundle/internal/schema/testdata/pass/*.yml; do
333-
ajv test -s schema.json -d $file --valid -c=./keywords.js
334-
done
335-
336-
for file in ./bundle/internal/schema/testdata/fail/*.yml; do
337-
ajv test -s schema.json -d $file --invalid -c=./keywords.js
338-
done
339-
340309
validate-python-codegen:
341310
needs: cleanups
342311
runs-on: ubuntu-latest

NOTICE

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,10 @@ License - https://github.com/manifoldco/promptui/blob/master/LICENSE.md
7474

7575
This Software contains code from the following open source projects, licensed under the MIT license:
7676

77+
google/jsonschema-go - https://github.com/google/jsonschema-go
78+
Copyright 2025 Google LLC
79+
License - https://github.com/google/jsonschema-go/blob/main/LICENSE
80+
7781
charmbracelet/bubbles - https://github.com/charmbracelet/bubbles
7882
Copyright (c) 2020-2025 Charmbracelet, Inc
7983
License - https://github.com/charmbracelet/bubbles/blob/master/LICENSE

bundle/schema/validate_test.go

Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
package schema_test
2+
3+
import (
4+
"encoding/json"
5+
"os"
6+
"path/filepath"
7+
"strings"
8+
"testing"
9+
10+
"github.com/databricks/cli/bundle/schema"
11+
googleschema "github.com/google/jsonschema-go/jsonschema"
12+
"github.com/stretchr/testify/assert"
13+
"github.com/stretchr/testify/require"
14+
"go.yaml.in/yaml/v3"
15+
)
16+
17+
// isSchemaNode reports whether obj is a JSON Schema definition rather than an
// intermediate nesting node in the $defs tree.
//
// Intermediate nodes only have map[string]any values (more nesting), while
// schema definitions are expected to carry at least one non-object value
// ("type" is a string, "oneOf" is an array, etc.). An empty object {} is also
// treated as a schema (it accepts any value).
//
// This is a heuristic: a non-empty schema whose keywords are all object-valued
// (for example only "properties") is classified as an intermediate node.
func isSchemaNode(obj map[string]any) bool {
	if len(obj) == 0 {
		return true
	}
	for _, v := range obj {
		// Any value that is not itself an object marks obj as a schema.
		if _, isObj := v.(map[string]any); !isObj {
			return true
		}
	}
	return false
}
33+
34+
// flattenDefs flattens the nested $defs object tree into a single-level map.
35+
// Nested path segments are joined with "/" to form flat keys.
36+
// e.g., $defs["github.com"]["databricks"]["resources.Job"] becomes
37+
// $defs["github.com/databricks/resources.Job"].
38+
func flattenDefs(defs map[string]any) map[string]any {
39+
result := map[string]any{}
40+
flattenDefsHelper("", defs, result)
41+
return result
42+
}
43+
44+
func flattenDefsHelper(prefix string, node, result map[string]any) {
45+
for key, value := range node {
46+
fullKey := prefix + "/" + key
47+
if prefix == "" {
48+
fullKey = key
49+
}
50+
51+
obj, isObj := value.(map[string]any)
52+
if !isObj || isSchemaNode(obj) {
53+
result[fullKey] = value
54+
} else {
55+
flattenDefsHelper(fullKey, obj, result)
56+
}
57+
}
58+
}
59+
60+
// rewriteRefs recursively walks a JSON value and rewrites all $ref strings.
61+
// After flattening, $defs keys contain literal "/" characters. In JSON Pointer
62+
// (RFC 6901) "/" is the path separator, so these must be escaped as "~1" in
63+
// $ref values to be treated as a single key lookup.
64+
func rewriteRefs(v any) any {
65+
switch val := v.(type) {
66+
case map[string]any:
67+
result := make(map[string]any, len(val))
68+
for k, child := range val {
69+
if k == "$ref" {
70+
if s, ok := child.(string); ok {
71+
result[k] = rewriteRef(s)
72+
} else {
73+
result[k] = child
74+
}
75+
} else {
76+
result[k] = rewriteRefs(child)
77+
}
78+
}
79+
return result
80+
case []any:
81+
result := make([]any, len(val))
82+
for i, item := range val {
83+
result[i] = rewriteRefs(item)
84+
}
85+
return result
86+
default:
87+
return v
88+
}
89+
}
90+
91+
// rewriteRef transforms a $ref from nested JSON Pointer format to flat key format.
92+
// e.g., "#/$defs/github.com/databricks/resources.Job"
93+
// becomes "#/$defs/github.com~1databricks~1resources.Job"
94+
func rewriteRef(ref string) string {
95+
const prefix = "#/$defs/"
96+
if !strings.HasPrefix(ref, prefix) {
97+
return ref
98+
}
99+
path := ref[len(prefix):]
100+
return prefix + strings.ReplaceAll(path, "/", "~1")
101+
}
102+
103+
// transformSchema flattens nested $defs and rewrites $ref values for compatibility
104+
// with the Google jsonschema-go library which expects flat $defs.
105+
func transformSchema(raw map[string]any) map[string]any {
106+
if defs, ok := raw["$defs"].(map[string]any); ok {
107+
raw["$defs"] = flattenDefs(defs)
108+
}
109+
return rewriteRefs(raw).(map[string]any)
110+
}
111+
112+
func compileSchema(t *testing.T) *googleschema.Resolved {
113+
t.Helper()
114+
115+
var raw map[string]any
116+
err := json.Unmarshal(schema.Bytes, &raw)
117+
require.NoError(t, err)
118+
119+
transformed := transformSchema(raw)
120+
121+
b, err := json.Marshal(transformed)
122+
require.NoError(t, err)
123+
124+
var s googleschema.Schema
125+
err = json.Unmarshal(b, &s)
126+
require.NoError(t, err)
127+
128+
resolved, err := s.Resolve(nil)
129+
require.NoError(t, err)
130+
131+
return resolved
132+
}
133+
134+
// loadYAMLAsJSON reads a YAML file and returns it as a JSON-compatible any value.
135+
// The YAML -> JSON roundtrip ensures canonical JSON types (float64, string, bool, nil,
136+
// map[string]any, []any) that the JSON schema validator expects.
137+
func loadYAMLAsJSON(t *testing.T, path string) any {
138+
t.Helper()
139+
140+
data, err := os.ReadFile(path)
141+
require.NoError(t, err)
142+
143+
var yamlVal any
144+
err = yaml.Unmarshal(data, &yamlVal)
145+
require.NoError(t, err)
146+
147+
jsonBytes, err := json.Marshal(yamlVal)
148+
require.NoError(t, err)
149+
150+
var instance any
151+
err = json.Unmarshal(jsonBytes, &instance)
152+
require.NoError(t, err)
153+
154+
return instance
155+
}
156+
157+
func TestSchemaValidatePassCases(t *testing.T) {
158+
sch := compileSchema(t)
159+
160+
files, err := filepath.Glob("../internal/schema/testdata/pass/*.yml")
161+
require.NoError(t, err)
162+
require.NotEmpty(t, files)
163+
164+
for _, file := range files {
165+
t.Run(filepath.Base(file), func(t *testing.T) {
166+
instance := loadYAMLAsJSON(t, file)
167+
err := sch.Validate(instance)
168+
assert.NoError(t, err)
169+
})
170+
}
171+
}
172+
173+
func TestSchemaValidateFailCases(t *testing.T) {
174+
sch := compileSchema(t)
175+
176+
// Each entry maps a test file to the expected schema path in the error.
177+
// The bundle schema wraps every type in oneOf for interpolation patterns,
178+
// and the Google library discards per-branch errors on oneOf failure, so
179+
// we can only assert on the schema path, not the specific failure reason.
180+
tests := map[string]string{
181+
"basic.yml": "config.Bundle",
182+
"deprecated_job_field_format.yml": "config.Resources",
183+
"hidden_job_field_deployment.yml": "config.Resources",
184+
"hidden_job_field_edit_mode.yml": "config.Target",
185+
"incorrect_volume_type.yml": "config.Resources",
186+
"invalid_enum_value_in_job.yml": "config.Resources",
187+
"invalid_enum_value_in_model.yml": "config.Resources",
188+
"invalid_reference_in_job.yml": "config.Resources",
189+
"invalid_reference_in_model.yml": "config.Resources",
190+
"readonly_job_field_git_snapshot.yml": "config.Resources",
191+
"readonly_job_field_job_source.yml": "config.Resources",
192+
"required_field_missing_in_job.yml": "config.Resources",
193+
"unknown_field_in_job.yml": "config.Resources",
194+
"unknown_field_in_model.yml": "config.Resources",
195+
}
196+
197+
files, err := filepath.Glob("../internal/schema/testdata/fail/*.yml")
198+
require.NoError(t, err)
199+
require.NotEmpty(t, files)
200+
201+
for _, file := range files {
202+
name := filepath.Base(file)
203+
expectedErr, ok := tests[name]
204+
require.True(t, ok, "no expected error for %s, please add an entry to the test table", name)
205+
206+
t.Run(name, func(t *testing.T) {
207+
instance := loadYAMLAsJSON(t, file)
208+
err := sch.Validate(instance)
209+
assert.ErrorContains(t, err, expectedErr)
210+
})
211+
}
212+
}

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ require (
1414
github.com/charmbracelet/lipgloss v1.1.0 // MIT
1515
github.com/databricks/databricks-sdk-go v0.126.0 // Apache 2.0
1616
github.com/fatih/color v1.19.0 // MIT
17+
github.com/google/jsonschema-go v0.4.2 // MIT
1718
github.com/google/uuid v1.6.0 // BSD-3-Clause
1819
github.com/gorilla/mux v1.8.1 // BSD 3-Clause
1920
github.com/gorilla/websocket v1.5.3 // BSD 2-Clause

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,8 @@ github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
111111
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
112112
github.com/google/go-querystring v1.2.0 h1:yhqkPbu2/OH+V9BfpCVPZkNmUXhb2gBxJArfhIxNtP0=
113113
github.com/google/go-querystring v1.2.0/go.mod h1:8IFJqpSRITyJ8QhQ13bmbeMBDfmeEJZD5A0egEOmkqU=
114+
github.com/google/jsonschema-go v0.4.2 h1:tmrUohrwoLZZS/P3x7ex0WAVknEkBZM46iALbcqoRA8=
115+
github.com/google/jsonschema-go v0.4.2/go.mod h1:r5quNTdLOYEz95Ru18zA0ydNbBuYoo9tgaYcxEYhJVE=
114116
github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0=
115117
github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM=
116118
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=

0 commit comments

Comments
 (0)