From b05d280822a01f7729cd0452de27fb94ea7a2c9d Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 26 Mar 2026 15:11:14 +0100 Subject: [PATCH 01/12] Replace ajv npm-based JSON schema validation with Go unit test Remove the ajv-cli npm dependency from CI and replace it with a Go test using santhosh-tekuri/jsonschema/v6. This validates the generated bundle schema against pass/fail test cases entirely in Go. Co-authored-by: Isaac --- .github/workflows/push.yml | 30 ------------ bundle/schema/validate_test.go | 86 ++++++++++++++++++++++++++++++++++ go.mod | 2 + go.sum | 4 ++ 4 files changed, 92 insertions(+), 30 deletions(-) create mode 100644 bundle/schema/validate_test.go diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index c0a731b356..0333ee8842 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -306,36 +306,6 @@ jobs: exit 1 fi - # Github repo: https://github.com/ajv-validator/ajv-cli - - name: Install ajv-cli - run: npm install -g ajv-cli@5.0.0 - - # Assert that the generated bundle schema is a valid JSON schema by using - # ajv-cli to validate it against bundle configuration files. - # By default the ajv-cli runs in strict mode which will fail if the schema - # itself is not valid. Strict mode is more strict than the JSON schema - # specification. See for details: https://ajv.js.org/options.html#strict-mode-options - # The ajv-cli is configured to use the markdownDescription keyword which is not part of the JSON schema specification, - # but is used in editors like VSCode to render markdown in the description field - - name: Validate bundle schema - run: | - go run main.go bundle schema > schema.json - - # Add markdownDescription keyword to ajv - echo "module.exports = function(a) { - a.addKeyword('deprecationMessage'); - a.addKeyword('doNotSuggest'); - a.addKeyword('markdownDescription'); - a.addKeyword('x-databricks-preview'); - }" >> keywords.js - - for file in ./bundle/internal/schema/testdata/pass/*.yml; do - ajv test -s schema.json -d $file --valid -c=./keywords.js - done - - for file in ./bundle/internal/schema/testdata/fail/*.yml; do - ajv test -s schema.json -d $file --invalid -c=./keywords.js - done validate-python-codegen: needs: cleanups diff --git a/bundle/schema/validate_test.go b/bundle/schema/validate_test.go new file mode 100644 index 0000000000..96b80d8e75 --- /dev/null +++ b/bundle/schema/validate_test.go @@ -0,0 +1,86 @@ +package schema_test + +import ( + "encoding/json" + "os" + "path/filepath" + "testing" + + "github.com/databricks/cli/bundle/schema" + "github.com/santhosh-tekuri/jsonschema/v6" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.yaml.in/yaml/v3" +) + +func compileSchema(t *testing.T) *jsonschema.Schema { + t.Helper() + + var schemaVal any + err := json.Unmarshal(schema.Bytes, &schemaVal) + require.NoError(t, err) + + c := jsonschema.NewCompiler() + err = c.AddResource("schema.json", schemaVal) + require.NoError(t, err) + + sch, err := c.Compile("schema.json") + require.NoError(t, err) + + return sch +} + +// loadYAMLAsJSON reads a YAML file and returns it as a JSON-compatible any value. +// The YAML -> JSON roundtrip ensures canonical JSON types (float64, string, bool, nil, +// map[string]any, []any) that the JSON schema validator expects. +func loadYAMLAsJSON(t *testing.T, path string) any { + t.Helper() + + data, err := os.ReadFile(path) + require.NoError(t, err) + + var yamlVal any + err = yaml.Unmarshal(data, &yamlVal) + require.NoError(t, err) + + jsonBytes, err := json.Marshal(yamlVal) + require.NoError(t, err) + + var instance any + err = json.Unmarshal(jsonBytes, &instance) + require.NoError(t, err) + + return instance +} + +func TestSchemaValidatePassCases(t *testing.T) { + sch := compileSchema(t) + + files, err := filepath.Glob("../internal/schema/testdata/pass/*.yml") + require.NoError(t, err) + require.NotEmpty(t, files) + + for _, file := range files { + t.Run(filepath.Base(file), func(t *testing.T) { + instance := loadYAMLAsJSON(t, file) + err := sch.Validate(instance) + assert.NoError(t, err) + }) + } +} + +func TestSchemaValidateFailCases(t *testing.T) { + sch := compileSchema(t) + + files, err := filepath.Glob("../internal/schema/testdata/fail/*.yml") + require.NoError(t, err) + require.NotEmpty(t, files) + + for _, file := range files { + t.Run(filepath.Base(file), func(t *testing.T) { + instance := loadYAMLAsJSON(t, file) + err := sch.Validate(instance) + assert.Error(t, err) + }) + } +} diff --git a/go.mod b/go.mod index 4cf2047691..7f8674631e 100644 --- a/go.mod +++ b/go.mod @@ -48,6 +48,8 @@ require gopkg.in/yaml.v3 v3.0.1 // indirect // Dependencies for experimental SSH commands require github.com/tailscale/hujson v0.0.0-20250605163823-992244df8c5a // BSD-3-Clause +require github.com/santhosh-tekuri/jsonschema/v6 v6.0.2 + require ( cloud.google.com/go/auth v0.18.1 // indirect cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect diff --git a/go.sum b/go.sum index affad9a015..8f81f0c690 100644 --- a/go.sum +++ b/go.sum @@ -80,6 +80,8 @@ github.com/databricks/databricks-sdk-go v0.126.0/go.mod h1:hWoHnHbNLjPKiTm5K/7bc github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dlclark/regexp2 v1.11.0 h1:G/nrcoOa7ZXlpoa/91N3X7mM3r8eIlMBBJZvsz/mxKI= +github.com/dlclark/regexp2 v1.11.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc= @@ -193,6 +195,8 @@ github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7 github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 h1:OkMGxebDjyw0ULyrTYWeN0UNCCkmCWfjPnIA2W6oviI= github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06/go.mod h1:+ePHsJ1keEjQtpvf9HHw0f4ZeJ0TLRsxhunSI2hYJSs= +github.com/santhosh-tekuri/jsonschema/v6 v6.0.2 h1:KRzFb2m7YtdldCEkzs6KqmJw4nqEVZGK7IN2kJkjTuQ= +github.com/santhosh-tekuri/jsonschema/v6 v6.0.2/go.mod h1:JXeL+ps8p7/KNMjDQk3TCwPpBy0wYklyWTfbkIzdIFU= github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 h1:n661drycOFuPLCN3Uc8sB6B/s6Z4t2xvBgU1htSHuq8= github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4= github.com/skeema/knownhosts v1.3.1 h1:X2osQ+RAjK76shCbvhHHHVl3ZlgDm8apHEHFqRjnBY8= From 19ddf222e6c66c33676e6ff04d1cc643787ec076 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 26 Mar 2026 15:20:47 +0100 Subject: [PATCH 02/12] Use google/jsonschema-go instead of santhosh-tekuri/jsonschema Transform the nested $defs structure (which mirrors Go import paths) into flat keys before passing to the Google library, and rewrite $ref values to use JSON Pointer ~1 escaping for the flattened keys. Co-authored-by: Isaac --- bundle/schema/validate_test.go | 135 +++++++++++++++++++++++++++++++-- go.mod | 2 +- go.sum | 6 +- 3 files changed, 130 insertions(+), 13 deletions(-) diff --git a/bundle/schema/validate_test.go b/bundle/schema/validate_test.go index 96b80d8e75..f49172968e 100644 --- a/bundle/schema/validate_test.go +++ b/bundle/schema/validate_test.go @@ -4,30 +4,149 @@ import ( "encoding/json" "os" "path/filepath" + "strings" "testing" "github.com/databricks/cli/bundle/schema" - "github.com/santhosh-tekuri/jsonschema/v6" + googleschema "github.com/google/jsonschema-go/jsonschema" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.yaml.in/yaml/v3" ) -func compileSchema(t *testing.T) *jsonschema.Schema { +// schemaKeywords is the set of recognized JSON Schema keywords used to distinguish +// schema definitions from intermediate nesting nodes in $defs. +var schemaKeywords = map[string]bool{ + "$anchor": true, "$defs": true, "$id": true, "$ref": true, "$schema": true, + "additionalItems": true, "additionalProperties": true, + "allOf": true, "anyOf": true, + "const": true, "contains": true, + "default": true, "deprecated": true, "description": true, + "else": true, "enum": true, "exclusiveMaximum": true, "exclusiveMinimum": true, + "format": true, + "if": true, "items": true, + "maxItems": true, "maxLength": true, "maxProperties": true, + "maximum": true, "minItems": true, "minLength": true, "minProperties": true, + "minimum": true, "multipleOf": true, + "not": true, "oneOf": true, + "pattern": true, "patternProperties": true, "prefixItems": true, "properties": true, + "required": true, + "then": true, "title": true, "type": true, + "uniqueItems": true, +} + +// isSchemaNode returns true if the object is a JSON Schema definition +// (has recognized keywords or is empty, which is the "anything" schema). +func isSchemaNode(obj map[string]any) bool { + if len(obj) == 0 { + return true + } + for key := range obj { + if schemaKeywords[key] { + return true + } + } + return false +} + +// flattenDefs flattens the nested $defs object tree into a single-level map. +// Nested path segments are joined with "/" to form flat keys. +// e.g., $defs["github.com"]["databricks"]["resources.Job"] becomes +// $defs["github.com/databricks/resources.Job"]. +func flattenDefs(defs map[string]any) map[string]any { + result := map[string]any{} + flattenDefsHelper("", defs, result) + return result +} + +func flattenDefsHelper(prefix string, node map[string]any, result map[string]any) { + for key, value := range node { + fullKey := key + if prefix != "" { + fullKey = prefix + "/" + key + } + + obj, isObj := value.(map[string]any) + if !isObj || isSchemaNode(obj) { + result[fullKey] = value + } else { + flattenDefsHelper(fullKey, obj, result) + } + } +} + +// rewriteRefs recursively walks a JSON value and rewrites all $ref strings +// to use JSON Pointer ~1 escaping for flat $defs keys. +func rewriteRefs(v any) any { + switch val := v.(type) { + case map[string]any: + result := make(map[string]any, len(val)) + for k, child := range val { + if k == "$ref" { + if s, ok := child.(string); ok { + result[k] = rewriteRef(s) + } else { + result[k] = child + } + } else { + result[k] = rewriteRefs(child) + } + } + return result + case []any: + result := make([]any, len(val)) + for i, item := range val { + result[i] = rewriteRefs(item) + } + return result + default: + return v + } +} + +// rewriteRef transforms a $ref from nested JSON Pointer format to flat key format. +// e.g., "#/$defs/github.com/databricks/resources.Job" +// becomes "#/$defs/github.com~1databricks~1resources.Job" +func rewriteRef(ref string) string { + const prefix = "#/$defs/" + if !strings.HasPrefix(ref, prefix) { + return ref + } + path := ref[len(prefix):] + escaped := strings.ReplaceAll(path, "~", "~0") + escaped = strings.ReplaceAll(escaped, "/", "~1") + return prefix + escaped +} + +// transformSchema flattens nested $defs and rewrites $ref values for compatibility +// with the Google jsonschema-go library which expects flat $defs. +func transformSchema(raw map[string]any) map[string]any { + if defs, ok := raw["$defs"].(map[string]any); ok { + raw["$defs"] = flattenDefs(defs) + } + return rewriteRefs(raw).(map[string]any) +} + +func compileSchema(t *testing.T) *googleschema.Resolved { t.Helper() - var schemaVal any - err := json.Unmarshal(schema.Bytes, &schemaVal) + var raw map[string]any + err := json.Unmarshal(schema.Bytes, &raw) + require.NoError(t, err) + + transformed := transformSchema(raw) + + b, err := json.Marshal(transformed) require.NoError(t, err) - c := jsonschema.NewCompiler() - err = c.AddResource("schema.json", schemaVal) + var s googleschema.Schema + err = json.Unmarshal(b, &s) require.NoError(t, err) - sch, err := c.Compile("schema.json") + resolved, err := s.Resolve(nil) require.NoError(t, err) - return sch + return resolved } // loadYAMLAsJSON reads a YAML file and returns it as a JSON-compatible any value. diff --git a/go.mod b/go.mod index 7f8674631e..93efe9e357 100644 --- a/go.mod +++ b/go.mod @@ -48,7 +48,7 @@ require gopkg.in/yaml.v3 v3.0.1 // indirect // Dependencies for experimental SSH commands require github.com/tailscale/hujson v0.0.0-20250605163823-992244df8c5a // BSD-3-Clause -require github.com/santhosh-tekuri/jsonschema/v6 v6.0.2 +require github.com/google/jsonschema-go v0.4.2 require ( cloud.google.com/go/auth v0.18.1 // indirect diff --git a/go.sum b/go.sum index 8f81f0c690..0451c80a63 100644 --- a/go.sum +++ b/go.sum @@ -80,8 +80,6 @@ github.com/databricks/databricks-sdk-go v0.126.0/go.mod h1:hWoHnHbNLjPKiTm5K/7bc github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/dlclark/regexp2 v1.11.0 h1:G/nrcoOa7ZXlpoa/91N3X7mM3r8eIlMBBJZvsz/mxKI= -github.com/dlclark/regexp2 v1.11.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc= @@ -113,6 +111,8 @@ github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/go-querystring v1.2.0 h1:yhqkPbu2/OH+V9BfpCVPZkNmUXhb2gBxJArfhIxNtP0= github.com/google/go-querystring v1.2.0/go.mod h1:8IFJqpSRITyJ8QhQ13bmbeMBDfmeEJZD5A0egEOmkqU= +github.com/google/jsonschema-go v0.4.2 h1:tmrUohrwoLZZS/P3x7ex0WAVknEkBZM46iALbcqoRA8= +github.com/google/jsonschema-go v0.4.2/go.mod h1:r5quNTdLOYEz95Ru18zA0ydNbBuYoo9tgaYcxEYhJVE= github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0= github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= @@ -195,8 +195,6 @@ github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7 github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 h1:OkMGxebDjyw0ULyrTYWeN0UNCCkmCWfjPnIA2W6oviI= github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06/go.mod h1:+ePHsJ1keEjQtpvf9HHw0f4ZeJ0TLRsxhunSI2hYJSs= -github.com/santhosh-tekuri/jsonschema/v6 v6.0.2 h1:KRzFb2m7YtdldCEkzs6KqmJw4nqEVZGK7IN2kJkjTuQ= -github.com/santhosh-tekuri/jsonschema/v6 v6.0.2/go.mod h1:JXeL+ps8p7/KNMjDQk3TCwPpBy0wYklyWTfbkIzdIFU= github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 h1:n661drycOFuPLCN3Uc8sB6B/s6Z4t2xvBgU1htSHuq8= github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4= github.com/skeema/knownhosts v1.3.1 h1:X2osQ+RAjK76shCbvhHHHVl3ZlgDm8apHEHFqRjnBY8= From 20663a78e3293d30ca094a12227c749e63f77492 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 26 Mar 2026 15:30:32 +0100 Subject: [PATCH 03/12] Fix lint: remove loop var copy, fix formatting, use slice for keywords Co-authored-by: Isaac --- bundle/schema/validate_test.go | 45 +++++++++++++++++----------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/bundle/schema/validate_test.go b/bundle/schema/validate_test.go index f49172968e..6804c94c97 100644 --- a/bundle/schema/validate_test.go +++ b/bundle/schema/validate_test.go @@ -16,23 +16,22 @@ import ( // schemaKeywords is the set of recognized JSON Schema keywords used to distinguish // schema definitions from intermediate nesting nodes in $defs. -var schemaKeywords = map[string]bool{ - "$anchor": true, "$defs": true, "$id": true, "$ref": true, "$schema": true, - "additionalItems": true, "additionalProperties": true, - "allOf": true, "anyOf": true, - "const": true, "contains": true, - "default": true, "deprecated": true, "description": true, - "else": true, "enum": true, "exclusiveMaximum": true, "exclusiveMinimum": true, - "format": true, - "if": true, "items": true, - "maxItems": true, "maxLength": true, "maxProperties": true, - "maximum": true, "minItems": true, "minLength": true, "minProperties": true, - "minimum": true, "multipleOf": true, - "not": true, "oneOf": true, - "pattern": true, "patternProperties": true, "prefixItems": true, "properties": true, - "required": true, - "then": true, "title": true, "type": true, - "uniqueItems": true, +var schemaKeywords = []string{ + "$anchor", "$defs", "$id", "$ref", "$schema", + "additionalItems", "additionalProperties", + "allOf", "anyOf", + "const", "contains", + "default", "deprecated", "description", + "else", "enum", "exclusiveMaximum", "exclusiveMinimum", + "format", "if", "items", + "maxItems", "maxLength", "maxProperties", + "maximum", "minItems", "minLength", "minProperties", + "minimum", "multipleOf", + "not", "oneOf", + "pattern", "patternProperties", "prefixItems", "properties", + "required", + "then", "title", "type", + "uniqueItems", } // isSchemaNode returns true if the object is a JSON Schema definition @@ -41,8 +40,8 @@ func isSchemaNode(obj map[string]any) bool { if len(obj) == 0 { return true } - for key := range obj { - if schemaKeywords[key] { + for _, kw := range schemaKeywords { + if _, ok := obj[kw]; ok { return true } } @@ -59,11 +58,11 @@ func flattenDefs(defs map[string]any) map[string]any { return result } -func flattenDefsHelper(prefix string, node map[string]any, result map[string]any) { +func flattenDefsHelper(prefix string, node, result map[string]any) { for key, value := range node { - fullKey := key - if prefix != "" { - fullKey = prefix + "/" + key + fullKey := prefix + "/" + key + if prefix == "" { + fullKey = key } obj, isObj := value.(map[string]any) From c202fa690a7e0e2b2db5b15ae0b0187308d5bba9 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 26 Mar 2026 15:45:54 +0100 Subject: [PATCH 04/12] Remove extra blank line in push.yml Co-authored-by: Isaac --- .github/workflows/push.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index 0333ee8842..78521c6d12 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -306,7 +306,6 @@ jobs: exit 1 fi - validate-python-codegen: needs: cleanups runs-on: ubuntu-latest From 231f1d7c37c807fb199073526226aa3e6684e297 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 26 Mar 2026 15:51:38 +0100 Subject: [PATCH 05/12] Use structural check in isSchemaNode instead of keyword matching Check value types instead of matching against a keyword list. Intermediate nesting nodes only have map[string]any values, while schema definitions always have at least one non-object value (string, array, bool, etc.). Co-authored-by: Isaac --- bundle/schema/validate_test.go | 31 +++++++------------------------ 1 file changed, 7 insertions(+), 24 deletions(-) diff --git a/bundle/schema/validate_test.go b/bundle/schema/validate_test.go index 6804c94c97..03ba86ff56 100644 --- a/bundle/schema/validate_test.go +++ b/bundle/schema/validate_test.go @@ -14,34 +14,17 @@ import ( "go.yaml.in/yaml/v3" ) -// schemaKeywords is the set of recognized JSON Schema keywords used to distinguish -// schema definitions from intermediate nesting nodes in $defs. -var schemaKeywords = []string{ - "$anchor", "$defs", "$id", "$ref", "$schema", - "additionalItems", "additionalProperties", - "allOf", "anyOf", - "const", "contains", - "default", "deprecated", "description", - "else", "enum", "exclusiveMaximum", "exclusiveMinimum", - "format", "if", "items", - "maxItems", "maxLength", "maxProperties", - "maximum", "minItems", "minLength", "minProperties", - "minimum", "multipleOf", - "not", "oneOf", - "pattern", "patternProperties", "prefixItems", "properties", - "required", - "then", "title", "type", - "uniqueItems", -} - -// isSchemaNode returns true if the object is a JSON Schema definition -// (has recognized keywords or is empty, which is the "anything" schema). +// isSchemaNode returns true if the object is a JSON Schema definition rather +// than an intermediate nesting node in the $defs tree. Intermediate nodes only +// have map[string]any values (more nesting), while schema definitions always +// have at least one non-object value ("type" is a string, "oneOf" is an array, +// etc.). An empty object {} is also a valid schema (it accepts any value). func isSchemaNode(obj map[string]any) bool { if len(obj) == 0 { return true } - for _, kw := range schemaKeywords { - if _, ok := obj[kw]; ok { + for _, v := range obj { + if _, isObj := v.(map[string]any); !isObj { return true } } From fd8611ae807d1525dee4dbd64d1543087db2617a Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 26 Mar 2026 16:17:12 +0100 Subject: [PATCH 06/12] Clarify rewriteRefs comment Co-authored-by: Isaac --- bundle/schema/validate_test.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/bundle/schema/validate_test.go b/bundle/schema/validate_test.go index 03ba86ff56..8486e2fb9f 100644 --- a/bundle/schema/validate_test.go +++ b/bundle/schema/validate_test.go @@ -57,8 +57,10 @@ func flattenDefsHelper(prefix string, node, result map[string]any) { } } -// rewriteRefs recursively walks a JSON value and rewrites all $ref strings -// to use JSON Pointer ~1 escaping for flat $defs keys. +// rewriteRefs recursively walks a JSON value and rewrites all $ref strings. +// After flattening, $defs keys contain literal "/" characters. In JSON Pointer +// (RFC 6901) "/" is the path separator, so these must be escaped as "~1" in +// $ref values to be treated as a single key lookup. func rewriteRefs(v any) any { switch val := v.(type) { case map[string]any: From 5c15b4e856f519cf1354aa03f851f9252078a27a Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 26 Mar 2026 16:19:55 +0100 Subject: [PATCH 07/12] Remove unnecessary ~0 escaping in rewriteRef $defs keys are Go package paths and never contain ~. Co-authored-by: Isaac --- bundle/schema/validate_test.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/bundle/schema/validate_test.go b/bundle/schema/validate_test.go index 8486e2fb9f..25a8afdf5e 100644 --- a/bundle/schema/validate_test.go +++ b/bundle/schema/validate_test.go @@ -97,9 +97,7 @@ func rewriteRef(ref string) string { return ref } path := ref[len(prefix):] - escaped := strings.ReplaceAll(path, "~", "~0") - escaped = strings.ReplaceAll(escaped, "/", "~1") - return prefix + escaped + return prefix + strings.ReplaceAll(path, "/", "~1") } // transformSchema flattens nested $defs and rewrites $ref values for compatibility From 7a331d32a5b3e2dd290aa2447559af53ab47ecd4 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 26 Mar 2026 16:27:43 +0100 Subject: [PATCH 08/12] Assert expected error substrings in fail test cases Use a table mapping each fail test file to the expected schema location in the error. Also fail if a new test file is added without a table entry. Co-authored-by: Isaac --- bundle/schema/validate_test.go | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/bundle/schema/validate_test.go b/bundle/schema/validate_test.go index 25a8afdf5e..35ba12d92e 100644 --- a/bundle/schema/validate_test.go +++ b/bundle/schema/validate_test.go @@ -173,15 +173,38 @@ func TestSchemaValidatePassCases(t *testing.T) { func TestSchemaValidateFailCases(t *testing.T) { sch := compileSchema(t) + // Each entry maps a test file to a substring expected in the validation error. + // The error points to the schema location where validation fails. + tests := map[string]string{ + "basic.yml": "/properties/bundle", + "deprecated_job_field_format.yml": "/properties/resources", + "hidden_job_field_deployment.yml": "/properties/resources", + "hidden_job_field_edit_mode.yml": "/properties/targets", + "incorrect_volume_type.yml": "/properties/resources", + "invalid_enum_value_in_job.yml": "/properties/resources", + "invalid_enum_value_in_model.yml": "/properties/resources", + "invalid_reference_in_job.yml": "/properties/resources", + "invalid_reference_in_model.yml": "/properties/resources", + "readonly_job_field_git_snapshot.yml": "/properties/resources", + "readonly_job_field_job_source.yml": "/properties/resources", + "required_field_missing_in_job.yml": "/properties/resources", + "unknown_field_in_job.yml": "/properties/resources", + "unknown_field_in_model.yml": "/properties/resources", + } + files, err := filepath.Glob("../internal/schema/testdata/fail/*.yml") require.NoError(t, err) require.NotEmpty(t, files) for _, file := range files { - t.Run(filepath.Base(file), func(t *testing.T) { + name := filepath.Base(file) + expectedErr, ok := tests[name] + require.True(t, ok, "no expected error for %s, please add an entry to the test table", name) + + t.Run(name, func(t *testing.T) { instance := loadYAMLAsJSON(t, file) err := sch.Validate(instance) - assert.Error(t, err) + assert.ErrorContains(t, err, expectedErr) }) } } From 09b1914c17a7b046a15de054a5df6e737cdaa1ac Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 26 Mar 2026 16:29:45 +0100 Subject: [PATCH 09/12] Add MIT license comment for google/jsonschema-go in go.mod Co-authored-by: Isaac --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 93efe9e357..3a03414b7e 100644 --- a/go.mod +++ b/go.mod @@ -48,7 +48,7 @@ require gopkg.in/yaml.v3 v3.0.1 // indirect // Dependencies for experimental SSH commands require github.com/tailscale/hujson v0.0.0-20250605163823-992244df8c5a // BSD-3-Clause -require github.com/google/jsonschema-go v0.4.2 +require github.com/google/jsonschema-go v0.4.2 // MIT require ( cloud.google.com/go/auth v0.18.1 // indirect From 5aba499ef590c5203dc1b7038a4c76fe56373783 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 26 Mar 2026 17:52:39 +0100 Subject: [PATCH 10/12] Assert expected error substrings in fail test cases Use a table mapping each fail test file to the expected schema location in the error. Also fail if a new test file is added without a table entry. Co-authored-by: Isaac --- bundle/schema/validate_test.go | 42 +++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/bundle/schema/validate_test.go b/bundle/schema/validate_test.go index 35ba12d92e..2a786115b3 100644 --- a/bundle/schema/validate_test.go +++ b/bundle/schema/validate_test.go @@ -173,23 +173,24 @@ func TestSchemaValidatePassCases(t *testing.T) { func TestSchemaValidateFailCases(t *testing.T) { sch := compileSchema(t) - // Each entry maps a test file to a substring expected in the validation error. - // The error points to the schema location where validation fails. - tests := map[string]string{ - "basic.yml": "/properties/bundle", - "deprecated_job_field_format.yml": "/properties/resources", - "hidden_job_field_deployment.yml": "/properties/resources", - "hidden_job_field_edit_mode.yml": "/properties/targets", - "incorrect_volume_type.yml": "/properties/resources", - "invalid_enum_value_in_job.yml": "/properties/resources", - "invalid_enum_value_in_model.yml": "/properties/resources", - "invalid_reference_in_job.yml": "/properties/resources", - "invalid_reference_in_model.yml": "/properties/resources", - "readonly_job_field_git_snapshot.yml": "/properties/resources", - "readonly_job_field_job_source.yml": "/properties/resources", - "required_field_missing_in_job.yml": "/properties/resources", - "unknown_field_in_job.yml": "/properties/resources", - "unknown_field_in_model.yml": "/properties/resources", + // Each entry maps a test file to substrings expected in the validation error. + // Note: the Google library discards branch-specific errors when oneOf fails, + // so we can only assert on the schema path, not the specific failure reason. + tests := map[string][]string{ + "basic.yml": {"config.Bundle", "oneOf"}, + "deprecated_job_field_format.yml": {"config.Resources", "oneOf"}, + "hidden_job_field_deployment.yml": {"config.Resources", "oneOf"}, + "hidden_job_field_edit_mode.yml": {"config.Target", "oneOf"}, + "incorrect_volume_type.yml": {"config.Resources", "oneOf"}, + "invalid_enum_value_in_job.yml": {"config.Resources", "oneOf"}, + "invalid_enum_value_in_model.yml": {"config.Resources", "oneOf"}, + "invalid_reference_in_job.yml": {"config.Resources", "oneOf"}, + "invalid_reference_in_model.yml": {"config.Resources", "oneOf"}, + "readonly_job_field_git_snapshot.yml": {"config.Resources", "oneOf"}, + "readonly_job_field_job_source.yml": {"config.Resources", "oneOf"}, + "required_field_missing_in_job.yml": {"config.Resources", "oneOf"}, + "unknown_field_in_job.yml": {"config.Resources", "oneOf"}, + "unknown_field_in_model.yml": {"config.Resources", "oneOf"}, } files, err := filepath.Glob("../internal/schema/testdata/fail/*.yml") @@ -198,13 +199,16 @@ func TestSchemaValidateFailCases(t *testing.T) { for _, file := range files { name := filepath.Base(file) - expectedErr, ok := tests[name] + expectedSubstrings, ok := tests[name] require.True(t, ok, "no expected error for %s, please add an entry to the test table", name) t.Run(name, func(t *testing.T) { instance := loadYAMLAsJSON(t, file) err := sch.Validate(instance) - assert.ErrorContains(t, err, expectedErr) + require.Error(t, err) + for _, substr := range expectedSubstrings { + assert.ErrorContains(t, err, substr) + } }) } } From 47799796f5a624d99a5dd364cdc45f7cf9140509 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 26 Mar 2026 19:01:03 +0100 Subject: [PATCH 11/12] Simplify fail test assertions to single expected path string Co-authored-by: Isaac --- bundle/schema/validate_test.go | 41 +++++++++++++++------------------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/bundle/schema/validate_test.go b/bundle/schema/validate_test.go index 2a786115b3..60e4c58c32 100644 --- a/bundle/schema/validate_test.go +++ b/bundle/schema/validate_test.go @@ -173,24 +173,22 @@ func TestSchemaValidatePassCases(t *testing.T) { func TestSchemaValidateFailCases(t *testing.T) { sch := compileSchema(t) - // Each entry maps a test file to substrings expected in the validation error. - // Note: the Google library discards branch-specific errors when oneOf fails, - // so we can only assert on the schema path, not the specific failure reason. - tests := map[string][]string{ - "basic.yml": {"config.Bundle", "oneOf"}, - "deprecated_job_field_format.yml": {"config.Resources", "oneOf"}, - "hidden_job_field_deployment.yml": {"config.Resources", "oneOf"}, - "hidden_job_field_edit_mode.yml": {"config.Target", "oneOf"}, - "incorrect_volume_type.yml": {"config.Resources", "oneOf"}, - "invalid_enum_value_in_job.yml": {"config.Resources", "oneOf"}, - "invalid_enum_value_in_model.yml": {"config.Resources", "oneOf"}, - "invalid_reference_in_job.yml": {"config.Resources", "oneOf"}, - "invalid_reference_in_model.yml": {"config.Resources", "oneOf"}, - "readonly_job_field_git_snapshot.yml": {"config.Resources", "oneOf"}, - "readonly_job_field_job_source.yml": {"config.Resources", "oneOf"}, - "required_field_missing_in_job.yml": {"config.Resources", "oneOf"}, - "unknown_field_in_job.yml": {"config.Resources", "oneOf"}, - "unknown_field_in_model.yml": {"config.Resources", "oneOf"}, + // Each entry maps a test file to the expected schema path in the error. + tests := map[string]string{ + "basic.yml": "config.Bundle", + "deprecated_job_field_format.yml": "config.Resources", + "hidden_job_field_deployment.yml": "config.Resources", + "hidden_job_field_edit_mode.yml": "config.Target", + "incorrect_volume_type.yml": "config.Resources", + "invalid_enum_value_in_job.yml": "config.Resources", + "invalid_enum_value_in_model.yml": "config.Resources", + "invalid_reference_in_job.yml": "config.Resources", + "invalid_reference_in_model.yml": "config.Resources", + "readonly_job_field_git_snapshot.yml": "config.Resources", + "readonly_job_field_job_source.yml": "config.Resources", + "required_field_missing_in_job.yml": "config.Resources", + "unknown_field_in_job.yml": "config.Resources", + "unknown_field_in_model.yml": "config.Resources", } files, err := filepath.Glob("../internal/schema/testdata/fail/*.yml") @@ -199,16 +197,13 @@ func TestSchemaValidateFailCases(t *testing.T) { for _, file := range files { name := filepath.Base(file) - expectedSubstrings, ok := tests[name] + expectedErr, ok := tests[name] require.True(t, ok, "no expected error for %s, please add an entry to the test table", name) t.Run(name, func(t *testing.T) { instance := loadYAMLAsJSON(t, file) err := sch.Validate(instance) - require.Error(t, err) - for _, substr := range expectedSubstrings { - assert.ErrorContains(t, err, substr) - } + assert.ErrorContains(t, err, expectedErr) }) } } From f79d99ffee5a8ecc52bafe32065efd22cced5774 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 27 Mar 2026 11:20:30 +0100 Subject: [PATCH 12/12] Address PR review: inline dep, add NOTICE, add oneOf comment Move google/jsonschema-go require into main deps section, add NOTICE entry under MIT section, and document why fail tests only assert on schema path (oneOf wrapping discards branch-specific errors). Co-authored-by: Isaac --- NOTICE | 4 ++++ bundle/schema/validate_test.go | 3 +++ go.mod | 3 +-- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/NOTICE b/NOTICE index a6545e5b42..50bf0eec27 100644 --- a/NOTICE +++ b/NOTICE @@ -74,6 +74,10 @@ License - https://github.com/manifoldco/promptui/blob/master/LICENSE.md This Software contains code from the following open source projects, licensed under the MIT license: +google/jsonschema-go - https://github.com/google/jsonschema-go +Copyright 2025 Google LLC +License - https://github.com/google/jsonschema-go/blob/main/LICENSE + charmbracelet/bubbles - https://github.com/charmbracelet/bubbles Copyright (c) 2020-2025 Charmbracelet, Inc License - https://github.com/charmbracelet/bubbles/blob/master/LICENSE diff --git a/bundle/schema/validate_test.go b/bundle/schema/validate_test.go index 60e4c58c32..92dfe44447 100644 --- a/bundle/schema/validate_test.go +++ b/bundle/schema/validate_test.go @@ -174,6 +174,9 @@ func TestSchemaValidateFailCases(t *testing.T) { sch := compileSchema(t) // Each entry maps a test file to the expected schema path in the error. + // The bundle schema wraps every type in oneOf for interpolation patterns, + // and the Google library discards per-branch errors on oneOf failure, so + // we can only assert on the schema path, not the specific failure reason. tests := map[string]string{ "basic.yml": "config.Bundle", "deprecated_job_field_format.yml": "config.Resources", diff --git a/go.mod b/go.mod index 3a03414b7e..4df213cae4 100644 --- a/go.mod +++ b/go.mod @@ -14,6 +14,7 @@ require ( github.com/charmbracelet/lipgloss v1.1.0 // MIT github.com/databricks/databricks-sdk-go v0.126.0 // Apache 2.0 github.com/fatih/color v1.19.0 // MIT + github.com/google/jsonschema-go v0.4.2 // MIT github.com/google/uuid v1.6.0 // BSD-3-Clause github.com/gorilla/mux v1.8.1 // BSD 3-Clause github.com/gorilla/websocket v1.5.3 // BSD 2-Clause @@ -48,8 +49,6 @@ require gopkg.in/yaml.v3 v3.0.1 // indirect // Dependencies for experimental SSH commands require github.com/tailscale/hujson v0.0.0-20250605163823-992244df8c5a // BSD-3-Clause -require github.com/google/jsonschema-go v0.4.2 // MIT - require ( cloud.google.com/go/auth v0.18.1 // indirect cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect