diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index c0a731b356..78521c6d12 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -306,37 +306,6 @@ jobs: exit 1 fi - # Github repo: https://github.com/ajv-validator/ajv-cli - - name: Install ajv-cli - run: npm install -g ajv-cli@5.0.0 - - # Assert that the generated bundle schema is a valid JSON schema by using - # ajv-cli to validate it against bundle configuration files. - # By default the ajv-cli runs in strict mode which will fail if the schema - # itself is not valid. Strict mode is more strict than the JSON schema - # specification. See for details: https://ajv.js.org/options.html#strict-mode-options - # The ajv-cli is configured to use the markdownDescription keyword which is not part of the JSON schema specification, - # but is used in editors like VSCode to render markdown in the description field - - name: Validate bundle schema - run: | - go run main.go bundle schema > schema.json - - # Add markdownDescription keyword to ajv - echo "module.exports = function(a) { - a.addKeyword('deprecationMessage'); - a.addKeyword('doNotSuggest'); - a.addKeyword('markdownDescription'); - a.addKeyword('x-databricks-preview'); - }" >> keywords.js - - for file in ./bundle/internal/schema/testdata/pass/*.yml; do - ajv test -s schema.json -d $file --valid -c=./keywords.js - done - - for file in ./bundle/internal/schema/testdata/fail/*.yml; do - ajv test -s schema.json -d $file --invalid -c=./keywords.js - done - validate-python-codegen: needs: cleanups runs-on: ubuntu-latest diff --git a/NOTICE b/NOTICE index a6545e5b42..50bf0eec27 100644 --- a/NOTICE +++ b/NOTICE @@ -74,6 +74,10 @@ License - https://github.com/manifoldco/promptui/blob/master/LICENSE.md This Software contains code from the following open source projects, licensed under the MIT license: +google/jsonschema-go - https://github.com/google/jsonschema-go +Copyright 2025 Google LLC +License - https://github.com/google/jsonschema-go/blob/main/LICENSE + charmbracelet/bubbles - https://github.com/charmbracelet/bubbles Copyright (c) 2020-2025 Charmbracelet, Inc License - https://github.com/charmbracelet/bubbles/blob/master/LICENSE diff --git a/bundle/schema/validate_test.go b/bundle/schema/validate_test.go new file mode 100644 index 0000000000..92dfe44447 --- /dev/null +++ b/bundle/schema/validate_test.go @@ -0,0 +1,212 @@ +package schema_test + +import ( + "encoding/json" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/databricks/cli/bundle/schema" + googleschema "github.com/google/jsonschema-go/jsonschema" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.yaml.in/yaml/v3" +) + +// isSchemaNode returns true if the object is a JSON Schema definition rather +// than an intermediate nesting node in the $defs tree. Intermediate nodes only +// have map[string]any values (more nesting), while schema definitions always +// have at least one non-object value ("type" is a string, "oneOf" is an array, +// etc.). An empty object {} is also a valid schema (it accepts any value). +func isSchemaNode(obj map[string]any) bool { + if len(obj) == 0 { + return true + } + for _, v := range obj { + if _, isObj := v.(map[string]any); !isObj { + return true + } + } + return false +} + +// flattenDefs flattens the nested $defs object tree into a single-level map. +// Nested path segments are joined with "/" to form flat keys. +// e.g., $defs["github.com"]["databricks"]["resources.Job"] becomes +// $defs["github.com/databricks/resources.Job"]. +func flattenDefs(defs map[string]any) map[string]any { + result := map[string]any{} + flattenDefsHelper("", defs, result) + return result +} + +func flattenDefsHelper(prefix string, node, result map[string]any) { + for key, value := range node { + fullKey := prefix + "/" + key + if prefix == "" { + fullKey = key + } + + obj, isObj := value.(map[string]any) + if !isObj || isSchemaNode(obj) { + result[fullKey] = value + } else { + flattenDefsHelper(fullKey, obj, result) + } + } +} + +// rewriteRefs recursively walks a JSON value and rewrites all $ref strings. +// After flattening, $defs keys contain literal "/" characters. In JSON Pointer +// (RFC 6901) "/" is the path separator, so these must be escaped as "~1" in +// $ref values to be treated as a single key lookup. +func rewriteRefs(v any) any { + switch val := v.(type) { + case map[string]any: + result := make(map[string]any, len(val)) + for k, child := range val { + if k == "$ref" { + if s, ok := child.(string); ok { + result[k] = rewriteRef(s) + } else { + result[k] = child + } + } else { + result[k] = rewriteRefs(child) + } + } + return result + case []any: + result := make([]any, len(val)) + for i, item := range val { + result[i] = rewriteRefs(item) + } + return result + default: + return v + } +} + +// rewriteRef transforms a $ref from nested JSON Pointer format to flat key format. +// e.g., "#/$defs/github.com/databricks/resources.Job" +// becomes "#/$defs/github.com~1databricks~1resources.Job" +func rewriteRef(ref string) string { + const prefix = "#/$defs/" + if !strings.HasPrefix(ref, prefix) { + return ref + } + path := ref[len(prefix):] + return prefix + strings.ReplaceAll(path, "/", "~1") +} + +// transformSchema flattens nested $defs and rewrites $ref values for compatibility +// with the Google jsonschema-go library which expects flat $defs. +func transformSchema(raw map[string]any) map[string]any { + if defs, ok := raw["$defs"].(map[string]any); ok { + raw["$defs"] = flattenDefs(defs) + } + return rewriteRefs(raw).(map[string]any) +} + +func compileSchema(t *testing.T) *googleschema.Resolved { + t.Helper() + + var raw map[string]any + err := json.Unmarshal(schema.Bytes, &raw) + require.NoError(t, err) + + transformed := transformSchema(raw) + + b, err := json.Marshal(transformed) + require.NoError(t, err) + + var s googleschema.Schema + err = json.Unmarshal(b, &s) + require.NoError(t, err) + + resolved, err := s.Resolve(nil) + require.NoError(t, err) + + return resolved +} + +// loadYAMLAsJSON reads a YAML file and returns it as a JSON-compatible any value. +// The YAML -> JSON roundtrip ensures canonical JSON types (float64, string, bool, nil, +// map[string]any, []any) that the JSON schema validator expects. +func loadYAMLAsJSON(t *testing.T, path string) any { + t.Helper() + + data, err := os.ReadFile(path) + require.NoError(t, err) + + var yamlVal any + err = yaml.Unmarshal(data, &yamlVal) + require.NoError(t, err) + + jsonBytes, err := json.Marshal(yamlVal) + require.NoError(t, err) + + var instance any + err = json.Unmarshal(jsonBytes, &instance) + require.NoError(t, err) + + return instance +} + +func TestSchemaValidatePassCases(t *testing.T) { + sch := compileSchema(t) + + files, err := filepath.Glob("../internal/schema/testdata/pass/*.yml") + require.NoError(t, err) + require.NotEmpty(t, files) + + for _, file := range files { + t.Run(filepath.Base(file), func(t *testing.T) { + instance := loadYAMLAsJSON(t, file) + err := sch.Validate(instance) + assert.NoError(t, err) + }) + } +} + +func TestSchemaValidateFailCases(t *testing.T) { + sch := compileSchema(t) + + // Each entry maps a test file to the expected schema path in the error. + // The bundle schema wraps every type in oneOf for interpolation patterns, + // and the Google library discards per-branch errors on oneOf failure, so + // we can only assert on the schema path, not the specific failure reason. + tests := map[string]string{ + "basic.yml": "config.Bundle", + "deprecated_job_field_format.yml": "config.Resources", + "hidden_job_field_deployment.yml": "config.Resources", + "hidden_job_field_edit_mode.yml": "config.Target", + "incorrect_volume_type.yml": "config.Resources", + "invalid_enum_value_in_job.yml": "config.Resources", + "invalid_enum_value_in_model.yml": "config.Resources", + "invalid_reference_in_job.yml": "config.Resources", + "invalid_reference_in_model.yml": "config.Resources", + "readonly_job_field_git_snapshot.yml": "config.Resources", + "readonly_job_field_job_source.yml": "config.Resources", + "required_field_missing_in_job.yml": "config.Resources", + "unknown_field_in_job.yml": "config.Resources", + "unknown_field_in_model.yml": "config.Resources", + } + + files, err := filepath.Glob("../internal/schema/testdata/fail/*.yml") + require.NoError(t, err) + require.NotEmpty(t, files) + + for _, file := range files { + name := filepath.Base(file) + expectedErr, ok := tests[name] + require.True(t, ok, "no expected error for %s, please add an entry to the test table", name) + + t.Run(name, func(t *testing.T) { + instance := loadYAMLAsJSON(t, file) + err := sch.Validate(instance) + assert.ErrorContains(t, err, expectedErr) + }) + } +} diff --git a/go.mod b/go.mod index 4cf2047691..4df213cae4 100644 --- a/go.mod +++ b/go.mod @@ -14,6 +14,7 @@ require ( github.com/charmbracelet/lipgloss v1.1.0 // MIT github.com/databricks/databricks-sdk-go v0.126.0 // Apache 2.0 github.com/fatih/color v1.19.0 // MIT + github.com/google/jsonschema-go v0.4.2 // MIT github.com/google/uuid v1.6.0 // BSD-3-Clause github.com/gorilla/mux v1.8.1 // BSD 3-Clause github.com/gorilla/websocket v1.5.3 // BSD 2-Clause diff --git a/go.sum b/go.sum index affad9a015..0451c80a63 100644 --- a/go.sum +++ b/go.sum @@ -111,6 +111,8 @@ github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/go-querystring v1.2.0 h1:yhqkPbu2/OH+V9BfpCVPZkNmUXhb2gBxJArfhIxNtP0= github.com/google/go-querystring v1.2.0/go.mod h1:8IFJqpSRITyJ8QhQ13bmbeMBDfmeEJZD5A0egEOmkqU= +github.com/google/jsonschema-go v0.4.2 h1:tmrUohrwoLZZS/P3x7ex0WAVknEkBZM46iALbcqoRA8= +github.com/google/jsonschema-go v0.4.2/go.mod h1:r5quNTdLOYEz95Ru18zA0ydNbBuYoo9tgaYcxEYhJVE= github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0= github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=