|
| 1 | +package schema_test |
| 2 | + |
| 3 | +import ( |
| 4 | + "encoding/json" |
| 5 | + "os" |
| 6 | + "path/filepath" |
| 7 | + "strings" |
| 8 | + "testing" |
| 9 | + |
| 10 | + "github.com/databricks/cli/bundle/schema" |
| 11 | + googleschema "github.com/google/jsonschema-go/jsonschema" |
| 12 | + "github.com/stretchr/testify/assert" |
| 13 | + "github.com/stretchr/testify/require" |
| 14 | + "go.yaml.in/yaml/v3" |
| 15 | +) |
| 16 | + |
// isSchemaNode reports whether obj is a JSON Schema definition rather than an
// intermediate nesting node in the $defs tree. Intermediate nodes contain only
// map[string]any values (further nesting), whereas schema definitions carry at
// least one non-object value ("type" is a string, "oneOf" is an array, etc.).
// An empty object {} is also a valid schema (it accepts any value).
//
// NOTE(review): this is a heuristic — a schema made up solely of object-valued
// keywords (e.g. only "properties") would be misclassified; the generated
// bundle schema presumably never emits such a definition. Confirm if reused.
func isSchemaNode(obj map[string]any) bool {
	if len(obj) == 0 {
		return true
	}
	for _, v := range obj {
		switch v.(type) {
		case map[string]any:
			// Object value: still consistent with an intermediate node; keep looking.
		default:
			// Any non-object value proves this is a schema definition.
			return true
		}
	}
	return false
}
| 33 | + |
| 34 | +// flattenDefs flattens the nested $defs object tree into a single-level map. |
| 35 | +// Nested path segments are joined with "/" to form flat keys. |
| 36 | +// e.g., $defs["github.com"]["databricks"]["resources.Job"] becomes |
| 37 | +// $defs["github.com/databricks/resources.Job"]. |
| 38 | +func flattenDefs(defs map[string]any) map[string]any { |
| 39 | + result := map[string]any{} |
| 40 | + flattenDefsHelper("", defs, result) |
| 41 | + return result |
| 42 | +} |
| 43 | + |
| 44 | +func flattenDefsHelper(prefix string, node, result map[string]any) { |
| 45 | + for key, value := range node { |
| 46 | + fullKey := prefix + "/" + key |
| 47 | + if prefix == "" { |
| 48 | + fullKey = key |
| 49 | + } |
| 50 | + |
| 51 | + obj, isObj := value.(map[string]any) |
| 52 | + if !isObj || isSchemaNode(obj) { |
| 53 | + result[fullKey] = value |
| 54 | + } else { |
| 55 | + flattenDefsHelper(fullKey, obj, result) |
| 56 | + } |
| 57 | + } |
| 58 | +} |
| 59 | + |
| 60 | +// rewriteRefs recursively walks a JSON value and rewrites all $ref strings. |
| 61 | +// After flattening, $defs keys contain literal "/" characters. In JSON Pointer |
| 62 | +// (RFC 6901) "/" is the path separator, so these must be escaped as "~1" in |
| 63 | +// $ref values to be treated as a single key lookup. |
| 64 | +func rewriteRefs(v any) any { |
| 65 | + switch val := v.(type) { |
| 66 | + case map[string]any: |
| 67 | + result := make(map[string]any, len(val)) |
| 68 | + for k, child := range val { |
| 69 | + if k == "$ref" { |
| 70 | + if s, ok := child.(string); ok { |
| 71 | + result[k] = rewriteRef(s) |
| 72 | + } else { |
| 73 | + result[k] = child |
| 74 | + } |
| 75 | + } else { |
| 76 | + result[k] = rewriteRefs(child) |
| 77 | + } |
| 78 | + } |
| 79 | + return result |
| 80 | + case []any: |
| 81 | + result := make([]any, len(val)) |
| 82 | + for i, item := range val { |
| 83 | + result[i] = rewriteRefs(item) |
| 84 | + } |
| 85 | + return result |
| 86 | + default: |
| 87 | + return v |
| 88 | + } |
| 89 | +} |
| 90 | + |
// rewriteRef transforms a $ref from nested JSON Pointer format to flat key format.
// e.g., "#/$defs/github.com/databricks/resources.Job"
// becomes "#/$defs/github.com~1databricks~1resources.Job"
//
// Escaping follows RFC 6901 section 3: a literal "~" must be encoded as "~0"
// before "/" is encoded as "~1", otherwise a key containing "~1" would be
// indistinguishable from an escaped "/" when the pointer is evaluated.
// Refs that do not point into "#/$defs/" are returned unchanged.
func rewriteRef(ref string) string {
	const prefix = "#/$defs/"
	if !strings.HasPrefix(ref, prefix) {
		return ref
	}
	path := strings.TrimPrefix(ref, prefix)
	path = strings.ReplaceAll(path, "~", "~0") // must run before the "/" pass
	path = strings.ReplaceAll(path, "/", "~1")
	return prefix + path
}
| 102 | + |
| 103 | +// transformSchema flattens nested $defs and rewrites $ref values for compatibility |
| 104 | +// with the Google jsonschema-go library which expects flat $defs. |
| 105 | +func transformSchema(raw map[string]any) map[string]any { |
| 106 | + if defs, ok := raw["$defs"].(map[string]any); ok { |
| 107 | + raw["$defs"] = flattenDefs(defs) |
| 108 | + } |
| 109 | + return rewriteRefs(raw).(map[string]any) |
| 110 | +} |
| 111 | + |
| 112 | +func compileSchema(t *testing.T) *googleschema.Resolved { |
| 113 | + t.Helper() |
| 114 | + |
| 115 | + var raw map[string]any |
| 116 | + err := json.Unmarshal(schema.Bytes, &raw) |
| 117 | + require.NoError(t, err) |
| 118 | + |
| 119 | + transformed := transformSchema(raw) |
| 120 | + |
| 121 | + b, err := json.Marshal(transformed) |
| 122 | + require.NoError(t, err) |
| 123 | + |
| 124 | + var s googleschema.Schema |
| 125 | + err = json.Unmarshal(b, &s) |
| 126 | + require.NoError(t, err) |
| 127 | + |
| 128 | + resolved, err := s.Resolve(nil) |
| 129 | + require.NoError(t, err) |
| 130 | + |
| 131 | + return resolved |
| 132 | +} |
| 133 | + |
| 134 | +// loadYAMLAsJSON reads a YAML file and returns it as a JSON-compatible any value. |
| 135 | +// The YAML -> JSON roundtrip ensures canonical JSON types (float64, string, bool, nil, |
| 136 | +// map[string]any, []any) that the JSON schema validator expects. |
| 137 | +func loadYAMLAsJSON(t *testing.T, path string) any { |
| 138 | + t.Helper() |
| 139 | + |
| 140 | + data, err := os.ReadFile(path) |
| 141 | + require.NoError(t, err) |
| 142 | + |
| 143 | + var yamlVal any |
| 144 | + err = yaml.Unmarshal(data, &yamlVal) |
| 145 | + require.NoError(t, err) |
| 146 | + |
| 147 | + jsonBytes, err := json.Marshal(yamlVal) |
| 148 | + require.NoError(t, err) |
| 149 | + |
| 150 | + var instance any |
| 151 | + err = json.Unmarshal(jsonBytes, &instance) |
| 152 | + require.NoError(t, err) |
| 153 | + |
| 154 | + return instance |
| 155 | +} |
| 156 | + |
| 157 | +func TestSchemaValidatePassCases(t *testing.T) { |
| 158 | + sch := compileSchema(t) |
| 159 | + |
| 160 | + files, err := filepath.Glob("../internal/schema/testdata/pass/*.yml") |
| 161 | + require.NoError(t, err) |
| 162 | + require.NotEmpty(t, files) |
| 163 | + |
| 164 | + for _, file := range files { |
| 165 | + t.Run(filepath.Base(file), func(t *testing.T) { |
| 166 | + instance := loadYAMLAsJSON(t, file) |
| 167 | + err := sch.Validate(instance) |
| 168 | + assert.NoError(t, err) |
| 169 | + }) |
| 170 | + } |
| 171 | +} |
| 172 | + |
| 173 | +func TestSchemaValidateFailCases(t *testing.T) { |
| 174 | + sch := compileSchema(t) |
| 175 | + |
| 176 | + // Each entry maps a test file to the expected schema path in the error. |
| 177 | + // The bundle schema wraps every type in oneOf for interpolation patterns, |
| 178 | + // and the Google library discards per-branch errors on oneOf failure, so |
| 179 | + // we can only assert on the schema path, not the specific failure reason. |
| 180 | + tests := map[string]string{ |
| 181 | + "basic.yml": "config.Bundle", |
| 182 | + "deprecated_job_field_format.yml": "config.Resources", |
| 183 | + "hidden_job_field_deployment.yml": "config.Resources", |
| 184 | + "hidden_job_field_edit_mode.yml": "config.Target", |
| 185 | + "incorrect_volume_type.yml": "config.Resources", |
| 186 | + "invalid_enum_value_in_job.yml": "config.Resources", |
| 187 | + "invalid_enum_value_in_model.yml": "config.Resources", |
| 188 | + "invalid_reference_in_job.yml": "config.Resources", |
| 189 | + "invalid_reference_in_model.yml": "config.Resources", |
| 190 | + "readonly_job_field_git_snapshot.yml": "config.Resources", |
| 191 | + "readonly_job_field_job_source.yml": "config.Resources", |
| 192 | + "required_field_missing_in_job.yml": "config.Resources", |
| 193 | + "unknown_field_in_job.yml": "config.Resources", |
| 194 | + "unknown_field_in_model.yml": "config.Resources", |
| 195 | + } |
| 196 | + |
| 197 | + files, err := filepath.Glob("../internal/schema/testdata/fail/*.yml") |
| 198 | + require.NoError(t, err) |
| 199 | + require.NotEmpty(t, files) |
| 200 | + |
| 201 | + for _, file := range files { |
| 202 | + name := filepath.Base(file) |
| 203 | + expectedErr, ok := tests[name] |
| 204 | + require.True(t, ok, "no expected error for %s, please add an entry to the test table", name) |
| 205 | + |
| 206 | + t.Run(name, func(t *testing.T) { |
| 207 | + instance := loadYAMLAsJSON(t, file) |
| 208 | + err := sch.Validate(instance) |
| 209 | + assert.ErrorContains(t, err, expectedErr) |
| 210 | + }) |
| 211 | + } |
| 212 | +} |
0 commit comments