Skip to content

Commit eadca40

Browse files
Add docs-only JSON schema with version annotations (#4414)
## Summary This PR adds a documentation-optimized JSON schema (`jsonschema_for_docs.json`) that: - Removes variable interpolation patterns for easier parsing - Adds `x-since-version` annotations by analyzing git history to track when fields were introduced - Provides a cleaner schema for documentation generation ## Changes - Add `since_version.go` to compute version annotations from git tags - Generate `jsonschema_for_docs.json` with `--docs` flag - Update Makefile with `schema-for-docs` target ## Test plan - [x] Unit tests pass - [x] Schema generates correctly with version annotations 🤖 Generated with [Claude Code](https://claude.com/claude-code)
1 parent 9950c38 commit eadca40

File tree

8 files changed

+9102
-10
lines changed

8 files changed

+9102
-10
lines changed

.wsignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
.codegen/_openapi_sha
44
.release_metadata.json
55
bundle/schema/jsonschema.json
6+
bundle/schema/jsonschema_for_docs.json
67
python/docs/images/databricks-logo.svg
78
**/*.dist-info/METADATA
89
**/*.dist-info/WHEEL

Makefile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,9 @@ snapshot-release:
133133
schema:
134134
go run ./bundle/internal/schema ./bundle/internal/schema ./bundle/schema/jsonschema.json
135135

136+
schema-for-docs:
137+
go run ./bundle/internal/schema ./bundle/internal/schema ./bundle/schema/jsonschema_for_docs.json --docs
138+
136139
docs:
137140
go run ./bundle/docsgen ./bundle/internal/schema ./bundle/docsgen
138141

@@ -186,7 +189,7 @@ bundle/direct/dresources/apitypes.generated.yml: ./bundle/direct/tools/generate_
186189
bundle/direct/dresources/resources.generated.yml: ./bundle/direct/tools/generate_resources.py .codegen/openapi.json bundle/direct/dresources/apitypes.generated.yml bundle/direct/dresources/apitypes.yml acceptance/bundle/refschema/out.fields.txt
187190
python3 $^ > $@
188191

189-
.PHONY: lint lintfull tidy lintcheck fmt fmtfull test test-unit test-acc test-slow test-slow-unit test-slow-acc cover showcover build snapshot snapshot-release schema integration integration-short acc-cover acc-showcover docs ws wsfix links checks test-update test-update-templates generate-out-test-toml test-update-aws test-update-all generate-validation
192+
.PHONY: lint lintfull tidy lintcheck fmt fmtfull test test-unit test-acc test-slow test-slow-unit test-slow-acc cover showcover build snapshot snapshot-release schema schema-for-docs integration integration-short acc-cover acc-showcover docs ws wsfix links checks test-update test-update-templates generate-out-test-toml test-update-aws test-update-all generate-validation
190193

191194
test-exp-aitools:
192195
make test TEST_PACKAGES="./experimental/aitools/..." ACCEPTANCE_TEST_FILTER="TestAccept/apps"

bundle/internal/schema/main.go

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -183,8 +183,8 @@ func removeOutputOnlyFields(typ reflect.Type, s jsonschema.Schema) jsonschema.Sc
183183
}
184184

185185
func main() {
186-
if len(os.Args) != 3 {
187-
fmt.Println("Usage: go run main.go <work-dir> <output-file>")
186+
if len(os.Args) < 3 {
187+
fmt.Println("Usage: go run main.go <work-dir> <output-file> [--docs]")
188188
os.Exit(1)
189189
}
190190

@@ -193,10 +193,14 @@ func main() {
193193
// Output file, where the generated JSON schema will be written to.
194194
outputFile := os.Args[2]
195195

196-
generateSchema(workdir, outputFile)
196+
// When --docs is passed, skip interpolation patterns and add sinceVersion annotations.
197+
// This generates a schema optimized for documentation.
198+
docsMode := len(os.Args) >= 4 && os.Args[3] == "--docs"
199+
200+
generateSchema(workdir, outputFile, docsMode)
197201
}
198202

199-
func generateSchema(workdir, outputFile string) {
203+
func generateSchema(workdir, outputFile string, docsMode bool) {
200204
annotationsPath := filepath.Join(workdir, "annotations.yml")
201205
annotationsOpenApiPath := filepath.Join(workdir, "annotations_openapi.yml")
202206
annotationsOpenApiOverridesPath := filepath.Join(workdir, "annotations_openapi_overrides.yml")
@@ -220,15 +224,19 @@ func generateSchema(workdir, outputFile string) {
220224
log.Fatal(err)
221225
}
222226

223-
// Generate the JSON schema from the bundle Go struct.
224-
s, err := jsonschema.FromType(reflect.TypeOf(config.Root{}), []func(reflect.Type, jsonschema.Schema) jsonschema.Schema{
227+
transforms := []func(reflect.Type, jsonschema.Schema) jsonschema.Schema{
225228
removeJobsFields,
226229
removePipelineFields,
227230
makeVolumeTypeOptional,
228231
a.addAnnotations,
229232
removeOutputOnlyFields,
230-
addInterpolationPatterns,
231-
})
233+
}
234+
if !docsMode {
235+
transforms = append(transforms, addInterpolationPatterns)
236+
}
237+
238+
// Generate the JSON schema from the bundle Go struct.
239+
s, err := jsonschema.FromType(reflect.TypeOf(config.Root{}), transforms)
232240

233241
// AdditionalProperties is set to an empty schema to allow non-typed keys used as yaml-anchors
234242
// Example:
@@ -248,6 +256,16 @@ func generateSchema(workdir, outputFile string) {
248256
log.Fatal(err)
249257
}
250258

259+
// In docs mode, add sinceVersion annotations by analyzing git history.
260+
if docsMode {
261+
sinceVersions, err := computeSinceVersions()
262+
if err != nil {
263+
fmt.Printf("Warning: could not compute sinceVersion annotations: %v\n", err)
264+
} else {
265+
addSinceVersionToSchema(&s, sinceVersions)
266+
}
267+
}
268+
251269
b, err := json.MarshalIndent(s, "", " ")
252270
if err != nil {
253271
log.Fatal(err)

bundle/internal/schema/main_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ func TestRequiredAnnotationsForNewFields(t *testing.T) {
6060
err = copyFile("annotations_openapi_overrides.yml", annotationsOpenApiOverridesPath)
6161
assert.NoError(t, err)
6262

63-
generateSchema(workdir, path.Join(t.TempDir(), "schema.json"))
63+
generateSchema(workdir, path.Join(t.TempDir(), "schema.json"), false)
6464

6565
originalFile, err := os.ReadFile("annotations.yml")
6666
assert.NoError(t, err)
Lines changed: 275 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,275 @@
1+
package main
2+
3+
import (
4+
"encoding/json"
5+
"fmt"
6+
"maps"
7+
"os/exec"
8+
"strconv"
9+
"strings"
10+
11+
"github.com/databricks/cli/libs/jsonschema"
12+
)
13+
14+
// Version when bundle/schema/jsonschema.json was added to the repo.
15+
var embeddedSchemaVersion = [3]int{0, 229, 0}
16+
17+
// computeSinceVersions computes when each field was first introduced by analyzing git history.
18+
// It returns a map from "typePath.fieldName" to the version string (e.g., "v0.229.0").
19+
// This function always recomputes versions at runtime without storing state.
20+
func computeSinceVersions() (map[string]string, error) {
21+
versions, err := getVersionTags()
22+
if err != nil {
23+
return nil, fmt.Errorf("getting version tags: %w", err)
24+
}
25+
if len(versions) == 0 {
26+
return nil, nil
27+
}
28+
29+
sinceVersions := make(map[string]string)
30+
for _, version := range versions {
31+
schema, err := getSchemaAtVersion(version)
32+
if err != nil {
33+
continue
34+
}
35+
36+
for field := range flattenSchema(schema) {
37+
if _, seen := sinceVersions[field]; !seen {
38+
sinceVersions[field] = version
39+
}
40+
}
41+
}
42+
43+
return sinceVersions, nil
44+
}
45+
46+
// parseVersion converts a version tag such as "v0.228.0" into its numeric
// components, here [0, 228, 0].
//
// A single leading "v" is optional. Only the first three dot-separated
// components are read; any further components are ignored. An error is
// returned when fewer than three components are present or when one of the
// first three is not a decimal integer.
func parseVersion(tag string) ([3]int, error) {
	trimmed := strings.TrimPrefix(tag, "v")
	components := strings.Split(trimmed, ".")
	if len(components) < 3 {
		// The message reports the tag with the "v" prefix already removed.
		return [3]int{}, fmt.Errorf("invalid version tag: %s", trimmed)
	}

	var parsed [3]int
	for i := range parsed {
		number, err := strconv.Atoi(components[i])
		if err != nil {
			return [3]int{}, fmt.Errorf("invalid version component: %s", components[i])
		}
		parsed[i] = number
	}
	return parsed, nil
}
63+
64+
// compareVersions orders two three-component versions, comparing the
// components left to right and stopping at the first one that differs.
// It returns -1 when a sorts before b, 1 when a sorts after b, and 0 when
// all three components are equal.
func compareVersions(a, b [3]int) int {
	for idx, left := range a {
		right := b[idx]
		switch {
		case left < right:
			return -1
		case left > right:
			return 1
		}
	}
	return 0
}
76+
77+
// getVersionTags returns sorted list of version tags from git (oldest first).
78+
func getVersionTags() ([]string, error) {
79+
cmd := exec.Command("git", "tag", "--list", "v*", "--sort=version:refname")
80+
output, err := cmd.Output()
81+
if err != nil {
82+
return nil, fmt.Errorf("failed to get git tags: %w", err)
83+
}
84+
85+
var tags []string
86+
for _, line := range strings.Split(string(output), "\n") {
87+
tag := strings.TrimSpace(line)
88+
if tag == "" {
89+
continue
90+
}
91+
v, err := parseVersion(tag)
92+
if err != nil {
93+
continue
94+
}
95+
if compareVersions(v, embeddedSchemaVersion) >= 0 {
96+
tags = append(tags, tag)
97+
}
98+
}
99+
return tags, nil
100+
}
101+
102+
// getSchemaAtVersion reads bundle/schema/jsonschema.json as it exists at the
// given git revision (via `git show <version>:<path>`) and decodes it into a
// generic JSON object.
//
// It returns a wrapped error when git cannot produce the file or when the
// file content is not valid JSON for a map.
func getSchemaAtVersion(version string) (map[string]any, error) {
	object := version + ":bundle/schema/jsonschema.json"
	raw, err := exec.Command("git", "show", object).Output()
	if err != nil {
		return nil, fmt.Errorf("failed to get schema at %s: %w", version, err)
	}

	var decoded map[string]any
	if err = json.Unmarshal(raw, &decoded); err != nil {
		return nil, fmt.Errorf("failed to parse schema at %s: %w", version, err)
	}
	return decoded, nil
}
116+
117+
// flattenSchema extracts all field paths from a JSON schema.
118+
// Returns a map of "typePath.fieldName" -> true for all fields in the schema.
119+
func flattenSchema(schema map[string]any) map[string]bool {
120+
fields := make(map[string]bool)
121+
122+
if defs, ok := schema["$defs"].(map[string]any); ok {
123+
typeDefs := walkDefs(defs, "")
124+
for typePath, propNames := range typeDefs {
125+
for _, propName := range propNames {
126+
fields[typePath+"."+propName] = true
127+
}
128+
}
129+
}
130+
131+
rootType := "github.com/databricks/cli/bundle/config.Root"
132+
if props, ok := schema["properties"].(map[string]any); ok {
133+
for propName := range props {
134+
fields[rootType+"."+propName] = true
135+
}
136+
}
137+
138+
return fields
139+
}
140+
141+
// walkDefs recursively walks $defs to extract type definitions.
142+
func walkDefs(defs map[string]any, prefix string) map[string][]string {
143+
result := make(map[string][]string)
144+
145+
for key, value := range defs {
146+
valueMap, ok := value.(map[string]any)
147+
if !ok {
148+
continue
149+
}
150+
151+
currentPath := prefix
152+
if currentPath != "" {
153+
currentPath += "/" + key
154+
} else {
155+
currentPath = key
156+
}
157+
158+
props := extractProperties(valueMap)
159+
if props != nil {
160+
var propNames []string
161+
for propName := range props {
162+
propNames = append(propNames, propName)
163+
}
164+
result[currentPath] = propNames
165+
} else if _, hasType := valueMap["type"]; !hasType {
166+
// It's a nested namespace, recurse into it
167+
maps.Copy(result, walkDefs(valueMap, currentPath))
168+
}
169+
}
170+
return result
171+
}
172+
173+
// extractProperties collects the properties declared by a schema definition.
//
// It looks at the definition's own "properties" object and at the
// "properties" object of every "oneOf" and "anyOf" variant, and returns the
// union of all of them. When the same property name occurs more than once,
// the first occurrence wins (direct properties, then oneOf variants in
// order, then anyOf variants in order).
//
// Merging all sources keeps this function in line with
// addSinceVersionToProperties, which annotates direct properties as well as
// every OneOf/AnyOf variant; stopping at the first match would leave
// properties of later variants without a recorded version.
//
// The result is a freshly allocated map; the input is never modified. It is
// nil when no "properties" object exists anywhere in the definition, and
// non-nil (possibly empty) otherwise. walkDefs relies on that distinction to
// tell type definitions apart from namespaces.
func extractProperties(valueMap map[string]any) map[string]any {
	var merged map[string]any

	// collect folds node["properties"] into merged, if it is a JSON object.
	collect := func(node map[string]any) {
		props, ok := node["properties"].(map[string]any)
		if !ok {
			return
		}
		if merged == nil {
			// Allocate even for an empty properties object so the caller
			// still sees a non-nil result.
			merged = make(map[string]any, len(props))
		}
		for name, def := range props {
			if _, seen := merged[name]; !seen {
				merged[name] = def
			}
		}
	}

	collect(valueMap)

	for _, key := range []string{"oneOf", "anyOf"} {
		variants, ok := valueMap[key].([]any)
		if !ok {
			continue
		}
		for _, variant := range variants {
			if variantMap, ok := variant.(map[string]any); ok {
				collect(variantMap)
			}
		}
	}

	return merged
}
195+
196+
// addSinceVersionToSchema applies sinceVersion annotations to the generated schema.
197+
// The sinceVersions map uses keys in the format "typePath.fieldName".
198+
func addSinceVersionToSchema(s *jsonschema.Schema, sinceVersions map[string]string) {
199+
if sinceVersions == nil || s == nil {
200+
return
201+
}
202+
203+
// Apply to root properties
204+
rootType := "github.com/databricks/cli/bundle/config.Root"
205+
for propName, prop := range s.Properties {
206+
key := rootType + "." + propName
207+
if version, ok := sinceVersions[key]; ok {
208+
prop.SinceVersion = version
209+
}
210+
}
211+
212+
// Apply to $defs - the definitions are nested maps like:
213+
// {"github.com": {"databricks": {"cli": {"bundle": {"config.Root": Schema{...}}}}}}
214+
if s.Definitions == nil {
215+
return
216+
}
217+
218+
walkDefinitions(s.Definitions, "", sinceVersions)
219+
}
220+
221+
// walkDefinitions recursively walks the nested definitions map and applies sinceVersion.
222+
func walkDefinitions(defs map[string]any, pathPrefix string, sinceVersions map[string]string) {
223+
for key, value := range defs {
224+
var currentPath string
225+
if pathPrefix != "" {
226+
currentPath = pathPrefix + "/" + key
227+
} else {
228+
currentPath = key
229+
}
230+
231+
// Try to convert to *jsonschema.Schema
232+
if schema, ok := value.(*jsonschema.Schema); ok {
233+
addSinceVersionToProperties(schema, currentPath, sinceVersions)
234+
continue
235+
}
236+
237+
// Try to convert to jsonschema.Schema (non-pointer)
238+
if schema, ok := value.(jsonschema.Schema); ok {
239+
addSinceVersionToProperties(&schema, currentPath, sinceVersions)
240+
// Update the map with the modified schema
241+
defs[key] = schema
242+
continue
243+
}
244+
245+
// Otherwise, it's a nested map - recurse into it
246+
if nestedMap, ok := value.(map[string]any); ok {
247+
walkDefinitions(nestedMap, currentPath, sinceVersions)
248+
}
249+
}
250+
}
251+
252+
// addSinceVersionToProperties applies sinceVersion to schema properties.
253+
func addSinceVersionToProperties(s *jsonschema.Schema, typePath string, sinceVersions map[string]string) {
254+
if s == nil {
255+
return
256+
}
257+
258+
// Apply to direct properties
259+
for propName, prop := range s.Properties {
260+
key := typePath + "." + propName
261+
if version, ok := sinceVersions[key]; ok {
262+
prop.SinceVersion = version
263+
}
264+
}
265+
266+
// Handle OneOf variants (need to use index to modify in place)
267+
for i := range s.OneOf {
268+
addSinceVersionToProperties(&s.OneOf[i], typePath, sinceVersions)
269+
}
270+
271+
// Handle AnyOf variants (need to use index to modify in place)
272+
for i := range s.AnyOf {
273+
addSinceVersionToProperties(&s.AnyOf[i], typePath, sinceVersions)
274+
}
275+
}

0 commit comments

Comments
 (0)