Skip to content

Commit 05f860a

Browse files
authored
Support changing CLI defaults in config-sync (#4427)
## Changes

1. Support changing fields that have CLI defaults but are not defined in the config.
2. Support adding new tasks when the task array already exists.
3. Support task renaming (the new field is now added; previously this was an error).
4. Add an explicit reset value for `queue.enabled`, as the remote returns it as `nil` when queueing is disabled, and we can't define `nil` in config.
5. Simplify logic around server-side defaults using `reflect.DeepEqual`, and move them to a separate file, `defaults.go`.
6. Remove the SDK JSON marshaling round-trip; use `dyn.FromTyped` instead.
7. Handle index shifts — now, when an element is removed from a sequence, we adjust the indices in all following operations.
8. Filter server-side defaults out of created payloads (e.g., when new tasks are added, `plan -> cd.Remote` contains a payload with server-side defaults, which would otherwise be applied to the config).

## Why

Some bugs were found after testing the feature in the workspace.

## Tests

Added tests for new tasks, task renames, and for CLI defaults.

<!-- If your PR needs to be included in the release notes for next release, add a separate entry in NEXT_CHANGELOG.md as part of your PR. -->
1 parent c18dfac commit 05f860a

File tree

13 files changed

+681
-229
lines changed

13 files changed

+681
-229
lines changed
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
bundle:
2+
name: test-bundle-$UNIQUE_NAME
3+
4+
targets:
5+
default:
6+
mode: development
7+
8+
resources:
9+
jobs:
10+
job1:
11+
trigger:
12+
periodic:
13+
interval: 1
14+
unit: DAYS
15+
tasks:
16+
- task_key: main
17+
notebook_task:
18+
notebook_path: /Users/{{workspace_user_name}}/notebook
19+
new_cluster:
20+
spark_version: $DEFAULT_SPARK_VERSION
21+
node_type_id: $NODE_TYPE_ID
22+
num_workers: 1
23+
24+
job2:
25+
tasks:
26+
- task_key: main
27+
notebook_task:
28+
notebook_path: /Users/{{workspace_user_name}}/notebook
29+
new_cluster:
30+
spark_version: $DEFAULT_SPARK_VERSION
31+
node_type_id: $NODE_TYPE_ID
32+
num_workers: 1
33+
34+
pipelines:
35+
pipeline1:
36+
name: test-pipeline-$UNIQUE_NAME
37+
libraries:
38+
- notebook:
39+
path: /Users/{{workspace_user_name}}/notebook

acceptance/bundle/config-remote-sync/cli_defaults/out.test.toml

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/test-bundle-[UNIQUE_NAME]/default/files...
2+
Deploying resources...
3+
Updating deployment state...
4+
Deployment complete!
5+
6+
=== Modify jobs
7+
Change max_concurrent_runs, name, change default tag
8+
9+
=== Modify pipeline
10+
Change edition and channel
11+
12+
=== Detect and save all changes
13+
Detected changes in 2 resource(s):
14+
15+
Resource: resources.jobs.job1
16+
max_concurrent_runs: add
17+
name: add
18+
tags['dev']: add
19+
trigger.pause_status: add
20+
21+
Resource: resources.pipelines.pipeline1
22+
channel: add
23+
edition: add
24+
25+
26+
27+
=== Configuration changes
28+
29+
>>> diff.py databricks.yml.backup databricks.yml
30+
--- databricks.yml.backup
31+
+++ databricks.yml
32+
@@ -1,9 +1,7 @@
33+
bundle:
34+
name: test-bundle-[UNIQUE_NAME]
35+
-
36+
targets:
37+
default:
38+
mode: development
39+
-
40+
resources:
41+
jobs:
42+
@@ -13,4 +11,5 @@
43+
interval: 1
44+
unit: DAYS
45+
+ pause_status: UNPAUSED
46+
tasks:
47+
- task_key: main
48+
@@ -21,5 +20,8 @@
49+
node_type_id: [NODE_TYPE_ID]
50+
num_workers: 1
51+
-
52+
+ tags:
53+
+ dev: default_tag_changed
54+
+ max_concurrent_runs: 5
55+
+ name: Custom Job Name
56+
job2:
57+
tasks:
58+
@@ -31,5 +33,4 @@
59+
node_type_id: [NODE_TYPE_ID]
60+
num_workers: 1
61+
-
62+
pipelines:
63+
pipeline1:
64+
@@ -38,2 +39,4 @@
65+
- notebook:
66+
path: /Users/{{workspace_user_name}}/notebook
67+
+ channel: PREVIEW
68+
+ edition: CORE
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
#!/bin/bash
# Acceptance test: verify that `bundle config-remote-sync --save` writes back
# fields that have CLI/server defaults but are absent from the local config
# (job name, max_concurrent_runs, default tag, trigger.pause_status; pipeline
# edition/channel). Relies on harness helpers available on PATH / in env:
# $CLI, title, trace, read_id.py, edit_resource.py, diff.py.

envsubst < databricks.yml.tmpl > databricks.yml

$CLI bundle deploy
job1_id="$(read_id.py job1)"
job2_id="$(read_id.py job2)"  # kept for the disabled queue-removal test below
pipeline1_id="$(read_id.py pipeline1)"

title "Modify jobs"
echo
echo "Change max_concurrent_runs, name, change default tag"
# The heredoc body is applied by edit_resource.py to the remote resource `r`.
edit_resource.py jobs "$job1_id" <<EOF
r["max_concurrent_runs"] = 5
r["name"] = "Custom Job Name"
r["tags"]["dev"] = "default_tag_changed"
r["trigger"] = {
    "pause_status":"UNPAUSED",
    "periodic": {
        "interval":1,
        "unit":"DAYS"
    }
}
EOF

# Disabled for now - queue removal test
# echo
# echo "Disable queueing"
# edit_resource.py jobs $job2_id <<EOF
# r["queue"] = None
# EOF

title "Modify pipeline"
echo
echo "Change edition and channel"
edit_resource.py pipelines "$pipeline1_id" <<EOF
r["edition"] = "CORE"
r["channel"] = "PREVIEW"
EOF

title "Detect and save all changes"
echo
# Snapshot the config, sync remote state back into it, then show the diff.
cp databricks.yml databricks.yml.backup
$CLI bundle config-remote-sync --save

title "Configuration changes"
echo
trace diff.py databricks.yml.backup databricks.yml
rm databricks.yml.backup
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
# Acceptance-test configuration for the cli_defaults scenario.
# NOTE(review): key semantics inferred from the test harness — confirm there.
Cloud = true

RecordRequests = false
# Presumably files excluded from output comparison / recording.
Ignore = [".databricks", "dummy.whl", "databricks.yml", "databricks.yml.backup"]

[Env]
# Feature flag gating the experimental YAML config-sync behavior under test.
DATABRICKS_BUNDLE_ENABLE_EXPERIMENTAL_YAML_SYNC = "true"

[EnvMatrix]
# Run the scenario once per deployment engine.
DATABRICKS_BUNDLE_ENGINE = ["direct", "terraform"]

acceptance/bundle/config-remote-sync/job_multiple_tasks/databricks.yml.tmpl

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,22 @@ resources:
3939
num_workers: 1
4040
depends_on:
4141
- task_key: task3
42+
43+
rename_task_job:
44+
tasks:
45+
- task_key: task_rename_1
46+
notebook_task:
47+
notebook_path: /Users/{{workspace_user_name}}/rename_task_1
48+
new_cluster:
49+
spark_version: $DEFAULT_SPARK_VERSION
50+
node_type_id: $NODE_TYPE_ID
51+
num_workers: 1
52+
- task_key: task_rename_2
53+
notebook_task:
54+
notebook_path: /Users/{{workspace_user_name}}/rename_task_2
55+
new_cluster:
56+
spark_version: $DEFAULT_SPARK_VERSION
57+
node_type_id: $NODE_TYPE_ID
58+
num_workers: 1
59+
depends_on:
60+
- task_key: task_rename_1

acceptance/bundle/config-remote-sync/job_multiple_tasks/output.txt

Lines changed: 64 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,15 @@ Deployment complete!
88
Detected changes in 1 resource(s):
99

1010
Resource: resources.jobs.my_job
11+
tasks[task_key='new_task']: add
1112
tasks[task_key='task2']: remove
1213
tasks[task_key='task3'].depends_on[0].task_key: replace
1314
tasks[task_key='task3'].new_cluster.num_workers: replace
1415
tasks[task_key='task3'].timeout_seconds: add
1516

1617

1718

18-
=== Configuration changes
19+
=== Configuration changes for new task
1920

2021
>>> diff.py databricks.yml.backup databricks.yml
2122
--- databricks.yml.backup
@@ -26,26 +27,83 @@ Resource: resources.jobs.my_job
2627
-
2728
resources:
2829
jobs:
29-
@@ -13,13 +12,4 @@
30+
@@ -13,13 +12,11 @@
3031
node_type_id: [NODE_TYPE_ID]
3132
num_workers: 1
3233
- - task_key: task2
33-
- notebook_task:
34+
+ - new_cluster:
35+
+ node_type_id: [NODE_TYPE_ID]
36+
+ num_workers: 1
37+
+ spark_version: 13.3.x-snapshot-scala2.12
38+
notebook_task:
3439
- notebook_path: /Users/{{workspace_user_name}}/task2
3540
- new_cluster:
3641
- spark_version: 13.3.x-snapshot-scala2.12
3742
- node_type_id: [NODE_TYPE_ID]
3843
- num_workers: 2
3944
- depends_on:
4045
- - task_key: task1
46+
+ notebook_path: /Users/[USERNAME]/new_task
47+
+ task_key: new_task
4148
- task_key: task3
4249
notebook_task:
43-
@@ -37,5 +27,6 @@
50+
@@ -28,7 +25,8 @@
4451
spark_version: 13.3.x-snapshot-scala2.12
4552
node_type_id: [NODE_TYPE_ID]
46-
- num_workers: 1
53+
- num_workers: 2
4754
+ num_workers: 5
4855
depends_on:
49-
- - task_key: task3
56+
- - task_key: task2
5057
+ - task_key: task1
5158
+ timeout_seconds: 3600
59+
- task_key: task4
60+
notebook_task:
61+
@@ -40,5 +38,4 @@
62+
depends_on:
63+
- task_key: task3
64+
-
65+
rename_task_job:
66+
tasks:
67+
Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/test-bundle-[UNIQUE_NAME]/default/files...
68+
Deploying resources...
69+
Updating deployment state...
70+
Deployment complete!
71+
72+
=== Rename task_rename_1 to task_rename_new
73+
=== Detect task key rename
74+
Detected changes in 1 resource(s):
75+
76+
Resource: resources.jobs.rename_task_job
77+
tasks[task_key='task_rename_1']: remove
78+
tasks[task_key='task_rename_2'].depends_on[0].task_key: replace
79+
tasks[task_key='task_rename_new']: add
80+
81+
82+
83+
=== Configuration changes for task key rename
84+
85+
>>> diff.py databricks.yml.backup2 databricks.yml
86+
--- databricks.yml.backup2
87+
+++ databricks.yml
88+
@@ -40,11 +40,11 @@
89+
rename_task_job:
90+
tasks:
91+
- - task_key: task_rename_1
92+
+ - new_cluster:
93+
+ node_type_id: [NODE_TYPE_ID]
94+
+ num_workers: 1
95+
+ spark_version: 13.3.x-snapshot-scala2.12
96+
notebook_task:
97+
notebook_path: /Users/{{workspace_user_name}}/rename_task_1
98+
- new_cluster:
99+
- spark_version: 13.3.x-snapshot-scala2.12
100+
- node_type_id: [NODE_TYPE_ID]
101+
- num_workers: 1
102+
+ task_key: task_rename_new
103+
- task_key: task_rename_2
104+
notebook_task:
105+
@@ -55,3 +55,3 @@
106+
num_workers: 1
107+
depends_on:
108+
- - task_key: task_rename_1
109+
+ - task_key: task_rename_new

acceptance/bundle/config-remote-sync/job_multiple_tasks/script

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,57 @@ for task in r["tasks"]:
1717
1818
r["tasks"] = [task for task in r["tasks"] if task["task_key"] != "task2"]
1919
20+
r["tasks"].append({
21+
"task_key": "new_task",
22+
"notebook_task": {
23+
"notebook_path": "/Users/${CURRENT_USER_NAME}/new_task"
24+
},
25+
"new_cluster": {
26+
"spark_version": "${DEFAULT_SPARK_VERSION}",
27+
"node_type_id": "${NODE_TYPE_ID}",
28+
"num_workers": 1
29+
}
30+
})
2031
EOF
2132

2233
title "Detect and save changes"
2334
echo
2435
cp databricks.yml databricks.yml.backup
2536
$CLI bundle config-remote-sync --save
2637

27-
title "Configuration changes"
38+
title "Configuration changes for new task"
2839
echo
2940
trace diff.py databricks.yml.backup databricks.yml
3041
rm databricks.yml.backup
# Resolve environment variables that may have been added by config-remote-sync
envsubst < databricks.yml > databricks.yml.resolved
mv databricks.yml.resolved databricks.yml

# Deploy the updated configuration to sync state
$CLI bundle deploy

title "Rename task_rename_1 to task_rename_new"
rename_job_id="$(read_id.py rename_task_job)"
# Quoted heredoc delimiter: the body is passed verbatim to edit_resource.py
# (no shell expansion) and applied to the remote resource `r`.
edit_resource.py jobs "$rename_job_id" <<'EOF'
for task in r["tasks"]:
    if task["task_key"] == "task_rename_1":
        task["task_key"] = "task_rename_new"
    # Update dependencies that reference the old key
    if "depends_on" in task:
        for dep in task["depends_on"]:
            if dep["task_key"] == "task_rename_1":
                dep["task_key"] = "task_rename_new"
EOF

title "Detect task key rename"
echo
cp databricks.yml databricks.yml.backup2
# Best-effort: do not abort the script if the sync reports an error; the diff
# below still shows whatever was written back.
$CLI bundle config-remote-sync --save || true

title "Configuration changes for task key rename"
echo
if [ -f databricks.yml.backup2 ]; then
    trace diff.py databricks.yml.backup2 databricks.yml || true
    rm databricks.yml.backup2
fi

0 commit comments

Comments
 (0)