Skip to content

Commit 2d39dd8

Browse files
authored
Fix pulling remote state snapshot in config-remote-sync (#4444)
## Changes

Currently, the CLI stores the remote resource snapshot file at `/Workspace/Users/<user>/.bundle/<bundle>/dev/state/resources-config-sync-snapshot.json`, but `config-remote-sync` never uses it; the command relies only on the snapshot in the local cache. This PR adds a step to `config-remote-sync` that ensures the snapshot is loaded: when the local copy is missing, it is pulled from the remote state location.

## Why

In DABs in the Workspace, the cache is flushed when compute is reattached, which leads to unexpected behavior because the local snapshot is no longer present.

## Tests

Added the acceptance test `acceptance/bundle/config-remote-sync/flushed_cache`, which deploys a bundle, modifies the job remotely, removes the local `.databricks` directory, and checks that `config-remote-sync` still detects the change.
1 parent ee62276 commit 2d39dd8

File tree

6 files changed

+145
-0
lines changed

6 files changed

+145
-0
lines changed
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
bundle:
2+
name: test-bundle-$UNIQUE_NAME
3+
4+
resources:
5+
jobs:
6+
test_job:
7+
max_concurrent_runs: 1
8+
tasks:
9+
- task_key: main
10+
notebook_task:
11+
notebook_path: /Users/{{workspace_user_name}}/notebook
12+
new_cluster:
13+
spark_version: $DEFAULT_SPARK_VERSION
14+
node_type_id: $NODE_TYPE_ID
15+
num_workers: 1

acceptance/bundle/config-remote-sync/flushed_cache/out.test.toml

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
2+
=== Deploy bundle
3+
Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/test-bundle-[UNIQUE_NAME]/default/files...
4+
Deploying resources...
5+
Updating deployment state...
6+
Deployment complete!
7+
8+
=== Get job ID and modify remote resource
9+
Modify max_concurrent_runs to 5
10+
11+
=== Flush local cache
12+
Removing .databricks directory to simulate flushed cache
13+
ls: .databricks: No such file or directory
14+
Cache directory removed
15+
16+
=== Check for changes after cache flush
17+
Detected changes in 1 resource(s):
18+
19+
Resource: resources.jobs.test_job
20+
max_concurrent_runs: replace
21+
22+
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#!/bin/bash
# Acceptance scenario: config-remote-sync must still detect remote changes
# after the local .databricks directory has been removed.
# NOTE(review): `title`, `read_id.py`, `edit_resource.py` and $CLI are not
# defined in this script; presumably the acceptance harness provides them.

# Render the bundle configuration from its template.
envsubst < databricks.yml.tmpl > databricks.yml

title "Deploy bundle"
echo
$CLI bundle deploy

title "Get job ID and modify remote resource"
echo
deployed_job_id="$(read_id.py test_job)"
echo "Modify max_concurrent_runs to 5"
# The edit snippet is fed to edit_resource.py on stdin.
edit_resource.py jobs $deployed_job_id <<< 'r["max_concurrent_runs"] = 5'

title "Flush local cache"
echo
echo "Removing .databricks directory to simulate flushed cache"
rm -rf .databricks
# Listing the removed directory fails; its error output is kept in the
# recorded output, followed by the confirmation message.
if ! ls -la .databricks 2>&1; then
    echo "Cache directory removed"
fi

title "Check for changes after cache flush"
echo
$CLI bundle config-remote-sync
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
Cloud = true
2+
3+
RecordRequests = false
4+
Ignore = [".databricks", "databricks.yml"]
5+
6+
[Env]
7+
DATABRICKS_BUNDLE_ENABLE_EXPERIMENTAL_YAML_SYNC = "true"
8+
9+
[EnvMatrix]
10+
DATABRICKS_BUNDLE_ENGINE = ["direct", "terraform"]
11+
12+
# Normalize ls error messages across platforms
13+
[[Repls]]
14+
Old = "ls: cannot access '\\.databricks': No such file or directory"
15+
New = "ls: .databricks: No such file or directory"

bundle/configsync/diff.go

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,16 @@ package configsync
22

33
import (
44
"context"
5+
"errors"
56
"fmt"
7+
"io"
8+
"io/fs"
9+
"os"
10+
"path/filepath"
611

712
"github.com/databricks/cli/bundle"
813
"github.com/databricks/cli/bundle/config/engine"
14+
"github.com/databricks/cli/bundle/deploy"
915
"github.com/databricks/cli/bundle/deployplan"
1016
"github.com/databricks/cli/bundle/direct"
1117
"github.com/databricks/cli/libs/dyn"
@@ -120,6 +126,11 @@ func convertChangeDesc(path string, cd *deployplan.ChangeDesc) (*ConfigChangeDes
120126
func DetectChanges(ctx context.Context, b *bundle.Bundle, engine engine.EngineType) (Changes, error) {
121127
changes := make(Changes)
122128

129+
err := ensureSnapshotAvailable(ctx, b, engine)
130+
if err != nil {
131+
return nil, fmt.Errorf("state snapshot not available: %w", err)
132+
}
133+
123134
deployBundle := &direct.DeploymentBundle{}
124135
var statePath string
125136
if engine.IsDirect() {
@@ -164,3 +175,52 @@ func DetectChanges(ctx context.Context, b *bundle.Bundle, engine engine.EngineTy
164175

165176
return changes, nil
166177
}
178+
179+
func ensureSnapshotAvailable(ctx context.Context, b *bundle.Bundle, engine engine.EngineType) error {
180+
if engine.IsDirect() {
181+
return nil
182+
}
183+
184+
remotePathSnapshot, localPathSnapshot := b.StateFilenameConfigSnapshot(ctx)
185+
186+
if _, err := os.Stat(localPathSnapshot); err == nil {
187+
return nil
188+
} else if !errors.Is(err, fs.ErrNotExist) {
189+
return fmt.Errorf("checking snapshot file: %w", err)
190+
}
191+
192+
log.Debugf(ctx, "Resources state snapshot not found locally, pulling from remote")
193+
194+
f, err := deploy.StateFiler(b)
195+
if err != nil {
196+
return fmt.Errorf("getting state filer: %w", err)
197+
}
198+
199+
r, err := f.Read(ctx, remotePathSnapshot)
200+
if err != nil {
201+
if errors.Is(err, fs.ErrNotExist) {
202+
return fmt.Errorf("resources state snapshot not found remotely at %s", remotePathSnapshot)
203+
}
204+
return fmt.Errorf("reading remote snapshot: %w", err)
205+
}
206+
defer r.Close()
207+
208+
content, err := io.ReadAll(r)
209+
if err != nil {
210+
return fmt.Errorf("reading snapshot content: %w", err)
211+
}
212+
213+
localStateDir := filepath.Dir(localPathSnapshot)
214+
err = os.MkdirAll(localStateDir, 0o700)
215+
if err != nil {
216+
return fmt.Errorf("creating snapshot directory: %w", err)
217+
}
218+
219+
err = os.WriteFile(localPathSnapshot, content, 0o600)
220+
if err != nil {
221+
return fmt.Errorf("writing snapshot file: %w", err)
222+
}
223+
224+
log.Debugf(ctx, "Pulled config snapshot from remote to %s", localPathSnapshot)
225+
return nil
226+
}

0 commit comments

Comments
 (0)