Skip to content

Commit 4876824

Browse files
author
Varun Deep Saini
committed
Fix bundle generate job to preserve nested notebook directory structure
Signed-off-by: Varun Deep Saini <varun.23bcs10048@ms.sst.scaler.com>
1 parent ce3b35e commit 4876824

File tree

12 files changed

+364
-10
lines changed

12 files changed

+364
-10
lines changed

NEXT_CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
### Bundles
88
* Modify grants to use SDK types ([#4666](https://github.com/databricks/cli/pull/4666))
9+
* Fix `bundle generate` job to preserve nested notebook directory structure ([#4596](https://github.com/databricks/cli/pull/4596))
910

1011
### Dependency updates
1112
* Bump databricks-sdk-go from v0.112.0 to v0.119.0 ([#4631](https://github.com/databricks/cli/pull/4631), [#4695](https://github.com/databricks/cli/pull/4695))
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
bundle:
2+
name: nested_notebooks
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
resources:
2+
jobs:
3+
out:
4+
name: dev.my_repo.my_job
5+
tasks:
6+
- task_key: my_notebook_task
7+
notebook_task:
8+
notebook_path: src/my_folder/my_notebook.py
9+
- task_key: other_notebook_task
10+
notebook_task:
11+
notebook_path: src/other_folder/other_notebook.py

acceptance/bundle/generate/job_nested_notebooks/out.test.toml

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
File successfully saved to src/my_folder/my_notebook.py
2+
File successfully saved to src/other_folder/other_notebook.py
3+
Job configuration successfully saved to out.job.yml
4+
=== old flattened files should be gone ===
5+
src/my_notebook.py removed
6+
src/other_notebook.py removed
7+
=== new nested files ===
8+
src/my_folder/my_notebook.py
9+
src/other_folder/other_notebook.py
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
mkdir -p src
2+
echo "old" > src/my_notebook.py
3+
echo "old" > src/other_notebook.py
4+
5+
$CLI bundle generate job --existing-job-id 1234 --config-dir . --key out --force --source-dir src 2>&1 | sort
6+
7+
echo "=== old flattened files should be gone ==="
8+
test ! -f src/my_notebook.py && echo "src/my_notebook.py removed" || echo "src/my_notebook.py still exists"
9+
test ! -f src/other_notebook.py && echo "src/other_notebook.py removed" || echo "src/other_notebook.py still exists"
10+
11+
echo "=== new nested files ==="
12+
find src -type f | sort
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
Ignore = ["src"]
2+
3+
[[Server]]
4+
Pattern = "GET /api/2.2/jobs/get"
5+
Response.Body = '''
6+
{
7+
"job_id": 11223344,
8+
"settings": {
9+
"name": "dev.my_repo.my_job",
10+
"tasks": [
11+
{
12+
"task_key": "my_notebook_task",
13+
"notebook_task": {
14+
"notebook_path": "/my_data_product/dev/my_folder/my_notebook"
15+
}
16+
},
17+
{
18+
"task_key": "other_notebook_task",
19+
"notebook_task": {
20+
"notebook_path": "/my_data_product/dev/other_folder/other_notebook"
21+
}
22+
}
23+
]
24+
}
25+
}
26+
'''
27+
28+
[[Server]]
29+
Pattern = "GET /api/2.0/workspace/get-status"
30+
Response.Body = '''
31+
{
32+
"object_type": "NOTEBOOK",
33+
"language": "PYTHON",
34+
"repos_export_format": "SOURCE"
35+
}
36+
'''
37+
38+
[[Server]]
39+
Pattern = "GET /api/2.0/workspace/export"
40+
Response.Body = '''
41+
print("Hello, World!")
42+
'''

bundle/generate/downloader.go

Lines changed: 69 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"strings"
1212

1313
"github.com/databricks/cli/libs/cmdio"
14+
"github.com/databricks/cli/libs/log"
1415
"github.com/databricks/cli/libs/notebook"
1516
"github.com/databricks/databricks-sdk-go"
1617
"github.com/databricks/databricks-sdk-go/service/jobs"
@@ -73,7 +74,7 @@ func (n *Downloader) markFileForDownload(ctx context.Context, filePath *string)
7374
return err
7475
}
7576

76-
*filePath = rel
77+
*filePath = filepath.ToSlash(rel)
7778
return nil
7879
}
7980

@@ -109,7 +110,7 @@ func (n *Downloader) MarkDirectoryForDownload(ctx context.Context, dirPath *stri
109110
return err
110111
}
111112

112-
*dirPath = rel
113+
*dirPath = filepath.ToSlash(rel)
113114
return nil
114115
}
115116

@@ -203,10 +204,75 @@ func (n *Downloader) markNotebookForDownload(ctx context.Context, notebookPath *
203204
return err
204205
}
205206

206-
*notebookPath = rel
207+
*notebookPath = filepath.ToSlash(rel)
207208
return nil
208209
}
209210

211+
func (n *Downloader) MarkTasksForDownload(ctx context.Context, tasks []jobs.Task) error {
212+
var paths []string
213+
for _, task := range tasks {
214+
if task.NotebookTask != nil {
215+
paths = append(paths, task.NotebookTask.NotebookPath)
216+
}
217+
}
218+
if len(paths) > 0 {
219+
n.basePath = commonDirPrefix(paths)
220+
}
221+
for i := range tasks {
222+
if err := n.MarkTaskForDownload(ctx, &tasks[i]); err != nil {
223+
return err
224+
}
225+
}
226+
return nil
227+
}
228+
229+
func (n *Downloader) CleanupOldFiles(ctx context.Context) {
230+
for targetPath := range n.files {
231+
rel, err := filepath.Rel(n.sourceDir, targetPath)
232+
if err != nil {
233+
continue
234+
}
235+
if filepath.Base(rel) == rel {
236+
continue
237+
}
238+
oldPath := filepath.Join(n.sourceDir, filepath.Base(rel))
239+
if _, isNewFile := n.files[oldPath]; isNewFile {
240+
continue
241+
}
242+
if err := os.Remove(oldPath); err == nil {
243+
log.Infof(ctx, "Removed previously generated file %s", filepath.ToSlash(oldPath))
244+
}
245+
}
246+
}
247+
248+
// commonDirPrefix returns the longest common directory-aligned prefix of the given paths.
//
// Paths are treated as slash-separated strings. The result is:
//   - "" when paths is empty;
//   - path.Dir(paths[0]) when there is exactly one path;
//   - otherwise the longest leading string shared by all paths, cut back to
//     its last "/" so it never ends in the middle of a path component.
//
// When absolute paths share nothing but the root, the result is "/" rather
// than "". This matches what path.Dir returns for a single root-level path
// and keeps the result distinguishable from "no common directory", which is
// reported as "" (for example for relative paths with different first
// components).
func commonDirPrefix(paths []string) string {
	switch len(paths) {
	case 0:
		return ""
	case 1:
		return path.Dir(paths[0])
	}

	prefix := paths[0]
	for _, p := range paths[1:] {
		// Shrink prefix to the part it shares with p.
		shared := 0
		for shared < len(prefix) && shared < len(p) && prefix[shared] == p[shared] {
			shared++
		}
		prefix = prefix[:shared]
		if prefix == "" {
			return ""
		}
	}

	// Truncate to the last '/' to ensure directory alignment. "/" is ASCII,
	// so this also discards any partially matched multi-byte character.
	switch i := strings.LastIndex(prefix, "/"); {
	case i < 0:
		return ""
	case i == 0:
		// Only the root is shared; keep it instead of collapsing to "".
		return "/"
	default:
		return prefix[:i]
	}
}
275+
210276
func (n *Downloader) relativePath(fullPath string) string {
211277
basePath := path.Dir(fullPath)
212278
if n.basePath != "" {

0 commit comments

Comments
 (0)