Skip to content

Commit dc572e1

Browse files
author
Varun Deep Saini
committed
Fix bundle generate job to preserve nested notebook directory structure
Signed-off-by: Varun Deep Saini <varun.23bcs10048@ms.sst.scaler.com>
1 parent 3542987 commit dc572e1

File tree

9 files changed

+276
-5
lines changed

9 files changed

+276
-5
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
bundle:
2+
name: nested_notebooks
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
resources:
2+
jobs:
3+
out:
4+
name: dev.my_repo.my_job
5+
tasks:
6+
- task_key: my_notebook_task
7+
notebook_task:
8+
notebook_path: src/my_folder/my_notebook.py
9+
- task_key: other_notebook_task
10+
notebook_task:
11+
notebook_path: src/other_folder/other_notebook.py

acceptance/bundle/generate/job_nested_notebooks/out.test.toml

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
File successfully saved to src/my_folder/my_notebook.py
2+
File successfully saved to src/other_folder/other_notebook.py
3+
Job configuration successfully saved to out.job.yml
4+
src/my_folder/my_notebook.py
5+
src/other_folder/other_notebook.py
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
$CLI bundle generate job --existing-job-id 1234 --config-dir . --key out --force --source-dir src 2>&1 | sort
2+
find src -type f | sort
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
Ignore = ["src"]
2+
3+
[[Server]]
4+
Pattern = "GET /api/2.2/jobs/get"
5+
Response.Body = '''
6+
{
7+
"job_id": 11223344,
8+
"settings": {
9+
"name": "dev.my_repo.my_job",
10+
"tasks": [
11+
{
12+
"task_key": "my_notebook_task",
13+
"notebook_task": {
14+
"notebook_path": "/my_data_product/dev/my_folder/my_notebook"
15+
}
16+
},
17+
{
18+
"task_key": "other_notebook_task",
19+
"notebook_task": {
20+
"notebook_path": "/my_data_product/dev/other_folder/other_notebook"
21+
}
22+
}
23+
]
24+
}
25+
}
26+
'''
27+
28+
[[Server]]
29+
Pattern = "GET /api/2.0/workspace/get-status"
30+
Response.Body = '''
31+
{
32+
"object_type": "NOTEBOOK",
33+
"language": "PYTHON",
34+
"repos_export_format": "SOURCE"
35+
}
36+
'''
37+
38+
[[Server]]
39+
Pattern = "GET /api/2.0/workspace/export"
40+
Response.Body = '''
41+
print("Hello, World!")
42+
'''

bundle/generate/downloader.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,52 @@ func (n *Downloader) markNotebookForDownload(ctx context.Context, notebookPath *
207207
return nil
208208
}
209209

210+
func (n *Downloader) MarkTasksForDownload(ctx context.Context, tasks []jobs.Task) error {
211+
var paths []string
212+
for _, task := range tasks {
213+
if task.NotebookTask != nil {
214+
paths = append(paths, task.NotebookTask.NotebookPath)
215+
}
216+
}
217+
if len(paths) > 0 {
218+
n.basePath = commonDirPrefix(paths)
219+
}
220+
for i := range tasks {
221+
if err := n.MarkTaskForDownload(ctx, &tasks[i]); err != nil {
222+
return err
223+
}
224+
}
225+
return nil
226+
}
227+
228+
// commonDirPrefix returns the longest common directory-aligned prefix of the given paths.
229+
func commonDirPrefix(paths []string) string {
230+
if len(paths) == 0 {
231+
return ""
232+
}
233+
if len(paths) == 1 {
234+
return path.Dir(paths[0])
235+
}
236+
237+
prefix := paths[0]
238+
for _, p := range paths[1:] {
239+
for !strings.HasPrefix(p, prefix) {
240+
prefix = prefix[:len(prefix)-1]
241+
if prefix == "" {
242+
return ""
243+
}
244+
}
245+
}
246+
247+
// Truncate to last '/' to ensure directory alignment.
248+
if i := strings.LastIndex(prefix, "/"); i >= 0 {
249+
prefix = prefix[:i]
250+
} else {
251+
prefix = ""
252+
}
253+
return prefix
254+
}
255+
210256
func (n *Downloader) relativePath(fullPath string) string {
211257
basePath := path.Dir(fullPath)
212258
if n.basePath != "" {

bundle/generate/downloader_test.go

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,15 @@ package generate
22

33
import (
44
"context"
5+
"encoding/json"
6+
"net/http"
7+
"net/http/httptest"
58
"path/filepath"
69
"testing"
710

11+
"github.com/databricks/databricks-sdk-go"
812
"github.com/databricks/databricks-sdk-go/experimental/mocks"
13+
"github.com/databricks/databricks-sdk-go/service/jobs"
914
"github.com/databricks/databricks-sdk-go/service/workspace"
1015
"github.com/stretchr/testify/assert"
1116
"github.com/stretchr/testify/require"
@@ -94,3 +99,158 @@ func TestDownloader_DoesNotRecurseIntoNodeModules(t *testing.T) {
9499
assert.Contains(t, downloader.files, filepath.Join(sourceDir, "app.py"))
95100
assert.Contains(t, downloader.files, filepath.Join(sourceDir, "src/index.js"))
96101
}
102+
103+
func TestCommonDirPrefix(t *testing.T) {
104+
tests := []struct {
105+
name string
106+
paths []string
107+
want string
108+
}{
109+
{
110+
name: "empty",
111+
paths: nil,
112+
want: "",
113+
},
114+
{
115+
name: "single path",
116+
paths: []string{"/a/b/c"},
117+
want: "/a/b",
118+
},
119+
{
120+
name: "shared parent",
121+
paths: []string{"/a/b/c", "/a/b/d"},
122+
want: "/a/b",
123+
},
124+
{
125+
name: "root divergence",
126+
paths: []string{"/x/y", "/z/w"},
127+
want: "",
128+
},
129+
{
130+
name: "partial dir name safety",
131+
paths: []string{"/a/bc/d", "/a/bd/e"},
132+
want: "/a",
133+
},
134+
{
135+
name: "nested shared prefix",
136+
paths: []string{"/Users/user/project/etl/extract", "/Users/user/project/reporting/dashboard"},
137+
want: "/Users/user/project",
138+
},
139+
{
140+
name: "identical paths",
141+
paths: []string{"/a/b/c", "/a/b/c"},
142+
want: "/a/b",
143+
},
144+
}
145+
for _, tt := range tests {
146+
t.Run(tt.name, func(t *testing.T) {
147+
assert.Equal(t, tt.want, commonDirPrefix(tt.paths))
148+
})
149+
}
150+
}
151+
152+
func newTestWorkspaceClient(t *testing.T, handler http.HandlerFunc) *databricks.WorkspaceClient {
153+
server := httptest.NewServer(handler)
154+
t.Cleanup(server.Close)
155+
156+
w, err := databricks.NewWorkspaceClient(&databricks.Config{
157+
Host: server.URL,
158+
Token: "test-token",
159+
})
160+
require.NoError(t, err)
161+
return w
162+
}
163+
164+
func notebookStatusHandler(t *testing.T) http.HandlerFunc {
165+
return func(w http.ResponseWriter, r *http.Request) {
166+
if r.URL.Path != "/api/2.0/workspace/get-status" {
167+
t.Fatalf("unexpected request path: %s", r.URL.Path)
168+
}
169+
resp := workspaceStatus{
170+
Language: workspace.LanguagePython,
171+
ObjectType: workspace.ObjectTypeNotebook,
172+
ExportFormat: workspace.ExportFormatSource,
173+
}
174+
w.Header().Set("Content-Type", "application/json")
175+
err := json.NewEncoder(w).Encode(resp)
176+
if err != nil {
177+
t.Fatal(err)
178+
}
179+
}
180+
}
181+
182+
func TestDownloader_MarkTasksForDownload_PreservesStructure(t *testing.T) {
183+
ctx := context.Background()
184+
w := newTestWorkspaceClient(t, notebookStatusHandler(t))
185+
186+
dir := "base/dir"
187+
sourceDir := filepath.Join(dir, "source")
188+
configDir := filepath.Join(dir, "config")
189+
downloader := NewDownloader(w, sourceDir, configDir)
190+
191+
tasks := []jobs.Task{
192+
{
193+
TaskKey: "extract_task",
194+
NotebookTask: &jobs.NotebookTask{
195+
NotebookPath: "/Users/user/project/etl/extract",
196+
},
197+
},
198+
{
199+
TaskKey: "dashboard_task",
200+
NotebookTask: &jobs.NotebookTask{
201+
NotebookPath: "/Users/user/project/reporting/dashboard",
202+
},
203+
},
204+
}
205+
206+
err := downloader.MarkTasksForDownload(ctx, tasks)
207+
require.NoError(t, err)
208+
209+
assert.Equal(t, filepath.FromSlash("../source/etl/extract.py"), tasks[0].NotebookTask.NotebookPath)
210+
assert.Equal(t, filepath.FromSlash("../source/reporting/dashboard.py"), tasks[1].NotebookTask.NotebookPath)
211+
assert.Len(t, downloader.files, 2)
212+
}
213+
214+
func TestDownloader_MarkTasksForDownload_SingleNotebook(t *testing.T) {
215+
ctx := context.Background()
216+
w := newTestWorkspaceClient(t, notebookStatusHandler(t))
217+
218+
dir := "base/dir"
219+
sourceDir := filepath.Join(dir, "source")
220+
configDir := filepath.Join(dir, "config")
221+
downloader := NewDownloader(w, sourceDir, configDir)
222+
223+
tasks := []jobs.Task{
224+
{
225+
TaskKey: "task1",
226+
NotebookTask: &jobs.NotebookTask{
227+
NotebookPath: "/Users/user/project/notebook",
228+
},
229+
},
230+
}
231+
232+
err := downloader.MarkTasksForDownload(ctx, tasks)
233+
require.NoError(t, err)
234+
235+
// Single notebook: basePath = path.Dir => same as old behavior.
236+
assert.Equal(t, filepath.FromSlash("../source/notebook.py"), tasks[0].NotebookTask.NotebookPath)
237+
assert.Len(t, downloader.files, 1)
238+
}
239+
240+
func TestDownloader_MarkTasksForDownload_NoNotebooks(t *testing.T) {
241+
ctx := context.Background()
242+
w := newTestWorkspaceClient(t, func(w http.ResponseWriter, r *http.Request) {
243+
t.Fatalf("unexpected request: %s %s", r.Method, r.URL.Path)
244+
})
245+
246+
downloader := NewDownloader(w, "source", "config")
247+
248+
tasks := []jobs.Task{
249+
{TaskKey: "spark_task"},
250+
{TaskKey: "python_wheel_task"},
251+
}
252+
253+
err := downloader.MarkTasksForDownload(ctx, tasks)
254+
require.NoError(t, err)
255+
assert.Empty(t, downloader.files)
256+
}

cmd/bundle/generate/job.go

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -92,11 +92,9 @@ After generation, you can deploy this job to other targets using:
9292
if job.Settings.GitSource != nil {
9393
cmdio.LogString(ctx, "Job is using Git source, skipping downloading files")
9494
} else {
95-
for _, task := range job.Settings.Tasks {
96-
err := downloader.MarkTaskForDownload(ctx, &task)
97-
if err != nil {
98-
return err
99-
}
95+
err = downloader.MarkTasksForDownload(ctx, job.Settings.Tasks)
96+
if err != nil {
97+
return err
10098
}
10199
}
102100

0 commit comments

Comments
 (0)