
Commit baf371e
Author: Varun Deep Saini
Add WAL for direct deployment state recovery
Signed-off-by: Varun Deep Saini <varun.23bcs10048@ms.sst.scaler.com>
1 parent: f4f794c

70 files changed: +2040 −19 lines
Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
bundle:
  name: wal-corrupted-test

resources:
  jobs:
    valid_job:
      name: "valid-job"
      tasks:
        - task_key: "task-a"
          spark_python_task:
            python_file: ./test.py
          new_cluster:
            spark_version: 15.4.x-scala2.12
            node_type_id: i3.xlarge
            num_workers: 0
    another_valid:
      name: "another-valid"
      tasks:
        - task_key: "task-b"
          spark_python_task:
            python_file: ./test.py
          new_cluster:
            spark_version: 15.4.x-scala2.12
            node_type_id: i3.xlarge
            num_workers: 0

acceptance/bundle/deploy/wal/corrupted-wal-entry/out.test.toml

Lines changed: 5 additions & 0 deletions
Lines changed: 56 additions & 0 deletions
@@ -0,0 +1,56 @@
=== Creating state file with serial 5 ===
=== Creating WAL with corrupted entry ===
=== WAL content ===
{"lineage":"test-lineage-123","serial": [SERIAL]}
{"k":"resources.jobs.valid_job","v":{"__id__": "[ID]","state":{"name":"valid-job"}}}
not valid json - this line should be skipped
{"k":"resources.jobs.another_valid","v":{"__id__": "[ID]","state":{"name":"another-valid"}}}
=== Deploy (should recover valid entries, skip corrupted) ===

>>> [CLI] bundle deploy
Warning: Single node cluster is not correctly configured
  at resources.jobs.another_valid.tasks[0].new_cluster
  in databricks.yml:23:13

num_workers should be 0 only for single-node clusters. To create a
valid single node cluster please ensure that the following properties
are correctly set in the cluster specification:

  spark_conf:
    spark.databricks.cluster.profile: singleNode
    spark.master: local[*]

  custom_tags:
    ResourceClass: SingleNode


Warning: Single node cluster is not correctly configured
  at resources.jobs.valid_job.tasks[0].new_cluster
  in databricks.yml:13:13

num_workers should be 0 only for single-node clusters. To create a
valid single node cluster please ensure that the following properties
are correctly set in the cluster specification:

  spark_conf:
    spark.databricks.cluster.profile: singleNode
    spark.master: local[*]

  custom_tags:
    ResourceClass: SingleNode


Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/wal-corrupted-test/default/files...
Deploying resources...
Updating deployment state...
Deployment complete!
=== Final state (should have recovered entries) ===
{
  "serial": [SERIAL],
  "state_keys": [
    "resources.jobs.another_valid",
    "resources.jobs.valid_job"
  ]
}
=== WAL after successful deploy ===
WAL deleted (expected)
Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
echo "=== Creating state file with serial 5 ==="
mkdir -p .databricks/bundle/default
cat > .databricks/bundle/default/resources.json << 'EOF'
{
  "state_version": 1,
  "cli_version": "0.0.0",
  "lineage": "test-lineage-123",
  "serial": 5,
  "state": {}
}
EOF

echo "=== Creating WAL with corrupted entry ==="
cat > .databricks/bundle/default/resources.json.wal << 'EOF'
{"lineage":"test-lineage-123","serial":6}
{"k":"resources.jobs.valid_job","v":{"__id__":"1111","state":{"name":"valid-job"}}}
not valid json - this line should be skipped
{"k":"resources.jobs.another_valid","v":{"__id__":"2222","state":{"name":"another-valid"}}}
EOF

echo "=== WAL content ==="
cat .databricks/bundle/default/resources.json.wal

echo "=== Deploy (should recover valid entries, skip corrupted) ==="
trace $CLI bundle deploy 2>&1 | python3 sort_warnings.py

echo "=== Final state (should have recovered entries) ==="
cat .databricks/bundle/default/resources.json | jq -S '{serial: .serial, state_keys: (.state | keys | sort)}'

echo "=== WAL after successful deploy ==="
if [ -f ".databricks/bundle/default/resources.json.wal" ]; then
  echo "WAL exists (unexpected)"
else
  echo "WAL deleted (expected)"
fi
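The replay rule this test exercises — first WAL line is a header with lineage and serial, each later line is a key/value entry, and any line that fails to parse is skipped — can be sketched in Python. This is an illustrative model only, not the CLI's actual implementation; `replay_wal` is a hypothetical name:

```python
import json


def replay_wal(base_state, wal_text):
    """Replay WAL entries onto a copy of the state mapping (illustrative).

    The first non-empty line is a header carrying lineage and the new serial;
    every later line is a {"k": ..., "v": ...} entry. Lines that fail to parse
    as JSON are skipped, so one torn write cannot poison the rest of the log.
    """
    state = dict(base_state)
    lines = [ln for ln in wal_text.splitlines() if ln.strip()]
    header = json.loads(lines[0])
    for line in lines[1:]:
        try:
            entry = json.loads(line)
        except json.JSONDecodeError:
            continue  # corrupted entry: skip it, keep replaying the rest
        state[entry["k"]] = entry["v"]
    return header["serial"], state


# The same WAL the script writes above:
wal = "\n".join([
    '{"lineage":"test-lineage-123","serial":6}',
    '{"k":"resources.jobs.valid_job","v":{"__id__":"1111","state":{"name":"valid-job"}}}',
    "not valid json - this line should be skipped",
    '{"k":"resources.jobs.another_valid","v":{"__id__":"2222","state":{"name":"another-valid"}}}',
])
serial, state = replay_wal({}, wal)
print(serial, sorted(state))
```

Skipping per line (rather than aborting on the first bad entry) is what lets the deploy above recover both valid jobs.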
Lines changed: 87 additions & 0 deletions
@@ -0,0 +1,87 @@
#!/usr/bin/env python3
"""Sort warning blocks in CLI output to make test output deterministic.

Warning blocks look like:

Warning: Single node cluster is not correctly configured
  at resources.jobs.XXX.tasks[0].new_cluster
  in databricks.yml:NN:NN

num_workers should be 0 only for single-node clusters...
  spark_conf:
    ...
  custom_tags:
    ...

This script groups consecutive warning blocks, sorts them by job name, and outputs.
"""

import re
import sys


def main():
    content = sys.stdin.read()
    lines = content.split("\n")

    result = []
    i = 0

    while i < len(lines):
        line = lines[i]

        # Check if this is the start of a warning block
        if line.startswith("Warning:"):
            # Collect all consecutive warning blocks
            warnings = []
            while i < len(lines) and (
                lines[i].startswith("Warning:")
                or (
                    warnings
                    and not lines[i].startswith("Uploading")
                    and not lines[i].startswith("Deploying")
                    and not lines[i].startswith(">>>")
                    and not lines[i].startswith("===")
                )
            ):
                # Collect one complete warning block
                block = []
                if lines[i].startswith("Warning:"):
                    block.append(lines[i])
                    i += 1
                    # Collect until next Warning or end marker
                    while i < len(lines):
                        if lines[i].startswith("Warning:"):
                            break
                        if lines[i].startswith("Uploading") or lines[i].startswith("Deploying"):
                            break
                        if lines[i].startswith(">>>") or lines[i].startswith("==="):
                            break
                        block.append(lines[i])
                        i += 1
                    warnings.append(block)
                else:
                    i += 1

            # Sort warnings by the job name in "at resources.jobs.XXX"
            def get_sort_key(block):
                for line in block:
                    match = re.search(r"at resources\.jobs\.(\w+)", line)
                    if match:
                        return match.group(1)
                return ""

            warnings.sort(key=get_sort_key)

            # Output sorted warnings
            for block in warnings:
                for line in block:
                    result.append(line)
        else:
            result.append(line)
            i += 1

    print("\n".join(result), end="")


if __name__ == "__main__":
    main()
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
print("test")
Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
# WAL with corrupted entry - valid entries should be recovered, corrupted skipped.

[[Server]]
Pattern = "POST /api/2.2/jobs/reset"
Response.Body = '{}'

[[Server]]
Pattern = "GET /api/2.2/jobs/get?job_id=1111"
Response.Body = '{"job_id": 1111, "settings": {"name": "valid-job"}}'

[[Server]]
Pattern = "GET /api/2.2/jobs/get?job_id=2222"
Response.Body = '{"job_id": 2222, "settings": {"name": "another-valid"}}'
Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
bundle:
  name: wal-crash-test

resources:
  jobs:
    job_a:
      name: "test-job-a"
      tasks:
        - task_key: "task-a"
          spark_python_task:
            python_file: ./test.py
          new_cluster:
            spark_version: 15.4.x-scala2.12
            node_type_id: i3.xlarge
            num_workers: 0

acceptance/bundle/deploy/wal/crash-after-create/out.test.toml

Lines changed: 5 additions & 0 deletions
Lines changed: 38 additions & 0 deletions
@@ -0,0 +1,38 @@
=== Creating state directory ===
=== Creating WAL file (simulating crash after job create) ===
=== WAL content before deploy ===
{"lineage":"test-lineage-123","serial": [SERIAL]}
{"k":"resources.jobs.job_a","v":{"__id__": "[ID]","state":{"name":"test-job-a"}}}
=== Deploy (should recover from WAL) ===

>>> [CLI] bundle deploy
Warning: Single node cluster is not correctly configured
  at resources.jobs.job_a.tasks[0].new_cluster
  in databricks.yml:13:13

num_workers should be 0 only for single-node clusters. To create a
valid single node cluster please ensure that the following properties
are correctly set in the cluster specification:

  spark_conf:
    spark.databricks.cluster.profile: singleNode
    spark.master: local[*]

  custom_tags:
    ResourceClass: SingleNode


Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/wal-crash-test/default/files...
Deploying resources...
Updating deployment state...
Deployment complete!
=== State file after recovery ===
{
  "lineage": "test-lineage-123",
  "serial": [SERIAL],
  "state_keys": [
    "resources.jobs.job_a"
  ]
}
=== WAL file after successful deploy ===
WAL file deleted (expected)
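The lifecycle both tests assume — append a durable WAL entry after each resource create, then fold the log into the state file and delete it once the deploy succeeds — can be sketched as an illustrative Python model. The CLI itself is written in Go; `StateWAL`, `log_create`, and `checkpoint` are hypothetical names for this sketch:

```python
import json
import os
import tempfile


class StateWAL:
    """Illustrative WAL lifecycle: append one entry per created resource,
    then fold the log into the state file and delete it on success."""

    def __init__(self, state_path):
        self.state_path = state_path
        self.wal_path = state_path + ".wal"

    def log_create(self, lineage, serial, key, value):
        # Append-only write; fsync so the entry survives a crash that
        # happens right after the backend create call returns.
        new = not os.path.exists(self.wal_path)
        with open(self.wal_path, "a") as f:
            if new:  # header line carries lineage and the new serial
                f.write(json.dumps({"lineage": lineage, "serial": serial}) + "\n")
            f.write(json.dumps({"k": key, "v": value}) + "\n")
            f.flush()
            os.fsync(f.fileno())

    def checkpoint(self):
        # Fold WAL entries into the state file, then remove the WAL --
        # matching "WAL file deleted (expected)" in the output above.
        with open(self.state_path) as f:
            doc = json.load(f)
        with open(self.wal_path) as f:
            lines = [ln for ln in f if ln.strip()]
        doc["serial"] = json.loads(lines[0])["serial"]
        for ln in lines[1:]:
            try:
                entry = json.loads(ln)
            except json.JSONDecodeError:
                continue  # tolerate a torn or corrupted entry
            doc["state"][entry["k"]] = entry["v"]
        with open(self.state_path, "w") as f:
            json.dump(doc, f)
        os.remove(self.wal_path)


with tempfile.TemporaryDirectory() as d:
    path = os.path.join(d, "resources.json")
    with open(path, "w") as f:
        json.dump({"lineage": "test-lineage-123", "serial": 5, "state": {}}, f)
    wal = StateWAL(path)
    wal.log_create("test-lineage-123", 6, "resources.jobs.job_a",
                   {"__id__": "1111", "state": {"name": "test-job-a"}})
    wal.checkpoint()
    with open(path) as f:
        doc = json.load(f)
    print(doc["serial"], sorted(doc["state"]), os.path.exists(wal.wal_path))
```

Because the entry is fsynced before the deploy proceeds, a crash between the create call and the state-file rewrite leaves enough on disk for the next deploy to recover, which is exactly the scenario the crash-after-create test stages by hand.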
