Commit 531b7ad

pipelines: use lakeflow-pipelines template (#4237)
## Changes

Change the template used by "pipelines init" from "cli-pipelines" to "lakeflow-pipelines". Remove the code for the "cli-pipelines" template and point "cli-pipelines" at "lakeflow-pipelines" in case anything relies on the undocumented hidden template. There are a few minor problems with the lakeflow-pipelines template that we will address in a follow-up; for example, we create job parameters but never use them.

## Why

The lakeflow-pipelines template follows the recommended layout for SDP projects in DABs and is consistent with how other templates are structured.

## Tests

Updated acceptance tests.
1 parent 5963b41 commit 531b7ad

File tree

85 files changed, +615 −1097 lines changed


acceptance/pipelines/e2e/output.txt

Lines changed: 30 additions & 23 deletions
```diff
@@ -2,21 +2,26 @@
 === E2E Test: Complete pipeline lifecycle (init, deploy, run, stop, destroy)
 === Initialize pipeline project
 >>> [PIPELINES] init --output-dir output
+Welcome to the template for Lakeflow Declarative Pipelines!
 
-Welcome to the template for pipelines!
+Please answer the below to tailor your project to your preferences.
+You can always change your mind and change your configuration in the databricks.yml file later.
 
+Note that [DATABRICKS_URL] is used for initialization
+(see https://docs.databricks.com/dev-tools/cli/profiles.html for how to change your profile).
 
-Your new project has been created in the 'my_project' directory!
+Your new project has been created in the 'lakeflow_project' directory!
 
-Refer to the README.md file for "getting started" instructions!
+Please refer to the README.md file for "getting started" instructions.
 
 === Deploy pipeline
 >>> [PIPELINES] deploy
-Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/my_project/dev/files...
+Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/lakeflow_project/dev/files...
 Deploying resources...
 Updating deployment state...
 Deployment complete!
-View your pipeline my_project_pipeline here: [DATABRICKS_URL]/pipelines/[UUID]?o=[NUMID]
+View your job sample_job here: [DATABRICKS_URL]/jobs/[NUMID]?o=[NUMID]
+View your pipeline lakeflow_project_etl here: [DATABRICKS_URL]/pipelines/[UUID]?o=[NUMID]
 
 === Run pipeline
 >>> [PIPELINES] run
@@ -31,31 +36,32 @@ Pipeline configurations for this update:
 
 === Edit project by creating and running a new second pipeline
 >>> [PIPELINES] deploy
-Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/my_project/dev/files...
+Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/lakeflow_project/dev/files...
 Deploying resources...
 Updating deployment state...
 Deployment complete!
-View your pipeline my_project_pipeline here: [DATABRICKS_URL]/pipelines/[UUID]?o=[NUMID]
-View your pipeline my_project_pipeline_2 here: [DATABRICKS_URL]/pipelines/[UUID]?o=[NUMID]
+View your job sample_job here: [DATABRICKS_URL]/jobs/[NUMID]?o=[NUMID]
+View your pipeline lakeflow_project_etl here: [DATABRICKS_URL]/pipelines/[UUID]?o=[NUMID]
+View your pipeline lakeflow_project_etl_2 here: [DATABRICKS_URL]/pipelines/[UUID]?o=[NUMID]
 
 === Assert the second pipeline is created
 >>> [CLI] pipelines get [UUID]
 {
   "creator_user_name":"[USERNAME]",
   "last_modified":[UNIX_TIME_MILLIS],
-  "name":"[dev [USERNAME]] my_project_pipeline_2",
+  "name":"[dev [USERNAME]] lakeflow_project_etl_2",
   "pipeline_id":"[UUID]",
   "run_as_user_name":"[USERNAME]",
   "spec": {
     "channel":"CURRENT",
     "deployment": {
       "kind":"BUNDLE",
-      "metadata_file_path":"/Workspace/Users/[USERNAME]/.bundle/my_project/dev/state/metadata.json"
+      "metadata_file_path":"/Workspace/Users/[USERNAME]/.bundle/lakeflow_project/dev/state/metadata.json"
     },
     "development":true,
     "edition":"ADVANCED",
     "id":"[UUID]",
-    "name":"[dev [USERNAME]] my_project_pipeline_2",
+    "name":"[dev [USERNAME]] lakeflow_project_etl_2",
     "storage":"dbfs:/pipelines/[UUID]",
     "tags": {
       "dev":"[USERNAME]"
@@ -64,7 +70,7 @@ View your pipeline my_project_pipeline_2 here: [DATABRICKS_URL]/pipelines/[UUID]
   "state":"IDLE"
 }
 
->>> [PIPELINES] run my_project_pipeline_2
+>>> [PIPELINES] run lakeflow_project_etl_2
 Update URL: [DATABRICKS_URL]/#joblist/pipelines/[UUID]/updates/[UUID]
 
 Update ID: [UUID]
@@ -75,26 +81,27 @@ Pipeline configurations for this update:
 • All tables are refreshed
 
 === Stop both pipelines before destroy
->>> [PIPELINES] stop my_project_pipeline
-Stopping my_project_pipeline...
-my_project_pipeline has been stopped.
+>>> [PIPELINES] stop lakeflow_project_etl
+Stopping lakeflow_project_etl...
+lakeflow_project_etl has been stopped.
 
->>> [PIPELINES] stop my_project_pipeline_2
-Stopping my_project_pipeline_2...
-my_project_pipeline_2 has been stopped.
+>>> [PIPELINES] stop lakeflow_project_etl_2
+Stopping lakeflow_project_etl_2...
+lakeflow_project_etl_2 has been stopped.
 
 === Destroy project
 >>> [PIPELINES] destroy --auto-approve
 The following resources will be deleted:
-  delete resources.pipelines.my_project_pipeline
-  delete resources.pipelines.my_project_pipeline_2
+  delete resources.jobs.sample_job
+  delete resources.pipelines.lakeflow_project_etl
+  delete resources.pipelines.lakeflow_project_etl_2
 
 This action will result in the deletion of the following Lakeflow Declarative Pipelines along with the
 Streaming Tables (STs) and Materialized Views (MVs) managed by them:
-  delete resources.pipelines.my_project_pipeline
-  delete resources.pipelines.my_project_pipeline_2
+  delete resources.pipelines.lakeflow_project_etl
+  delete resources.pipelines.lakeflow_project_etl_2
 
-All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/my_project/dev
+All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/lakeflow_project/dev
 
 Deleting files...
 Destroy complete!
```

acceptance/pipelines/e2e/output/my_project/.vscode/__builtins__.pyi renamed to acceptance/pipelines/e2e/output/lakeflow_project/.vscode/__builtins__.pyi

File renamed without changes.

acceptance/pipelines/e2e/output/my_project/.vscode/extensions.json renamed to acceptance/pipelines/e2e/output/lakeflow_project/.vscode/extensions.json

Lines changed: 2 additions & 2 deletions
```diff
@@ -1,7 +1,7 @@
 {
   "recommendations": [
     "databricks.databricks",
-    "ms-python.vscode-pylance",
-    "redhat.vscode-yaml"
+    "redhat.vscode-yaml",
+    "charliermarsh.ruff"
   ]
 }
```
Lines changed: 39 additions & 0 deletions
```diff
@@ -0,0 +1,39 @@
+{
+  "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\<codecell\\>|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])",
+  "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------",
+  "python.testing.pytestArgs": [
+    "."
+  ],
+  "files.exclude": {
+    "**/*.egg-info": true,
+    "**/__pycache__": true,
+    ".pytest_cache": true,
+    "dist": true,
+  },
+  "files.associations": {
+    "**/.gitkeep": "markdown"
+  },
+
+  // Pylance settings (VS Code)
+  // Set typeCheckingMode to "basic" to enable type checking!
+  "python.analysis.typeCheckingMode": "off",
+  "python.analysis.extraPaths": ["src", "lib", "resources"],
+  "python.analysis.diagnosticMode": "workspace",
+  "python.analysis.stubPath": ".vscode",
+
+  // Pyright settings (Cursor)
+  // Set typeCheckingMode to "basic" to enable type checking!
+  "cursorpyright.analysis.typeCheckingMode": "off",
+  "cursorpyright.analysis.extraPaths": ["src", "lib", "resources"],
+  "cursorpyright.analysis.diagnosticMode": "workspace",
+  "cursorpyright.analysis.stubPath": ".vscode",
+
+  // General Python settings
+  "python.defaultInterpreterPath": "./.venv/bin/python",
+  "python.testing.unittestEnabled": false,
+  "python.testing.pytestEnabled": true,
+  "[python]": {
+    "editor.defaultFormatter": "charliermarsh.ruff",
+    "editor.formatOnSave": true,
+  },
+}
```
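The cell-marker regex added in these settings is easy to sanity-check. A minimal Python snippet (illustrative only, not part of the commit) confirming which lines the pattern treats as interactive-cell boundaries:

```python
import re

# Cell-marker pattern from the generated editor settings (JSON escaping removed).
CELL_MARKER = (
    r"^# COMMAND ----------"
    r"|^# Databricks notebook source"
    r"|^(#\s*%%|#\s*\<codecell\>|#\s*In\[\d*?\]|#\s*In\[ \])"
)

pattern = re.compile(CELL_MARKER)

# Lines that should start a new cell:
for marker in ("# COMMAND ----------", "# Databricks notebook source", "# %%", "# In[12]"):
    assert pattern.match(marker), marker

# Ordinary code lines should not match:
assert pattern.match("print('hello')") is None
```

The third alternative also accepts Jupyter-style markers such as `# <codecell>` and `# In[ ]`, so notebooks exported in several formats split into cells consistently.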
Lines changed: 54 additions & 0 deletions
```diff
@@ -0,0 +1,54 @@
+# lakeflow_project
+
+The 'lakeflow_project' project was generated by using the lakeflow-pipelines template.
+
+* `src/`: Python source code for this project.
+* `resources/`: Resource configurations (jobs, pipelines, etc.)
+
+## Getting started
+
+Choose how you want to work on this project:
+
+(a) Directly in your Databricks workspace, see
+    https://docs.databricks.com/dev-tools/bundles/workspace.
+
+(b) Locally with an IDE like Cursor or VS Code, see
+    https://docs.databricks.com/dev-tools/vscode-ext.html.
+
+(c) With command line tools, see https://docs.databricks.com/dev-tools/cli/databricks-cli.html
+
+# Using this project with the CLI
+
+The Databricks workspace and IDE extensions provide a graphical interface for working
+with this project. It's also possible to interact with it directly using the CLI:
+
+1. Authenticate to your Databricks workspace, if you have not done so already:
+   ```
+   $ databricks configure
+   ```
+
+2. To deploy a development copy of this project, type:
+   ```
+   $ databricks bundle deploy --target dev
+   ```
+   (Note that "dev" is the default target, so the `--target` parameter
+   is optional here.)
+
+   This deploys everything that's defined for this project.
+   For example, the default template would deploy a pipeline called
+   `[dev yourname] lakeflow_project_etl` to your workspace.
+   You can find that resource by opening your workspace and clicking on **Jobs & Pipelines**.
+
+3. Similarly, to deploy a production copy, type:
+   ```
+   $ databricks bundle deploy --target prod
+   ```
+   Note that the default template includes a job that runs the pipeline every day
+   (defined in resources/sample_job.job.yml). The schedule
+   is paused when deploying in development mode (see
+   https://docs.databricks.com/dev-tools/bundles/deployment-modes.html).
+
+4. To run a job or pipeline, use the "run" command:
+   ```
+   $ databricks bundle run
+   ```
```

acceptance/pipelines/init/error-cases/output/my_project/databricks.yml renamed to acceptance/pipelines/e2e/output/lakeflow_project/databricks.yml

Lines changed: 8 additions & 12 deletions
```diff
@@ -1,46 +1,42 @@
-# This is a Databricks pipelines definition for my_project.
+# This is a Databricks asset bundle definition for lakeflow_project.
 # See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
 bundle:
-  name: my_project
+  name: lakeflow_project
   uuid: [UUID]
 
 include:
   - resources/*.yml
   - resources/*/*.yml
-  - ./*.yml
 
 # Variable declarations. These variables are assigned in the dev/prod targets below.
 variables:
   catalog:
     description: The catalog to use
   schema:
     description: The schema to use
-  notifications:
-    description: The email addresses to use for failure notifications
 
 targets:
   dev:
     # The default target uses 'mode: development' to create a development copy.
-    # - Deployed pipelines get prefixed with '[dev my_user_name]'
+    # - Deployed resources get prefixed with '[dev my_user_name]'
+    # - Any job schedules and triggers are paused by default.
+    # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html.
     mode: development
     default: true
    workspace:
      host: [DATABRICKS_URL]
    variables:
      catalog: hive_metastore
      schema: ${workspace.current_user.short_name}
-      notifications: []
-
  prod:
    mode: production
    workspace:
      host: [DATABRICKS_URL]
      # We explicitly deploy to /Workspace/Users/[USERNAME] to make sure we only have a single copy.
      root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target}
+    variables:
+      catalog: hive_metastore
+      schema: prod
    permissions:
      - user_name: [USERNAME]
        level: CAN_MANAGE
-    variables:
-      catalog: hive_metastore
-      schema: default
-      notifications: [[USERNAME]]
```
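The per-target `variables` blocks above are how the template parameterizes catalog and schema per environment. Purely as an illustration (not part of this commit), an additional target could assign the same variables the same way; the host below is a placeholder:

```yaml
targets:
  staging:
    mode: production
    workspace:
      host: https://example.cloud.databricks.com  # placeholder, not a real workspace
    variables:
      catalog: hive_metastore
      schema: staging
```

Resources that reference `${var.catalog}` or `${var.schema}` then resolve per target, which is what lets one pipeline definition serve dev and prod.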

acceptance/pipelines/e2e/output/my_project/out.gitignore renamed to acceptance/pipelines/e2e/output/lakeflow_project/out.gitignore

Lines changed: 2 additions & 0 deletions
```diff
@@ -4,5 +4,7 @@ dist/
 __pycache__/
 *.egg-info
 .venv/
+scratch/**
+!scratch/README.md
 **/explorations/**
 **/!explorations/README.md
```
Lines changed: 34 additions & 0 deletions
```diff
@@ -0,0 +1,34 @@
+[project]
+name = "lakeflow_project"
+version = "0.0.1"
+authors = [{ name = "[USERNAME]" }]
+requires-python = ">=3.10,<3.13"
+dependencies = [
+    # Any dependencies for jobs and pipelines in this project can be added here
+    # See also https://docs.databricks.com/dev-tools/bundles/library-dependencies
+    #
+    # LIMITATION: for pipelines, dependencies are cached during development;
+    # add dependencies to the 'environment' section of your pipeline.yml file instead
+]
+
+[dependency-groups]
+dev = [
+    "pytest",
+    "ruff",
+    "databricks-dlt",
+    "databricks-connect>=15.4,<15.5",
+    "ipykernel",
+]
+
+[project.scripts]
+main = "lakeflow_project.main:main"
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[tool.hatch.build.targets.wheel]
+packages = ["src"]
+
+[tool.ruff]
+line-length = 120
```
Lines changed: 21 additions & 0 deletions
```diff
@@ -0,0 +1,21 @@
+# The main pipeline for lakeflow_project
+
+resources:
+  pipelines:
+    lakeflow_project_etl:
+      name: lakeflow_project_etl
+      # Catalog is required for serverless compute
+      catalog: main
+      schema: ${var.schema}
+      serverless: true
+      root_path: "../src/lakeflow_project_etl"
+
+      libraries:
+        - glob:
+            include: ../src/lakeflow_project_etl/transformations/**
+
+      environment:
+        dependencies:
+          # We include every dependency defined by pyproject.toml by defining an editable environment
+          # that points to the folder where pyproject.toml is deployed.
+          - --editable ${workspace.file_path}
```
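Per the LIMITATION note in pyproject.toml, pipeline dependencies belong in this `environment` section rather than in the project's `dependencies` list, since pipeline dependencies are cached during development. A sketch of what adding one extra package might look like (the `pandas` pin is purely illustrative, not part of the template):

```yaml
environment:
  dependencies:
    - --editable ${workspace.file_path}
    # Hypothetical extra dependency for the pipeline, added here
    # instead of pyproject.toml per the LIMITATION note:
    - pandas==2.2.*
```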
Lines changed: 4 additions & 0 deletions
```diff
@@ -0,0 +1,4 @@
+resources:
+  pipelines:
+    lakeflow_project_etl_2:
+      name: lakeflow_project_etl_2
```
