From ae21179cad6ca49049793247dc4696f4afa923b0 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 1 Mar 2026 23:55:48 +0000 Subject: [PATCH 01/26] Replace community.general.npm with ansible.builtin.command Removes the dependency on the community.general collection which was producing a warning about not supporting the installed Ansible version. Uses npm install --prefix directly instead. https://claude.ai/code/session_01Vm5EEsQ5uFKoni6qWEDQd8 --- ansible/tasks/neovim.yml | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/ansible/tasks/neovim.yml b/ansible/tasks/neovim.yml index ce22212..ab7b438 100644 --- a/ansible/tasks/neovim.yml +++ b/ansible/tasks/neovim.yml @@ -31,11 +31,14 @@ - { src: "nvim/lua", dest: ".config/nvim/lua" } - { src: "nvim/ftplugin", dest: ".config/nvim/ftplugin" } -- name: Install jsonlint node.js package. - community.general.npm: - name: jsonlint - path: "{{ venv_path }}/nvim/bin" - global: false +- name: Install jsonlint node.js package + ansible.builtin.command: + argv: + - npm + - install + - --prefix + - "{{ venv_path }}/nvim" + - jsonlint - name: Run Lazy sync for Neovim plugins ansible.builtin.command: From 9b88edd8d5b78ce720f0b693764bd6117657f1e3 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 2 Mar 2026 03:17:00 +0000 Subject: [PATCH 02/26] Add CI workflow to validate Ansible playbook on Ubuntu Runs the full playbook (minus macOS-only launch agents) on ubuntu-latest: syntax check then an actual apply. Installs Neovim, Python 3.11, and virtualenv so the neovim setup tasks (pip venv, Lazy sync, treesitter) can run too. 
https://claude.ai/code/session_01Vm5EEsQ5uFKoni6qWEDQd8 --- .github/workflows/ansible.yml | 56 +++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 .github/workflows/ansible.yml diff --git a/.github/workflows/ansible.yml b/.github/workflows/ansible.yml new file mode 100644 index 0000000..06e0c14 --- /dev/null +++ b/.github/workflows/ansible.yml @@ -0,0 +1,56 @@ +name: Ansible Playbook + +on: [push] + +env: + TERM: xterm + +jobs: + playbook: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Symlink checkout to ~/.dotfiles + run: ln -s "$GITHUB_WORKSPACE" "$HOME/.dotfiles" + + - name: Create stubs for files excluded from version control + run: | + touch "$HOME/.dotfiles/.mise.toml" + mkdir -p "$HOME/.dotfiles/ssh" + touch "$HOME/.dotfiles/ssh/config" + mkdir -p "$HOME/.config/fish/completions" + + - name: Install uv + uses: astral-sh/setup-uv@v5 + + - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install Neovim + run: | + curl -LO https://github.com/neovim/neovim/releases/latest/download/nvim-linux-x86_64.tar.gz + sudo tar -C /opt -xzf nvim-linux-x86_64.tar.gz + echo "/opt/nvim-linux-x86_64/bin" >> "$GITHUB_PATH" + + - name: Install virtualenv + run: pip install virtualenv + + - name: Check playbook syntax + run: > + uv run ansible-playbook + -i "localhost," + -c local + "$HOME/.dotfiles/ansible/dotfiles.yml" + --syntax-check + + - name: Run playbook (skip macOS-only tasks) + run: > + uv run ansible-playbook + -i "localhost," + -c local + "$HOME/.dotfiles/ansible/dotfiles.yml" + --skip-tags agents From a7f5784a7f98da5aaf90ef5b1d7f8cb9d52fc437 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 05:46:52 +0000 Subject: [PATCH 03/26] Add dbt keybindings and SQL treesitter support Add dr (dbt run), dc (dbt compile), and dt (dbt test) keybindings that format the current SQL file with sqlfmt via conform, save, then send the dbt command to 
toggleterm. Also add sql to treesitter ensure_installed for better SQL highlighting. https://claude.ai/code/session_016johXLfEd6P4umaT14YQEQ --- nvim/lua/config/keymaps.lua | 47 +++++++++++++++++++++++++++++++++++ nvim/lua/plugins/language.lua | 1 + 2 files changed, 48 insertions(+) diff --git a/nvim/lua/config/keymaps.lua b/nvim/lua/config/keymaps.lua index 9880f21..6ccc08a 100644 --- a/nvim/lua/config/keymaps.lua +++ b/nvim/lua/config/keymaps.lua @@ -57,3 +57,50 @@ vim.keymap.set("n", "wi", function() }, }) end, { noremap = true, silent = true, desc = "[W]iki [I]nsert Link" }) + +-- dbt: extract model name from current file path (e.g., models/staging/stg_orders.sql -> stg_orders) +local function dbt_model_name() + local filepath = vim.fn.expand("%:t:r") + if filepath == "" then + vim.notify("No file open", vim.log.levels.WARN) + return nil + end + return filepath +end + +-- dbt: format the current SQL file, then send a dbt command to the terminal +local function dbt_cmd(cmd_template) + local model = dbt_model_name() + if not model then + return + end + + -- Format with conform (sqlfmt), then save + require("conform").format({ async = false, lsp_fallback = true }) + vim.cmd("write") + + -- Build the command + local cmd = string.format(cmd_template, model) + + -- Send to toggleterm (terminal 1) + local term = require("toggleterm.terminal").get(1) + if not term then + term = require("toggleterm.terminal").Terminal:new({ id = 1 }) + end + if not term:is_open() then + term:toggle() + end + term:send(cmd) +end + +vim.keymap.set("n", "dr", function() + dbt_cmd("dbt run -s %s") +end, { desc = "[D]bt [R]un current model" }) + +vim.keymap.set("n", "dc", function() + dbt_cmd("dbt compile -s %s") +end, { desc = "[D]bt [C]ompile current model" }) + +vim.keymap.set("n", "dt", function() + dbt_cmd("dbt test -s %s") +end, { desc = "[D]bt [T]est current model" }) diff --git a/nvim/lua/plugins/language.lua b/nvim/lua/plugins/language.lua index 946c320..3546b2d 100644 --- 
a/nvim/lua/plugins/language.lua +++ b/nvim/lua/plugins/language.lua @@ -33,6 +33,7 @@ return { "python", "query", "regex", + "sql", "vim", "yaml", }, From ab536f3abc3325af27dae636e3d82b085eeabf3f Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 06:01:16 +0000 Subject: [PATCH 04/26] Add more dbt keybindings: build, show, run+downstream - dR: dbt run -s model+ (model and all downstream dependents) - db: dbt build (run + test in DAG order) - ds: dbt show (preview query results without materializing) https://claude.ai/code/session_016johXLfEd6P4umaT14YQEQ --- nvim/lua/config/keymaps.lua | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/nvim/lua/config/keymaps.lua b/nvim/lua/config/keymaps.lua index 6ccc08a..e67fee7 100644 --- a/nvim/lua/config/keymaps.lua +++ b/nvim/lua/config/keymaps.lua @@ -97,6 +97,14 @@ vim.keymap.set("n", "dr", function() dbt_cmd("dbt run -s %s") end, { desc = "[D]bt [R]un current model" }) +vim.keymap.set("n", "dR", function() + dbt_cmd("dbt run -s %s+") +end, { desc = "[D]bt [R]un model + downstream" }) + +vim.keymap.set("n", "db", function() + dbt_cmd("dbt build -s %s") +end, { desc = "[D]bt [B]uild current model (run + test)" }) + vim.keymap.set("n", "dc", function() dbt_cmd("dbt compile -s %s") end, { desc = "[D]bt [C]ompile current model" }) @@ -104,3 +112,7 @@ end, { desc = "[D]bt [C]ompile current model" }) vim.keymap.set("n", "dt", function() dbt_cmd("dbt test -s %s") end, { desc = "[D]bt [T]est current model" }) + +vim.keymap.set("n", "ds", function() + dbt_cmd("dbt show -s %s") +end, { desc = "[D]bt [S]how preview results" }) From b6171e346ee5d357223f9251d6b7f42bbc47836c Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 20:49:44 +0000 Subject: [PATCH 05/26] Add dbt navigation, fuzzy model picker, and model search - dg: jump to ref() or source() under cursor - df: fzf model picker with ctrl-r/b/t to run/build/test - do: open compiled SQL in readonly vsplit - d/: grep across all models (find columns, 
CTEs, etc.) Inspired by fzf-dbt CLI tool, adapted for neovim with fzf-lua. https://claude.ai/code/session_016johXLfEd6P4umaT14YQEQ --- nvim/lua/config/keymaps.lua | 160 ++++++++++++++++++++++++++++++++++++ 1 file changed, 160 insertions(+) diff --git a/nvim/lua/config/keymaps.lua b/nvim/lua/config/keymaps.lua index e67fee7..b712a3e 100644 --- a/nvim/lua/config/keymaps.lua +++ b/nvim/lua/config/keymaps.lua @@ -93,6 +93,166 @@ local function dbt_cmd(cmd_template) term:send(cmd) end +-- dbt: find the project root (directory containing dbt_project.yml) +local function dbt_project_root() + local path = vim.fn.findfile("dbt_project.yml", ".;") + if path == "" then + return nil + end + return vim.fn.fnamemodify(path, ":p:h") +end + +-- dbt: jump to model under cursor from {{ ref('model_name') }} or {{ source('src', 'table') }} +vim.keymap.set("n", "dg", function() + local line = vim.api.nvim_get_current_line() + local col = vim.api.nvim_win_get_cursor(0)[2] + 1 + + -- Try to find ref('model') or ref("model") around cursor + local ref_model = nil + for start_pos, name, end_pos in line:gmatch("()ref%(['\"]([^'\"]+)['\"]%)()" ) do + if col >= start_pos and col <= end_pos then + ref_model = name + break + end + end + + -- Try source('source_name', 'table_name') if no ref found + local source_name, source_table = nil, nil + if not ref_model then + for start_pos, src, tbl, end_pos in line:gmatch("()source%(['\"]([^'\"]+)['\"]%s*,%s*['\"]([^'\"]+)['\"]%)()" ) do + if col >= start_pos and col <= end_pos then + source_name, source_table = src, tbl + break + end + end + end + + if not ref_model and not source_table then + vim.notify("No ref() or source() under cursor", vim.log.levels.WARN) + return + end + + local root = dbt_project_root() + if not root then + vim.notify("No dbt_project.yml found", vim.log.levels.WARN) + return + end + + -- Search for the model file + local search_name = ref_model or source_table + local matches = vim.fn.globpath(root, "**/" .. search_name .. 
".sql", false, true) + if #matches == 0 then + -- Also try .yml for source definitions + matches = vim.fn.globpath(root, "**/" .. search_name .. ".yml", false, true) + end + + if #matches == 1 then + vim.cmd.edit(matches[1]) + elseif #matches > 1 then + vim.ui.select(matches, { prompt = "Multiple matches:" }, function(choice) + if choice then + vim.cmd.edit(choice) + end + end) + else + vim.notify("No file found for: " .. search_name, vim.log.levels.WARN) + end +end, { desc = "[D]bt [G]o to ref/source" }) + +-- dbt: send a raw command string to toggleterm (used by fzf actions) +local function dbt_cmd_raw(cmd) + local term = require("toggleterm.terminal").get(1) + if not term then + term = require("toggleterm.terminal").Terminal:new({ id = 1 }) + end + if not term:is_open() then + term:toggle() + end + term:send(cmd) +end + +-- dbt: fuzzy model picker — select a model then choose an action +vim.keymap.set("n", "df", function() + local root = dbt_project_root() + if not root then + vim.notify("No dbt_project.yml found", vim.log.levels.WARN) + return + end + + local fzf = require("fzf-lua") + fzf.files({ + prompt = "dbt model ", + cwd = root, + cmd = "fd -e sql . models", + actions = { + -- Default: open the file + ["default"] = fzf.actions.file_edit, + -- Ctrl-r: run the selected model + ["ctrl-r"] = function(selected) + if selected and #selected > 0 then + local name = selected[1]:match("([^/]+)%.sql$") + if name then + dbt_cmd_raw("dbt run -s " .. name) + end + end + end, + -- Ctrl-b: build the selected model + ["ctrl-b"] = function(selected) + if selected and #selected > 0 then + local name = selected[1]:match("([^/]+)%.sql$") + if name then + dbt_cmd_raw("dbt build -s " .. name) + end + end + end, + -- Ctrl-t: test the selected model + ["ctrl-t"] = function(selected) + if selected and #selected > 0 then + local name = selected[1]:match("([^/]+)%.sql$") + if name then + dbt_cmd_raw("dbt test -s " .. 
name) + end + end + end, + }, + fzf_opts = { + ["--header"] = "enter=open | ctrl-r=run | ctrl-b=build | ctrl-t=test", + ["--multi"] = true, + }, + }) +end, { desc = "[D]bt [F]ind model (fzf)" }) + +-- dbt: open the compiled SQL for the current model in a split +vim.keymap.set("n", "do", function() + local model = dbt_model_name() + if not model then + return + end + local root = dbt_project_root() + if not root then + vim.notify("No dbt_project.yml found", vim.log.levels.WARN) + return + end + local compiled = vim.fn.globpath(root, "target/compiled/**/" .. model .. ".sql", false, true) + if #compiled == 0 then + vim.notify("No compiled SQL found — run dbt compile first", vim.log.levels.WARN) + return + end + vim.cmd("vsplit " .. compiled[1]) + vim.bo.readonly = true + vim.bo.modifiable = false +end, { desc = "[D]bt [O]pen compiled SQL" }) + +-- dbt: grep across all models (search for column names, CTEs, etc.) +vim.keymap.set("n", "d/", function() + local root = dbt_project_root() + if not root then + vim.notify("No dbt_project.yml found", vim.log.levels.WARN) + return + end + require("fzf-lua").grep({ prompt = "dbt grep ", cwd = root .. "/models" }) +end, { desc = "[D]bt search models" }) + vim.keymap.set("n", "dr", function() dbt_cmd("dbt run -s %s") end, { desc = "[D]bt [R]un current model" }) From 93d5ca47ebdb3b3cb61885062059e7c6ae14acf2 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 21:33:26 +0000 Subject: [PATCH 06/26] Add claude agent keymaps for dbt model analysis MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit da: quick analysis with sonnet — reviews the current model and suggests improvements in non-interactive mode. dA: deep analysis with sonnet thinking — interrogates the duckdb database and cross-references with the current model to check data quality, joins, types, and more. 
https://claude.ai/code/session_016johXLfEd6P4umaT14YQEQ --- nvim/lua/config/keymaps.lua | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/nvim/lua/config/keymaps.lua b/nvim/lua/config/keymaps.lua index b712a3e..e7bede1 100644 --- a/nvim/lua/config/keymaps.lua +++ b/nvim/lua/config/keymaps.lua @@ -276,3 +276,35 @@ end, { desc = "[D]bt [T]est current model" }) vim.keymap.set("n", "ds", function() dbt_cmd("dbt show -s %s") end, { desc = "[D]bt [S]how preview results" }) + +-- dbt: run claude agent on current model (quick analysis with sonnet) +vim.keymap.set("n", "da", function() + local filepath = vim.fn.expand("%:p") + if filepath == "" then + vim.notify("No file open", vim.log.levels.WARN) + return + end + local cmd = string.format( + 'claude -p "Review this dbt model and add brief comments suggesting improvements, potential issues, or best-practice violations. Be concise." --model claude-sonnet-4-6 %s', + vim.fn.shellescape(filepath) + ) + dbt_cmd_raw(cmd) +end, { desc = "[D]bt [A]nalyse model (quick)" }) + +-- dbt: run claude agent on current model (deep analysis with sonnet thinking, cross-reference DB) +vim.keymap.set("n", "dA", function() + local filepath = vim.fn.expand("%:p") + if filepath == "" then + vim.notify("No file open", vim.log.levels.WARN) + return + end + local model = dbt_model_name() + if not model then + return + end + local cmd = string.format( + 'claude -p "You have access to a duckdb database. Interrogate the database to understand the schema and data, then cross-reference with this dbt model. Check for: data quality issues, join correctness, missing filters, column type mismatches, and potential improvements. Run queries to validate assumptions." 
--model claude-sonnet-4-6 --thinking %s', + vim.fn.shellescape(filepath) + ) + dbt_cmd_raw(cmd) +end, { desc = "[D]bt [A]nalyse model (deep, DB cross-ref)" }) From 45dcc020b5b80bce61e9f005714ec5ab9ce4bcc5 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 21:36:25 +0000 Subject: [PATCH 07/26] Include compiled SQL and sample rows in deep dbt analysis The dA keymap now compiles the model first, then passes the compiled SQL and first 20 rows from dbt show as extra context to the claude agent for more informed analysis. https://claude.ai/code/session_016johXLfEd6P4umaT14YQEQ --- nvim/lua/config/keymaps.lua | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/nvim/lua/config/keymaps.lua b/nvim/lua/config/keymaps.lua index e7bede1..8bffc11 100644 --- a/nvim/lua/config/keymaps.lua +++ b/nvim/lua/config/keymaps.lua @@ -292,6 +292,7 @@ vim.keymap.set("n", "da", function() end, { desc = "[D]bt [A]nalyse model (quick)" }) -- dbt: run claude agent on current model (deep analysis with sonnet thinking, cross-reference DB) +-- Compiles the model first, then passes compiled SQL + first 20 rows as extra context vim.keymap.set("n", "dA", function() local filepath = vim.fn.expand("%:p") if filepath == "" then @@ -302,8 +303,28 @@ vim.keymap.set("n", "dA", function() if not model then return end + local root = dbt_project_root() + if not root then + vim.notify("No dbt_project.yml found", vim.log.levels.WARN) + return + end + + -- Build a shell script that: + -- 1. Compiles the model and captures compiled SQL + -- 2. Runs dbt show --limit 20 to get sample rows + -- 3. Feeds everything to claude local cmd = string.format( - 'claude -p "You have access to a duckdb database. Interrogate the database to understand the schema and data, then cross-reference with this dbt model. Check for: data quality issues, join correctness, missing filters, column type mismatches, and potential improvements. Run queries to validate assumptions." 
--model claude-sonnet-4-6 --thinking %s', + [[dbt compile -s %s --quiet && compiled_sql=$(cat $(find %s/target/compiled -name '%s.sql' | head -1) 2>/dev/null) && sample_rows=$(dbt show -s %s --limit 20 2>/dev/null) && claude -p "You have access to a duckdb database. Interrogate the database to understand the schema and data, then cross-reference with this dbt model. Check for: data quality issues, join correctness, missing filters, column type mismatches, and potential improvements. Run queries to validate assumptions. + +Here is the compiled SQL: +${compiled_sql} + +Here are the first 20 rows returned by this model: +${sample_rows}" --model claude-sonnet-4-6 --thinking %s]], + model, + vim.fn.shellescape(root), + model, + model, vim.fn.shellescape(filepath) ) dbt_cmd_raw(cmd) From 70af540b05fb2a3d07fde78dca38680611251fa8 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 21:37:27 +0000 Subject: [PATCH 08/26] Move dbt agent prompts to separate markdown template files Prompts now live in nvim/prompts/ as markdown files with {{var}} template placeholders. The quick analysis prompt is loaded and substituted in Lua; the deep analysis prompt uses sed at runtime to inject compiled SQL and sample rows before passing to claude. 
https://claude.ai/code/session_016johXLfEd6P4umaT14YQEQ --- nvim/lua/config/keymaps.lua | 42 +++++++++++++++++++++--------- nvim/prompts/dbt_deep_analysis.md | 7 +++++ nvim/prompts/dbt_quick_analysis.md | 1 + 3 files changed, 38 insertions(+), 12 deletions(-) create mode 100644 nvim/prompts/dbt_deep_analysis.md create mode 100644 nvim/prompts/dbt_quick_analysis.md diff --git a/nvim/lua/config/keymaps.lua b/nvim/lua/config/keymaps.lua index 8bffc11..6d12068 100644 --- a/nvim/lua/config/keymaps.lua +++ b/nvim/lua/config/keymaps.lua @@ -277,6 +277,22 @@ vim.keymap.set("n", "ds", function() dbt_cmd("dbt show -s %s") end, { desc = "[D]bt [S]how preview results" }) +-- dbt: read a prompt template from nvim/prompts/ and substitute {{key}} placeholders +local function dbt_load_prompt(name, vars) + local prompt_dir = vim.fn.stdpath("config") .. "/prompts/" + local path = prompt_dir .. name .. ".md" + local lines = vim.fn.readfile(path) + if #lines == 0 then + vim.notify("Prompt not found: " .. path, vim.log.levels.ERROR) + return nil + end + local prompt = table.concat(lines, "\n") + for key, value in pairs(vars or {}) do + prompt = prompt:gsub("{{" .. key .. "}}", value) + end + return prompt +end + -- dbt: run claude agent on current model (quick analysis with sonnet) vim.keymap.set("n", "da", function() local filepath = vim.fn.expand("%:p") @@ -284,8 +300,13 @@ vim.keymap.set("n", "da", function() vim.notify("No file open", vim.log.levels.WARN) return end + local prompt = dbt_load_prompt("dbt_quick_analysis", {}) + if not prompt then + return + end local cmd = string.format( - 'claude -p "Review this dbt model and add brief comments suggesting improvements, potential issues, or best-practice violations. Be concise." 
--model claude-sonnet-4-6 %s', + "claude -p %s --model claude-sonnet-4-6 %s", + vim.fn.shellescape(prompt), vim.fn.shellescape(filepath) ) dbt_cmd_raw(cmd) @@ -309,22 +330,19 @@ vim.keymap.set("n", "dA", function() return end - -- Build a shell script that: - -- 1. Compiles the model and captures compiled SQL - -- 2. Runs dbt show --limit 20 to get sample rows - -- 3. Feeds everything to claude + -- Compile + gather sample rows, then template the prompt and pass to claude + local prompt_path = vim.fn.stdpath("config") .. "/prompts/dbt_deep_analysis.md" local cmd = string.format( - [[dbt compile -s %s --quiet && compiled_sql=$(cat $(find %s/target/compiled -name '%s.sql' | head -1) 2>/dev/null) && sample_rows=$(dbt show -s %s --limit 20 2>/dev/null) && claude -p "You have access to a duckdb database. Interrogate the database to understand the schema and data, then cross-reference with this dbt model. Check for: data quality issues, join correctness, missing filters, column type mismatches, and potential improvements. Run queries to validate assumptions. - -Here is the compiled SQL: -${compiled_sql} - -Here are the first 20 rows returned by this model: -${sample_rows}" --model claude-sonnet-4-6 --thinking %s]], + [[dbt compile -s %s --quiet ]] + .. [[&& compiled_sql=$(cat $(find %s/target/compiled -name '%s.sql' | head -1) 2>/dev/null) ]] + .. [[&& sample_rows=$(dbt show -s %s --limit 20 2>/dev/null) ]] + .. [[&& prompt=$(sed -e "s|{{compiled_sql}}|${compiled_sql}|g" -e "s|{{sample_rows}}|${sample_rows}|g" %s) ]] + .. [[&& claude -p "${prompt}" --model claude-sonnet-4-6 --thinking %s]], model, vim.fn.shellescape(root), model, model, + vim.fn.shellescape(prompt_path), vim.fn.shellescape(filepath) ) dbt_cmd_raw(cmd) diff --git a/nvim/prompts/dbt_deep_analysis.md b/nvim/prompts/dbt_deep_analysis.md new file mode 100644 index 0000000..397ca3d --- /dev/null +++ b/nvim/prompts/dbt_deep_analysis.md @@ -0,0 +1,7 @@ +You have access to a duckdb database. 
Interrogate the database to understand the schema and data, then cross-reference with this dbt model. Check for: data quality issues, join correctness, missing filters, column type mismatches, and potential improvements. Run queries to validate assumptions. + +Here is the compiled SQL: +{{compiled_sql}} + +Here are the first 20 rows returned by this model: +{{sample_rows}} diff --git a/nvim/prompts/dbt_quick_analysis.md b/nvim/prompts/dbt_quick_analysis.md new file mode 100644 index 0000000..ee1eeea --- /dev/null +++ b/nvim/prompts/dbt_quick_analysis.md @@ -0,0 +1 @@ +Review this dbt model and add brief comments suggesting improvements, potential issues, or best-practice violations. Be concise. From 25dc51ae30c0d19f0eb7a1bef7670ccbec55fc95 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 21:41:28 +0000 Subject: [PATCH 09/26] Write inline comments to buffer and prefix all dbt commands with uv run - da now replaces the buffer with the annotated SQL (undo with :u) - Updated prompt to allow brief explanations alongside suggestions - All dbt commands (run, build, test, compile, show) now use uv run https://claude.ai/code/session_016johXLfEd6P4umaT14YQEQ --- nvim/lua/config/keymaps.lua | 61 +++++++++++++++++++++--------- nvim/prompts/dbt_quick_analysis.md | 5 ++- 2 files changed, 48 insertions(+), 18 deletions(-) diff --git a/nvim/lua/config/keymaps.lua b/nvim/lua/config/keymaps.lua index 6d12068..78cf646 100644 --- a/nvim/lua/config/keymaps.lua +++ b/nvim/lua/config/keymaps.lua @@ -192,7 +192,7 @@ vim.keymap.set("n", "df", function() if selected and #selected > 0 then local name = selected[1]:match("([^/]+)%.sql$") if name then - dbt_cmd_raw("dbt run -s " .. name) + dbt_cmd_raw("uv run dbt run -s " .. name) end end end, @@ -201,7 +201,7 @@ vim.keymap.set("n", "df", function() if selected and #selected > 0 then local name = selected[1]:match("([^/]+)%.sql$") if name then - dbt_cmd_raw("dbt build -s " .. name) + dbt_cmd_raw("uv run dbt build -s " .. 
name) end end end, @@ -210,7 +210,7 @@ vim.keymap.set("n", "df", function() if selected and #selected > 0 then local name = selected[1]:match("([^/]+)%.sql$") if name then - dbt_cmd_raw("dbt test -s " .. name) + dbt_cmd_raw("uv run dbt test -s " .. name) end end end, @@ -254,27 +254,27 @@ vim.keymap.set("n", "d/", function() end, { desc = "[D]bt search models" }) vim.keymap.set("n", "dr", function() - dbt_cmd("dbt run -s %s") + dbt_cmd("uv run dbt run -s %s") end, { desc = "[D]bt [R]un current model" }) vim.keymap.set("n", "dR", function() - dbt_cmd("dbt run -s %s+") + dbt_cmd("uv run dbt run -s %s+") end, { desc = "[D]bt [R]un model + downstream" }) vim.keymap.set("n", "db", function() - dbt_cmd("dbt build -s %s") + dbt_cmd("uv run dbt build -s %s") end, { desc = "[D]bt [B]uild current model (run + test)" }) vim.keymap.set("n", "dc", function() - dbt_cmd("dbt compile -s %s") + dbt_cmd("uv run dbt compile -s %s") end, { desc = "[D]bt [C]ompile current model" }) vim.keymap.set("n", "dt", function() - dbt_cmd("dbt test -s %s") + dbt_cmd("uv run dbt test -s %s") end, { desc = "[D]bt [T]est current model" }) vim.keymap.set("n", "ds", function() - dbt_cmd("dbt show -s %s") + dbt_cmd("uv run dbt show -s %s") end, { desc = "[D]bt [S]how preview results" }) -- dbt: read a prompt template from nvim/prompts/ and substitute {{key}} placeholders @@ -294,6 +294,7 @@ local function dbt_load_prompt(name, vars) end -- dbt: run claude agent on current model (quick analysis with sonnet) +-- Replaces buffer contents with the file + inline SQL comments vim.keymap.set("n", "da", function() local filepath = vim.fn.expand("%:p") if filepath == "" then @@ -304,12 +305,38 @@ vim.keymap.set("n", "da", function() if not prompt then return end - local cmd = string.format( - "claude -p %s --model claude-sonnet-4-6 %s", - vim.fn.shellescape(prompt), - vim.fn.shellescape(filepath) - ) - dbt_cmd_raw(cmd) + local bufnr = vim.api.nvim_get_current_buf() + + vim.notify("Running quick analysis...", 
vim.log.levels.INFO) + + local cmd = { "claude", "-p", prompt, "--model", "claude-sonnet-4-6", filepath } + local output = {} + vim.fn.jobstart(cmd, { + stdout_buffered = true, + on_stdout = function(_, data) + if data then + output = data + end + end, + on_exit = function(_, exit_code) + vim.schedule(function() + if exit_code ~= 0 then + vim.notify("claude exited with code " .. exit_code, vim.log.levels.ERROR) + return + end + -- Remove trailing empty strings from jobstart output + while #output > 0 and output[#output] == "" do + table.remove(output) + end + if #output == 0 then + vim.notify("No output from claude", vim.log.levels.WARN) + return + end + vim.api.nvim_buf_set_lines(bufnr, 0, -1, false, output) + vim.notify("Inline comments added — review and :w to save, or :u to undo", vim.log.levels.INFO) + end) + end, + }) end, { desc = "[D]bt [A]nalyse model (quick)" }) -- dbt: run claude agent on current model (deep analysis with sonnet thinking, cross-reference DB) @@ -333,9 +360,9 @@ vim.keymap.set("n", "dA", function() -- Compile + gather sample rows, then template the prompt and pass to claude local prompt_path = vim.fn.stdpath("config") .. "/prompts/dbt_deep_analysis.md" local cmd = string.format( - [[dbt compile -s %s --quiet ]] + [[uv run dbt compile -s %s --quiet ]] .. [[&& compiled_sql=$(cat $(find %s/target/compiled -name '%s.sql' | head -1) 2>/dev/null) ]] - .. [[&& sample_rows=$(dbt show -s %s --limit 20 2>/dev/null) ]] + .. [[&& sample_rows=$(uv run dbt show -s %s --limit 20 2>/dev/null) ]] .. [[&& prompt=$(sed -e "s|{{compiled_sql}}|${compiled_sql}|g" -e "s|{{sample_rows}}|${sample_rows}|g" %s) ]] .. 
[[&& claude -p "${prompt}" --model claude-sonnet-4-6 --thinking %s]], model, diff --git a/nvim/prompts/dbt_quick_analysis.md b/nvim/prompts/dbt_quick_analysis.md index ee1eeea..6eb4511 100644 --- a/nvim/prompts/dbt_quick_analysis.md +++ b/nvim/prompts/dbt_quick_analysis.md @@ -1 +1,4 @@ -Review this dbt model and add brief comments suggesting improvements, potential issues, or best-practice violations. Be concise. +Output the complete SQL file with inline comments added as SQL comments (-- ). +Add brief comments suggesting improvements, potential issues, or best-practice violations. +Where appropriate, include a short explanation of why the suggestion matters. +Output ONLY the SQL with comments, no markdown fences, no preamble. From c7d612f7a026269913b11377465060f4ef9ea419 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 21:42:12 +0000 Subject: [PATCH 10/26] Open deep dbt analysis in interactive tmux window dA now opens a new tmux window named 'dbt:' that compiles the model, gathers sample rows, then starts an interactive claude session with that context so you can discuss the model. 
https://claude.ai/code/session_016johXLfEd6P4umaT14YQEQ --- nvim/lua/config/keymaps.lua | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/nvim/lua/config/keymaps.lua b/nvim/lua/config/keymaps.lua index 78cf646..28a9ac2 100644 --- a/nvim/lua/config/keymaps.lua +++ b/nvim/lua/config/keymaps.lua @@ -339,8 +339,7 @@ vim.keymap.set("n", "da", function() }) end, { desc = "[D]bt [A]nalyse model (quick)" }) --- dbt: run claude agent on current model (deep analysis with sonnet thinking, cross-reference DB) --- Compiles the model first, then passes compiled SQL + first 20 rows as extra context +-- dbt: open interactive claude session in a new tmux window with compiled SQL + sample rows as context vim.keymap.set("n", "dA", function() local filepath = vim.fn.expand("%:p") if filepath == "" then @@ -357,14 +356,17 @@ vim.keymap.set("n", "dA", function() return end - -- Compile + gather sample rows, then template the prompt and pass to claude + -- Open a new tmux window that compiles, gathers context, then starts interactive claude local prompt_path = vim.fn.stdpath("config") .. "/prompts/dbt_deep_analysis.md" local cmd = string.format( - [[uv run dbt compile -s %s --quiet ]] - .. [[&& compiled_sql=$(cat $(find %s/target/compiled -name '%s.sql' | head -1) 2>/dev/null) ]] + [[tmux new-window -n 'dbt:%s' ']] + .. [[uv run dbt compile -s %s --quiet ]] + .. [[&& compiled_sql=$(cat $(find %s/target/compiled -name "%s.sql" | head -1) 2>/dev/null) ]] .. [[&& sample_rows=$(uv run dbt show -s %s --limit 20 2>/dev/null) ]] .. [[&& prompt=$(sed -e "s|{{compiled_sql}}|${compiled_sql}|g" -e "s|{{sample_rows}}|${sample_rows}|g" %s) ]] - .. [[&& claude -p "${prompt}" --model claude-sonnet-4-6 --thinking %s]], + .. [[&& claude --model claude-sonnet-4-6 --thinking --prompt "${prompt}" %s ]] + .. 
[[|| read -p "Press enter to close..."']], + model, model, vim.fn.shellescape(root), model, @@ -372,5 +374,6 @@ vim.keymap.set("n", "dA", function() vim.fn.shellescape(prompt_path), vim.fn.shellescape(filepath) ) - dbt_cmd_raw(cmd) -end, { desc = "[D]bt [A]nalyse model (deep, DB cross-ref)" }) + vim.fn.system(cmd) + vim.notify("Opened interactive claude session in tmux window 'dbt:" .. model .. "'", vim.log.levels.INFO) +end, { desc = "[D]bt [A]nalyse model (interactive)" }) From 2038e17ac1f3516323dc1ea5e6c163b5f60eb74f Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 21:51:55 +0000 Subject: [PATCH 11/26] Add dp to preview dbt model rows in a split Runs dbt show --limit 20 asynchronously and displays the results in a read-only scratch buffer. Press q to close. https://claude.ai/code/session_016johXLfEd6P4umaT14YQEQ --- nvim/lua/config/keymaps.lua | 51 +++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/nvim/lua/config/keymaps.lua b/nvim/lua/config/keymaps.lua index 28a9ac2..f01ae73 100644 --- a/nvim/lua/config/keymaps.lua +++ b/nvim/lua/config/keymaps.lua @@ -277,6 +277,57 @@ vim.keymap.set("n", "ds", function() dbt_cmd("uv run dbt show -s %s") end, { desc = "[D]bt [S]how preview results" }) +-- dbt: preview sample rows in a horizontal split +vim.keymap.set("n", "dp", function() + local model = dbt_model_name() + if not model then + return + end + vim.notify("Fetching preview for " .. model .. "...", vim.log.levels.INFO) + local cmd = { "uv", "run", "dbt", "show", "-s", model, "--limit", "20" } + local output = {} + vim.fn.jobstart(cmd, { + cwd = dbt_project_root(), + stdout_buffered = true, + stderr_buffered = true, + on_stdout = function(_, data) + if data then + vim.list_extend(output, data) + end + end, + on_exit = function(_, exit_code) + vim.schedule(function() + if exit_code ~= 0 then + vim.notify("dbt show failed (exit " .. exit_code .. 
")", vim.log.levels.ERROR) + return + end + -- Trim trailing empty lines + while #output > 0 and output[#output] == "" do + table.remove(output) + end + if #output == 0 then + vim.notify("No rows returned", vim.log.levels.WARN) + return + end + -- Open a scratch buffer in a horizontal split + vim.cmd("botright new") + local buf = vim.api.nvim_get_current_buf() + vim.bo[buf].buftype = "nofile" + vim.bo[buf].bufhidden = "wipe" + vim.bo[buf].swapfile = false + vim.bo[buf].filetype = "sql" + vim.api.nvim_buf_set_name(buf, "dbt-preview://" .. model) + vim.api.nvim_buf_set_lines(buf, 0, -1, false, output) + vim.bo[buf].modifiable = false + -- Resize to fit content (max 20 lines) + local height = math.min(#output, 20) + vim.api.nvim_win_set_height(0, height) + vim.keymap.set("n", "q", "close", { buffer = buf, silent = true }) + end) + end, + }) +end, { desc = "[D]bt [P]review model rows" }) + -- dbt: read a prompt template from nvim/prompts/ and substitute {{key}} placeholders local function dbt_load_prompt(name, vars) local prompt_dir = vim.fn.stdpath("config") .. 
"/prompts/" From 0885d1559ffa8fc11500f2feee532674eb50190e Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 21:54:09 +0000 Subject: [PATCH 12/26] Return cursor to code buffer after dbt_cmd sends to toggleterm https://claude.ai/code/session_016johXLfEd6P4umaT14YQEQ --- nvim/lua/config/keymaps.lua | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nvim/lua/config/keymaps.lua b/nvim/lua/config/keymaps.lua index f01ae73..84e005e 100644 --- a/nvim/lua/config/keymaps.lua +++ b/nvim/lua/config/keymaps.lua @@ -82,7 +82,8 @@ local function dbt_cmd(cmd_template) -- Build the command local cmd = string.format(cmd_template, model) - -- Send to toggleterm (terminal 1) + -- Send to toggleterm (terminal 1), then return focus to the code window + local prev_win = vim.api.nvim_get_current_win() local term = require("toggleterm.terminal").get(1) if not term then term = require("toggleterm.terminal").Terminal:new({ id = 1 }) @@ -91,6 +92,7 @@ local function dbt_cmd(cmd_template) term:toggle() end term:send(cmd) + vim.api.nvim_set_current_win(prev_win) end -- dbt: find the project root (directory containing dbt_project.yml) From ea2df0d180322445d7c2f9267758cec830377cfa Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 22:00:24 +0000 Subject: [PATCH 13/26] Replace fzf-lua with telescope.builtin across all keymaps fzf-lua is not installed; the config uses telescope via LazyVim. Converts wiki search, wiki insert link, dbt model finder (with C-r/C-b/C-t actions), and dbt grep to telescope equivalents. 
https://claude.ai/code/session_016johXLfEd6P4umaT14YQEQ --- nvim/lua/config/keymaps.lua | 107 ++++++++++++++++-------------------- 1 file changed, 46 insertions(+), 61 deletions(-) diff --git a/nvim/lua/config/keymaps.lua b/nvim/lua/config/keymaps.lua index 84e005e..04a9ea6 100644 --- a/nvim/lua/config/keymaps.lua +++ b/nvim/lua/config/keymaps.lua @@ -17,8 +17,7 @@ end, { desc = "[I]nsert [D]ate" }) -- Shortcut for searching the wiki vim.keymap.set("n", "ws", function() - local fzf = require("fzf-lua") - fzf.files({ prompt = "wiki  ", cwd = vim.g.wiki_root }) + require("telescope.builtin").find_files({ prompt_title = "Wiki", cwd = vim.g.wiki_root }) end, { desc = "[W]iki [S]earch" }) -- Create a new page in the wiki @@ -33,28 +32,23 @@ vim.keymap.set("n", "wn", function() end, { desc = "[W]iki [N]ew Page" }) vim.keymap.set("n", "wi", function() - local fzf = require("fzf-lua") - fzf.files({ - prompt = "Wiki Files", + local actions = require("telescope.actions") + local action_state = require("telescope.actions.state") + require("telescope.builtin").find_files({ + prompt_title = "Wiki Insert Link", cwd = vim.g.wiki_root, - actions = { - -- Override the default selection action - ["default"] = function(selected) - -- Get the selected file (should be just one) - if selected and #selected > 0 then - local full_path = selected[1] - - -- Extract just the filename from the path - local filename = full_path:match("([^/\\]+)$") - - -- Create a wiki link format [[filename]] without the file extension + attach_mappings = function(prompt_bufnr, map) + actions.select_default:replace(function() + local entry = action_state.get_selected_entry() + actions.close(prompt_bufnr) + if entry then + local filename = entry[1]:match("([^/\\]+)$") local link = string.format("[[%s]]", filename:gsub("%.%w+$", "")) - - -- Insert the wiki link at the current cursor position vim.api.nvim_put({ link }, "c", true, true) end - end, - }, + end) + return true + end, }) end, { noremap = true, silent 
= true, desc = "[W]iki [I]nsert Link" }) @@ -161,7 +155,7 @@ vim.keymap.set("n", "dg", function() end end, { desc = "[D]bt [G]o to ref/source" }) --- dbt: send a raw command string to toggleterm (used by fzf actions) +-- dbt: send a raw command string to toggleterm (used by picker actions) local function dbt_cmd_raw(cmd) local term = require("toggleterm.terminal").get(1) if not term then @@ -181,48 +175,39 @@ vim.keymap.set("n", "df", function() return end - local fzf = require("fzf-lua") - fzf.files({ - prompt = "dbt model ", + local actions = require("telescope.actions") + local action_state = require("telescope.actions.state") + require("telescope.builtin").find_files({ + prompt_title = "dbt model (enter=open, C-r=run, C-b=build, C-t=test)", cwd = root, - cmd = "fd -e sql . models", - actions = { - -- Default: open the file - ["default"] = fzf.actions.file_edit, - -- Ctrl-r: run the selected model - ["ctrl-r"] = function(selected) - if selected and #selected > 0 then - local name = selected[1]:match("([^/]+)%.sql$") - if name then - dbt_cmd_raw("uv run dbt run -s " .. name) - end - end - end, - -- Ctrl-b: build the selected model - ["ctrl-b"] = function(selected) - if selected and #selected > 0 then - local name = selected[1]:match("([^/]+)%.sql$") - if name then - dbt_cmd_raw("uv run dbt build -s " .. name) - end + search_dirs = { "models" }, + find_command = { "fd", "-e", "sql" }, + attach_mappings = function(prompt_bufnr, map) + local function get_model_name() + local entry = action_state.get_selected_entry() + if entry then + return entry[1]:match("([^/]+)%.sql$") end - end, - -- Ctrl-t: test the selected model - ["ctrl-t"] = function(selected) - if selected and #selected > 0 then - local name = selected[1]:match("([^/]+)%.sql$") - if name then - dbt_cmd_raw("uv run dbt test -s " .. 
name) - end - end - end, - }, - fzf_opts = { - ["--header"] = "enter=open | ctrl-r=run | ctrl-b=build | ctrl-t=test", - ["--multi"] = true, - }, + end + map("i", "", function() + local name = get_model_name() + actions.close(prompt_bufnr) + if name then dbt_cmd_raw("uv run dbt run -s " .. name) end + end) + map("i", "", function() + local name = get_model_name() + actions.close(prompt_bufnr) + if name then dbt_cmd_raw("uv run dbt build -s " .. name) end + end) + map("i", "", function() + local name = get_model_name() + actions.close(prompt_bufnr) + if name then dbt_cmd_raw("uv run dbt test -s " .. name) end + end) + return true + end, }) -end, { desc = "[D]bt [F]ind model (fzf)" }) +end, { desc = "[D]bt [F]ind model" }) -- dbt: open the compiled SQL for the current model in a split vim.keymap.set("n", "do", function() @@ -252,7 +237,7 @@ vim.keymap.set("n", "d/", function() vim.notify("No dbt_project.yml found", vim.log.levels.WARN) return end - require("fzf-lua").grep({ prompt = "dbt grep ", cwd = root .. "/models" }) + require("telescope.builtin").live_grep({ prompt_title = "dbt grep", cwd = root .. "/models" }) end, { desc = "[D]bt search models" }) vim.keymap.set("n", "dr", function() From 35131c3e72bec2770da7acf4312154b25b7bcd9f Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 22:09:47 +0000 Subject: [PATCH 14/26] Add dv to show dbt model output in visidata Pipes `dbt show --output csv --limit 500` into visidata via toggleterm. Formats and saves the file first like other dbt commands. 
https://claude.ai/code/session_016johXLfEd6P4umaT14YQEQ --- nvim/lua/config/keymaps.lua | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/nvim/lua/config/keymaps.lua b/nvim/lua/config/keymaps.lua index 04a9ea6..353c204 100644 --- a/nvim/lua/config/keymaps.lua +++ b/nvim/lua/config/keymaps.lua @@ -315,6 +315,22 @@ vim.keymap.set("n", "dp", function() }) end, { desc = "[D]bt [P]review model rows" }) +-- dbt: show model output as CSV piped into visidata +vim.keymap.set("n", "dv", function() + local model = dbt_model_name() + if not model then + return + end + local root = dbt_project_root() + if not root then + vim.notify("No dbt_project.yml found", vim.log.levels.WARN) + return + end + require("conform").format({ async = false, lsp_fallback = true }) + vim.cmd("write") + dbt_cmd_raw("cd " .. root .. " && uv run dbt show -s " .. model .. " --limit 500 --output csv | vd -f csv") +end, { desc = "[D]bt [V]isidata preview" }) + -- dbt: read a prompt template from nvim/prompts/ and substitute {{key}} placeholders local function dbt_load_prompt(name, vars) local prompt_dir = vim.fn.stdpath("config") .. "/prompts/" From 4983db09df21156ae0e9ca673e15ea254e02105c Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 22:12:31 +0000 Subject: [PATCH 15/26] Return focus to code buffer after visidata exits Uses a dedicated toggleterm Terminal with close_on_exit and an on_exit callback instead of the shared terminal, so focus returns to the previous window when visidata is quit. 
https://claude.ai/code/session_016johXLfEd6P4umaT14YQEQ --- nvim/lua/config/keymaps.lua | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/nvim/lua/config/keymaps.lua b/nvim/lua/config/keymaps.lua index 353c204..049650f 100644 --- a/nvim/lua/config/keymaps.lua +++ b/nvim/lua/config/keymaps.lua @@ -328,7 +328,21 @@ vim.keymap.set("n", "dv", function() end require("conform").format({ async = false, lsp_fallback = true }) vim.cmd("write") - dbt_cmd_raw("cd " .. root .. " && uv run dbt show -s " .. model .. " --limit 500 --output csv | vd -f csv") + local prev_win = vim.api.nvim_get_current_win() + local cmd = "cd " .. root .. " && uv run dbt show -s " .. model .. " --limit 500 --output csv | vd -f csv" + require("toggleterm.terminal").Terminal + :new({ + cmd = cmd, + close_on_exit = true, + on_exit = function() + vim.schedule(function() + if vim.api.nvim_win_is_valid(prev_win) then + vim.api.nvim_set_current_win(prev_win) + end + end) + end, + }) + :toggle() end, { desc = "[D]bt [V]isidata preview" }) -- dbt: read a prompt template from nvim/prompts/ and substitute {{key}} placeholders From ed173af977682d4b3034149aa9eb158c2cc64aa6 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 22:20:22 +0000 Subject: [PATCH 16/26] Fix dbt visidata: use json output with json-to-csv conversion dbt show only supports --output json/text, not csv. Use --output json --log-format json and pipe through a python script that extracts the preview data and converts to CSV. 
https://claude.ai/code/session_016johXLfEd6P4umaT14YQEQ --- nvim/lua/config/keymaps.lua | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/nvim/lua/config/keymaps.lua b/nvim/lua/config/keymaps.lua index 049650f..a4252ec 100644 --- a/nvim/lua/config/keymaps.lua +++ b/nvim/lua/config/keymaps.lua @@ -329,7 +329,30 @@ vim.keymap.set("n", "dv", function() require("conform").format({ async = false, lsp_fallback = true }) vim.cmd("write") local prev_win = vim.api.nvim_get_current_win() - local cmd = "cd " .. root .. " && uv run dbt show -s " .. model .. " --limit 500 --output csv | vd -f csv" + local json_to_csv = [[python3 -c " +import sys, json, csv +raw = sys.stdin.read() +for line in raw.splitlines(): + try: + obj = json.loads(line) + except json.JSONDecodeError: + continue + preview = None + if 'data' in obj and 'preview' in obj['data']: + preview = obj['data']['preview'] + elif 'results' in obj: + preview = obj['results'][0].get('preview') + elif 'preview' in obj: + preview = obj['preview'] + if preview: + if isinstance(preview, str): + preview = json.loads(preview) + w = csv.DictWriter(sys.stdout, fieldnames=preview[0].keys()) + w.writeheader() + w.writerows(preview) + break +"]] + local cmd = "cd " .. root .. " && uv run dbt show -s " .. model .. " --limit 500 --output json --log-format json | " .. json_to_csv .. 
" | vd -f csv" require("toggleterm.terminal").Terminal :new({ cmd = cmd, From b4fe1b83fb06035fd9ba49660ac1bd607df5d4d7 Mon Sep 17 00:00:00 2001 From: Michael Barton Date: Fri, 6 Mar 2026 13:47:39 -0800 Subject: [PATCH 17/26] Rename dbt prompt files --- {nvim/prompts => dbt}/dbt_deep_analysis.md | 0 {nvim/prompts => dbt}/dbt_quick_analysis.md | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename {nvim/prompts => dbt}/dbt_deep_analysis.md (100%) rename {nvim/prompts => dbt}/dbt_quick_analysis.md (100%) diff --git a/nvim/prompts/dbt_deep_analysis.md b/dbt/dbt_deep_analysis.md similarity index 100% rename from nvim/prompts/dbt_deep_analysis.md rename to dbt/dbt_deep_analysis.md diff --git a/nvim/prompts/dbt_quick_analysis.md b/dbt/dbt_quick_analysis.md similarity index 100% rename from nvim/prompts/dbt_quick_analysis.md rename to dbt/dbt_quick_analysis.md From 054a85593f6ac9affeb12274ee319310d04c032b Mon Sep 17 00:00:00 2001 From: Michael Barton Date: Fri, 6 Mar 2026 13:48:11 -0800 Subject: [PATCH 18/26] Add explicit python scripts for db analsis --- ansible/tasks/neovim.yml | 9 +- dbt/dbt_analyse.py | 109 +++++++++++ dbt/dbt_batch_audit.py | 398 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 513 insertions(+), 3 deletions(-) create mode 100755 dbt/dbt_analyse.py create mode 100644 dbt/dbt_batch_audit.py diff --git a/ansible/tasks/neovim.yml b/ansible/tasks/neovim.yml index ce22212..6e37719 100644 --- a/ansible/tasks/neovim.yml +++ b/ansible/tasks/neovim.yml @@ -15,13 +15,15 @@ - ruff - yamllint -- name: Remove existing ftplugin directory if it exists (to allow symlinking) +- name: Remove existing directories that need to be replaced with symlinks ansible.builtin.file: - path: "{{ ansible_env.HOME }}/.config/nvim/ftplugin" + path: "{{ ansible_env.HOME }}/.config/nvim/{{ item }}" state: absent + loop: + - ftplugin + - dbt - name: Link specific configuration files - ansible.builtin.file: src: "{{ playbook_dir }}/../{{ item.src }}" dest: "{{ 
ansible_env.HOME }}/{{ item.dest }}" @@ -30,6 +32,7 @@ - { src: "nvim/init.lua", dest: ".config/nvim/init.lua" } - { src: "nvim/lua", dest: ".config/nvim/lua" } - { src: "nvim/ftplugin", dest: ".config/nvim/ftplugin" } + - { src: "dbt", dest: ".config/nvim/dbt" } - name: Install jsonlint node.js package. community.general.npm: diff --git a/dbt/dbt_analyse.py b/dbt/dbt_analyse.py new file mode 100755 index 0000000..baec277 --- /dev/null +++ b/dbt/dbt_analyse.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python3 +""" +dbt_analyse: compile a dbt model, gather context, then launch an interactive +cursor-agent session. Designed to be called from a tmux window. + +Usage: + dbt_analyse.py --model --root \ + --filepath --prompt +""" + +import subprocess +import sys +import glob +import os +import click + + +def run(cmd, cwd=None, capture=False, check=True): + result = subprocess.run( + cmd, + cwd=cwd, + capture_output=capture, + text=True, + ) + if check and result.returncode != 0: + stderr = result.stderr.strip() if result.stderr else "" + click.echo(f"ERROR: command failed (exit {result.returncode}): {' '.join(cmd)}", err=True) + if stderr: + click.echo(stderr, err=True) + sys.exit(result.returncode) + return result + + +@click.command() +@click.option("--model", required=True, help="dbt model name (no extension)") +@click.option("--root", required=True, help="Path to dbt project root") +@click.option("--filepath", required=True, help="Absolute path to the source SQL file") +@click.option("--prompt", required=True, help="Path to the prompt template .md file") +@click.option("--limit", default=20, show_default=True, help="Row limit for dbt show") +@click.option( + "--model-flag", default="sonnet-4.6-thinking", show_default=True, help="cursor-agent model" +) +def main(model, root, filepath, prompt, limit, model_flag): + # --- 1. compile --- + click.echo(f"Compiling {model}...") + run(["uv", "run", "dbt", "compile", "-s", model, "--quiet"], cwd=root) + + # --- 2. 
find compiled SQL --- + pattern = os.path.join(root, "target", "compiled", "**", f"{model}.sql") + matches = glob.glob(pattern, recursive=True) + if not matches: + click.echo(f"ERROR: no compiled SQL found for {model} — did compile succeed?", err=True) + sys.exit(1) + compiled_sql = open(matches[0]).read() + click.echo(f"Compiled SQL: {matches[0]}") + + # --- 3. sample rows --- + click.echo(f"Fetching sample rows (limit={limit})...") + result = run( + [ + "uv", + "run", + "dbt", + "show", + "-s", + model, + "--limit", + str(limit), + "--output", + "json", + "--log-format", + "json", + ], + cwd=root, + capture=True, + check=False, + ) + if result.returncode != 0: + click.echo( + f"WARNING: dbt show failed (exit {result.returncode}), continuing without sample rows", + err=True, + ) + sample_rows = "(dbt show failed)" + else: + sample_rows = result.stdout.strip() or "(no rows returned)" + + # --- 4. source SQL --- + if not os.path.exists(filepath): + click.echo(f"ERROR: source file not found: {filepath}", err=True) + sys.exit(1) + source_sql = open(filepath).read() + + # --- 5. build prompt --- + if not os.path.exists(prompt): + click.echo(f"ERROR: prompt template not found: {prompt}", err=True) + sys.exit(1) + template = open(prompt).read() + full_prompt = template.replace("{{compiled_sql}}", compiled_sql).replace( + "{{sample_rows}}", sample_rows + ) + full_prompt += f"\n\nSource SQL:\n{source_sql}" + + # --- 6. 
launch cursor-agent --- + click.echo(f"Launching cursor-agent ({model_flag})...") + os.execlp("cursor-agent", "cursor-agent", "--model", model_flag, full_prompt) + + +if __name__ == "__main__": + main() diff --git a/dbt/dbt_batch_audit.py b/dbt/dbt_batch_audit.py new file mode 100644 index 0000000..04ee536 --- /dev/null +++ b/dbt/dbt_batch_audit.py @@ -0,0 +1,398 @@ +#!/usr/bin/env python3 +# /// script +# requires-python = ">=3.10" +# dependencies = ["click", "mdformat"] +# /// +""" +dbt_batch_audit: run dbt model audits across multiple models and LLMs in +parallel, then synthesize a final consolidated report. + +Accepts SQL file paths, directories, and shell globs. Model names are inferred +from filenames (e.g. models/int_orders.sql → int_orders). + +Usage: + # single files + dbt_batch_audit.py models/int_orders.sql models/stg_users.sql ... + + # a whole directory + dbt_batch_audit.py models/intermediate/ + + # shell glob (expanded by the shell before the script sees it) + dbt_batch_audit.py models/int_*.sql + + # mix and match + dbt_batch_audit.py models/intermediate/ models/stg_special.sql +""" + +import subprocess +import sys +import glob +import os +import time +from concurrent.futures import ThreadPoolExecutor, as_completed + +import click + + +def run(cmd, cwd=None, capture=False, check=True): + result = subprocess.run(cmd, cwd=cwd, capture_output=capture, text=True) + if check and result.returncode != 0: + stderr = result.stderr.strip() if result.stderr else "" + click.echo( + f"ERROR: command failed (exit {result.returncode}): {' '.join(cmd)}", + err=True, + ) + if stderr: + click.echo(stderr, err=True) + sys.exit(result.returncode) + return result + + +def compile_model(model_name, root): + click.echo(f" Compiling {model_name}...") + run(["uv", "run", "dbt", "compile", "-s", model_name, "--quiet"], cwd=root) + pattern = os.path.join(root, "target", "compiled", "**", f"{model_name}.sql") + matches = glob.glob(pattern, recursive=True) + if not matches: + 
click.echo(f"ERROR: no compiled SQL found for {model_name}", err=True) + sys.exit(1) + with open(matches[0]) as f: + return f.read() + + +def get_sample_rows(model_name, root, limit): + click.echo(f" Fetching sample rows for {model_name} (limit={limit})...") + result = run( + [ + "uv", "run", "dbt", "show", "-s", model_name, + "--limit", str(limit), "--output", "json", "--log-format", "json", + ], + cwd=root, + capture=True, + check=False, + ) + if result.returncode != 0: + click.echo(f" WARNING: dbt show failed for {model_name}", err=True) + return "(dbt show failed)" + return result.stdout.strip() or "(no rows returned)" + + +def write_context_file(output_dir, model_name, template, compiled_sql, sample_rows, source_sql): + """Write the full audit context to a file so cursor-agent can read it.""" + ctx_dir = os.path.join(output_dir, ".context") + os.makedirs(ctx_dir, exist_ok=True) + + content = ( + template + .replace("{{compiled_sql}}", compiled_sql) + .replace("{{sample_rows}}", sample_rows) + ) + content += f"\n\nSource SQL:\n{source_sql}" + + ctx_path = os.path.join(ctx_dir, f"{model_name}__context.md") + with open(ctx_path, "w") as f: + f.write(content) + return os.path.abspath(ctx_path) + + +def get_available_models(): + """Return the set of valid model IDs from cursor-agent --list-models.""" + result = subprocess.run( + ["cursor-agent", "--list-models"], + capture_output=True, + text=True, + ) + # Strip ANSI escape codes, then extract the first token of each line that + # looks like a model ID (alphanumeric + hyphens/dots, before the " - " separator). 
+ import re + ansi_escape = re.compile(r"\x1b\[[0-9;]*[A-Za-z]|\x1b\[[0-9]*[A-Za-z]") + clean = ansi_escape.sub("", result.stdout) + models = set() + for line in clean.splitlines(): + m = re.match(r"^([a-zA-Z0-9][a-zA-Z0-9._-]+)\s+-\s+", line.strip()) + if m: + models.add(m.group(1)) + return models + + +def validate_llms(llms): + """Fail fast if any requested LLM is not in cursor-agent's available models.""" + click.echo("Validating model names against cursor-agent...") + available = get_available_models() + if not available: + click.echo( + "WARNING: could not retrieve available models list; skipping validation", + err=True, + ) + return + invalid = [llm for llm in llms if llm not in available] + if invalid: + click.echo( + f"ERROR: the following model(s) are not available in cursor-agent:\n" + + "\n".join(f" - {m}" for m in invalid) + + f"\n\nAvailable models:\n " + "\n ".join(sorted(available)), + err=True, + ) + sys.exit(1) + click.echo(f" All {len(llms)} model(s) validated OK.\n") + + +def run_audit(model_name, context_path, llm, output_dir, root): + click.echo(f" [{model_name} × {llm}] Starting audit...") + start = time.monotonic() + + prompt = ( + f"Read the audit instructions and dbt model context from {context_path}. " + "Perform a thorough data quality audit of the dbt model as described. " + "Output your complete findings as a well-structured markdown report." 
+ ) + + result = subprocess.run( + ["cursor-agent", "--print", "--force", "--model", llm, prompt], + capture_output=True, + text=True, + cwd=root, + ) + + if result.returncode == 0: + report = result.stdout.strip() + else: + report = f"(audit failed: exit {result.returncode})\n{result.stderr}" + + safe_llm = llm.replace("/", "_").replace(" ", "_") + report_path = os.path.join(output_dir, f"{model_name}__{safe_llm}.md") + with open(report_path, "w") as f: + f.write(f"# Audit: {model_name} (LLM: {llm})\n\n{report}\n") + + elapsed = time.monotonic() - start + click.echo(f" [{model_name} × {llm}] Done ({elapsed:.0f}s) → {report_path}") + return model_name, llm, report + + +def synthesize_reports(reports, synthesis_model, output_dir, root): + click.echo("\nSynthesizing final report...") + start = time.monotonic() + + combined_parts = [] + for model_name, llm, report in reports: + combined_parts.append( + f"---\n## Model: {model_name} | Reviewer: {llm}\n\n{report}\n" + ) + combined_text = "\n".join(combined_parts) + + combined_path = os.path.join(output_dir, "all_individual_reports.md") + with open(combined_path, "w") as f: + f.write(f"# All Individual Audit Reports\n\n{combined_text}\n") + + # Write synthesis context to a file to avoid command-line length limits + ctx_dir = os.path.join(output_dir, ".context") + os.makedirs(ctx_dir, exist_ok=True) + synthesis_ctx_path = os.path.abspath( + os.path.join(ctx_dir, "synthesis_context.md") + ) + + synthesis_instructions = f"""\ +You are a senior analytics engineer reviewing multiple dbt model audit reports. +Each report below was generated by a different LLM auditing a dbt model. + +Your job: +1. Cross-reference findings across models and reviewers +2. Identify the most critical issues that appear consistently +3. Flag any contradictions between reviewers +4. Prioritize recommendations by impact and effort +5. 
Produce a final consolidated report in markdown + +A key part of your analysis is **cross-model bug propagation**: for every significant finding in any +model, explicitly trace its downstream consequences through the model dependency chain. Ask: which +downstream models consume this model's output? If this bug is present in the data, what does that +mean for each downstream model's correctness? Does the bug amplify, get filtered out, or silently +corrupt aggregations further down the chain? Call out cases where a bug in an upstream model makes +a downstream model's output untrustworthy even if the downstream model itself has no defects. + +Generate a final synthesis report with: +- An executive summary +- Critical findings (agreed upon by multiple reviewers or clearly valid), each with an explicit + **Downstream impact** sub-section tracing the bug through the dependency chain +- A dependency propagation map: a table or diagram showing which bugs flow into which downstream + models and what the compounded effect is +- Model-specific recommendations ordered by severity +- Cross-model patterns or systemic issues +- A prioritized action plan table, where items that fix root-cause bugs affecting multiple + downstream models are ranked higher than equivalent-effort fixes that are locally scoped + +--- + +Individual audit reports: + +{combined_text}""" + + with open(synthesis_ctx_path, "w") as f: + f.write(synthesis_instructions) + + prompt = ( + f"Read the synthesis instructions and individual audit reports from " + f"{synthesis_ctx_path}. Follow the instructions to produce a final " + "consolidated synthesis report in markdown." 
+ ) + + result = subprocess.run( + ["cursor-agent", "--print", "--force", "--model", synthesis_model, prompt], + capture_output=True, + text=True, + cwd=root, + ) + + if result.returncode == 0: + synthesis = result.stdout.strip() + else: + synthesis = ( + f"(synthesis failed: exit {result.returncode})\n{result.stderr}" + ) + + synthesis_path = os.path.join(output_dir, "final_synthesis.md") + with open(synthesis_path, "w") as f: + f.write(f"# dbt Model Audit — Final Synthesis\n\n{synthesis}\n") + + elapsed = time.monotonic() - start + click.echo(f"Final synthesis ({elapsed:.0f}s) → {synthesis_path}") + click.echo(f"Combined reports → {combined_path}") + return synthesis_path + + +def resolve_sql_paths(paths): + """Expand directories and verify that every path is a .sql file.""" + resolved = [] + for p in paths: + p = os.path.abspath(p) + if os.path.isdir(p): + children = sorted( + f for f in glob.glob(os.path.join(p, "**", "*.sql"), recursive=True) + ) + if not children: + click.echo(f"WARNING: no .sql files found in {p}", err=True) + resolved.extend(children) + elif os.path.isfile(p): + if not p.endswith(".sql"): + click.echo(f"ERROR: not a .sql file: {p}", err=True) + sys.exit(1) + resolved.append(p) + else: + click.echo(f"ERROR: path not found: {p}", err=True) + sys.exit(1) + return resolved + + +def model_name_from_path(filepath): + return os.path.splitext(os.path.basename(filepath))[0] + + +@click.command() +@click.argument("paths", nargs=-1, required=True) +@click.option( + "--llm", "llms", required=True, multiple=True, + help="LLM model name for cursor-agent (repeatable)", +) +@click.option("--root", required=True, help="Path to dbt project root") +@click.option("--prompt", required=True, help="Path to the prompt template .md file") +@click.option( + "--output-dir", default="./audit_reports", show_default=True, + help="Directory for output reports", +) +@click.option("--limit", default=20, show_default=True, help="Row limit for dbt show") +@click.option( + 
"--synthesis-model", default="sonnet-4.6-thinking", show_default=True, + help="LLM for the final synthesis step", +) +@click.option( + "--concurrency", default=3, show_default=True, + help="Max parallel cursor-agent invocations", +) +def main(paths, llms, root, prompt, output_dir, limit, synthesis_model, concurrency): + """Run dbt model audits across multiple models and LLMs, then synthesize. + + PATHS are .sql files, directories containing .sql files, or shell globs. + Model names are inferred from filenames (int_orders.sql → int_orders). + """ + validate_llms(llms + (synthesis_model,)) + + sql_files = resolve_sql_paths(paths) + if not sql_files: + click.echo("ERROR: no .sql files resolved from the given paths", err=True) + sys.exit(1) + + model_specs = [] + seen = set() + for filepath in sql_files: + name = model_name_from_path(filepath) + if name in seen: + click.echo( + f"ERROR: duplicate model name '{name}' from {filepath}", err=True, + ) + sys.exit(1) + seen.add(name) + model_specs.append((name, filepath)) + + if not os.path.exists(prompt): + click.echo(f"ERROR: prompt template not found: {prompt}", err=True) + sys.exit(1) + with open(prompt) as f: + template = f.read() + + root = os.path.abspath(root) + output_dir = os.path.abspath(output_dir) + os.makedirs(output_dir, exist_ok=True) + + total = len(model_specs) * len(llms) + click.echo( + f"Auditing {len(model_specs)} model(s) × {len(llms)} LLM(s) = {total} audit(s)" + ) + click.echo(f"Concurrency: {concurrency} | Synthesis model: {synthesis_model}\n") + + context_paths = {} + for name, filepath in model_specs: + click.echo(f"Preparing {name}...") + compiled_sql = compile_model(name, root) + sample_rows = get_sample_rows(name, root, limit) + with open(filepath) as f: + source_sql = f.read() + context_paths[name] = write_context_file( + output_dir, name, template, compiled_sql, sample_rows, source_sql, + ) + + click.echo(f"\nAll models compiled. 
Launching {total} audit(s)...\n") + + reports = [] + with ThreadPoolExecutor(max_workers=concurrency) as pool: + futures = {} + for name, _ in model_specs: + for llm in llms: + fut = pool.submit( + run_audit, name, context_paths[name], llm, output_dir, root, + ) + futures[fut] = (name, llm) + + for fut in as_completed(futures): + name, llm = futures[fut] + try: + reports.append(fut.result()) + except Exception as e: + click.echo(f" ERROR [{name} × {llm}]: {e}", err=True) + reports.append((name, llm, f"(error: {e})")) + + reports.sort(key=lambda r: (r[0], r[1])) + + synthesize_reports(reports, synthesis_model, output_dir, root) + + md_files = glob.glob(os.path.join(output_dir, "*.md")) + if md_files: + click.echo(f"\nFormatting {len(md_files)} markdown file(s)...") + run( + ["uvx", "mdformat", "--wrap", "100"] + md_files, + cwd=root, + ) + + click.echo(f"\nDone! {len(reports)} audit(s) completed. Reports in: {output_dir}") + + +if __name__ == "__main__": + main() From b5a745bdc4e6195dcc8dae1b6190010dc497b256 Mon Sep 17 00:00:00 2001 From: Michael Barton Date: Fri, 6 Mar 2026 13:49:00 -0800 Subject: [PATCH 19/26] Update dbt nvim keymaps --- nvim/lua/config/keymaps.lua | 43 +++++++++++++++---------------------- 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/nvim/lua/config/keymaps.lua b/nvim/lua/config/keymaps.lua index a4252ec..2e13c82 100644 --- a/nvim/lua/config/keymaps.lua +++ b/nvim/lua/config/keymaps.lua @@ -368,9 +368,9 @@ for line in raw.splitlines(): :toggle() end, { desc = "[D]bt [V]isidata preview" }) --- dbt: read a prompt template from nvim/prompts/ and substitute {{key}} placeholders +-- dbt: read a prompt template from the llm/ directory and substitute {{key}} placeholders local function dbt_load_prompt(name, vars) - local prompt_dir = vim.fn.stdpath("config") .. "/prompts/" + local prompt_dir = vim.fn.stdpath("config") .. "/dbt/" local path = prompt_dir .. name .. 
".md" local lines = vim.fn.readfile(path) if #lines == 0 then @@ -384,7 +384,7 @@ local function dbt_load_prompt(name, vars) return prompt end --- dbt: run claude agent on current model (quick analysis with sonnet) +-- dbt: run cursor-agent on current model (quick analysis with sonnet) -- Replaces buffer contents with the file + inline SQL comments vim.keymap.set("n", "da", function() local filepath = vim.fn.expand("%:p") @@ -400,7 +400,9 @@ vim.keymap.set("n", "da", function() vim.notify("Running quick analysis...", vim.log.levels.INFO) - local cmd = { "claude", "-p", prompt, "--model", "claude-sonnet-4-6", filepath } + local file_content = table.concat(vim.fn.readfile(filepath), "\n") + local full_prompt = prompt .. "\n\nFile: " .. filepath .. "\n```sql\n" .. file_content .. "\n```" + local cmd = { "cursor-agent", "--print", "--model", "sonnet-4.6", full_prompt } local output = {} vim.fn.jobstart(cmd, { stdout_buffered = true, @@ -412,7 +414,7 @@ vim.keymap.set("n", "da", function() on_exit = function(_, exit_code) vim.schedule(function() if exit_code ~= 0 then - vim.notify("claude exited with code " .. exit_code, vim.log.levels.ERROR) + vim.notify("cursor-agent exited with code " .. 
exit_code, vim.log.levels.ERROR) return end -- Remove trailing empty strings from jobstart output @@ -420,7 +422,7 @@ vim.keymap.set("n", "da", function() table.remove(output) end if #output == 0 then - vim.notify("No output from claude", vim.log.levels.WARN) + vim.notify("No output from cursor-agent", vim.log.levels.WARN) return end vim.api.nvim_buf_set_lines(bufnr, 0, -1, false, output) @@ -430,7 +432,7 @@ vim.keymap.set("n", "da", function() }) end, { desc = "[D]bt [A]nalyse model (quick)" }) --- dbt: open interactive claude session in a new tmux window with compiled SQL + sample rows as context +-- dbt: open interactive cursor-agent session in a new tmux window with compiled SQL + sample rows as context vim.keymap.set("n", "dA", function() local filepath = vim.fn.expand("%:p") if filepath == "" then @@ -447,24 +449,13 @@ vim.keymap.set("n", "dA", function() return end - -- Open a new tmux window that compiles, gathers context, then starts interactive claude - local prompt_path = vim.fn.stdpath("config") .. "/prompts/dbt_deep_analysis.md" - local cmd = string.format( - [[tmux new-window -n 'dbt:%s' ']] - .. [[uv run dbt compile -s %s --quiet ]] - .. [[&& compiled_sql=$(cat $(find %s/target/compiled -name "%s.sql" | head -1) 2>/dev/null) ]] - .. [[&& sample_rows=$(uv run dbt show -s %s --limit 20 2>/dev/null) ]] - .. [[&& prompt=$(sed -e "s|{{compiled_sql}}|${compiled_sql}|g" -e "s|{{sample_rows}}|${sample_rows}|g" %s) ]] - .. [[&& claude --model claude-sonnet-4-6 --thinking --prompt "${prompt}" %s ]] - .. [[|| read -p "Press enter to close..."']], - model, - model, - vim.fn.shellescape(root), - model, - model, - vim.fn.shellescape(prompt_path), - vim.fn.shellescape(filepath) + -- Open a new tmux window running the standalone dbt_analyse.py script + local prompt_path = vim.fn.stdpath("config") .. "/dbt/dbt_deep_analysis.md" + local script_path = vim.fn.stdpath("config") .. 
"/dbt/dbt_analyse.py" + local shell_script = string.format( + [[cd %s && uv run python3 %s --model %s --root %s --filepath %s --prompt %s || (echo "Press enter to close..." && read)]], + root, script_path, model, root, filepath, prompt_path ) - vim.fn.system(cmd) - vim.notify("Opened interactive claude session in tmux window 'dbt:" .. model .. "'", vim.log.levels.INFO) + vim.fn.jobstart({ "tmux", "new-window", "-n", "dbt:" .. model, shell_script }, { detach = true }) + vim.notify("Opened interactive cursor-agent session in tmux window 'dbt:" .. model .. "'", vim.log.levels.INFO) end, { desc = "[D]bt [A]nalyse model (interactive)" }) From abd77b073b6368553f400f22b936ae02e5a3a0e8 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 6 Mar 2026 21:56:52 +0000 Subject: [PATCH 20/26] Fix resource leaks, safety, and robustness issues in dbt audit scripts - Close file handles properly in dbt_analyse.py (use `with` statements) - Add PEP 723 inline metadata to dbt_analyse.py for uv compatibility - Move `import re` to module level in dbt_batch_audit.py - Write context to a temp file in dbt_analyse.py to avoid ARG_MAX limits - Add subprocess timeouts (900s audit, 1200s synthesis) to prevent hangs - Order template substitutions to avoid placeholder injection https://claude.ai/code/session_01RHSUYqsWy6xLfAC9tFTy66 --- dbt/dbt_analyse.py | 39 +++++++++++++++++++++++------ dbt/dbt_batch_audit.py | 56 ++++++++++++++++++++++++++++-------------- 2 files changed, 69 insertions(+), 26 deletions(-) diff --git a/dbt/dbt_analyse.py b/dbt/dbt_analyse.py index baec277..4c9a3f4 100755 --- a/dbt/dbt_analyse.py +++ b/dbt/dbt_analyse.py @@ -1,4 +1,8 @@ #!/usr/bin/env python3 +# /// script +# requires-python = ">=3.10" +# dependencies = ["click"] +# /// """ dbt_analyse: compile a dbt model, gather context, then launch an interactive cursor-agent session. Designed to be called from a tmux window. 
@@ -12,6 +16,8 @@
 import sys
 import glob
 import os
+import tempfile
+
 import click
 
 
@@ -51,7 +57,8 @@ def main(model, root, filepath, prompt, limit, model_flag):
     if not matches:
         click.echo(f"ERROR: no compiled SQL found for {model} — did compile succeed?", err=True)
         sys.exit(1)
-    compiled_sql = open(matches[0]).read()
+    with open(matches[0]) as f:
+        compiled_sql = f.read()
     click.echo(f"Compiled SQL: {matches[0]}")
 
     # --- 3. sample rows ---
@@ -88,21 +95,37 @@
     if not os.path.exists(filepath):
         click.echo(f"ERROR: source file not found: {filepath}", err=True)
         sys.exit(1)
-    source_sql = open(filepath).read()
+    with open(filepath) as f:
+        source_sql = f.read()
 
     # --- 5. build prompt ---
     if not os.path.exists(prompt):
         click.echo(f"ERROR: prompt template not found: {prompt}", err=True)
         sys.exit(1)
-    template = open(prompt).read()
-    full_prompt = template.replace("{{compiled_sql}}", compiled_sql).replace(
-        "{{sample_rows}}", sample_rows
-    )
+    with open(prompt) as f:
+        template = f.read()
+    # Substitute compiled_sql first, then sample_rows. NOTE(review): a literal
+    # {{sample_rows}} token inside the compiled SQL would still get expanded.
+    full_prompt = template.replace("{{compiled_sql}}", compiled_sql)
+    full_prompt = full_prompt.replace("{{sample_rows}}", sample_rows)
     full_prompt += f"\n\nSource SQL:\n{source_sql}"
 
-    # --- 6. launch cursor-agent ---
+    # --- 6. write context to a temp file & launch cursor-agent ---
+    # Avoids OS arg-length limits (ARG_MAX) when compiled SQL + sample rows
+    # are large.
+    ctx = tempfile.NamedTemporaryFile(
+        mode="w", suffix=".md", prefix=f"dbt_audit_{model}_", delete=False,
+    )
+    ctx.write(full_prompt)
+    ctx.close()
+    click.echo(f"Context written to {ctx.name}")
+
     click.echo(f"Launching cursor-agent ({model_flag})...")
-    os.execlp("cursor-agent", "cursor-agent", "--model", model_flag, full_prompt)
+    agent_prompt = (
+        f"Read the audit instructions and dbt model context from {ctx.name}. 
" + "Perform a thorough data quality audit of the dbt model as described." + ) + os.execlp("cursor-agent", "cursor-agent", "--model", model_flag, agent_prompt) if __name__ == "__main__": diff --git a/dbt/dbt_batch_audit.py b/dbt/dbt_batch_audit.py index 04ee536..15eb36b 100644 --- a/dbt/dbt_batch_audit.py +++ b/dbt/dbt_batch_audit.py @@ -24,6 +24,7 @@ dbt_batch_audit.py models/intermediate/ models/stg_special.sql """ +import re import subprocess import sys import glob @@ -82,11 +83,10 @@ def write_context_file(output_dir, model_name, template, compiled_sql, sample_ro ctx_dir = os.path.join(output_dir, ".context") os.makedirs(ctx_dir, exist_ok=True) - content = ( - template - .replace("{{compiled_sql}}", compiled_sql) - .replace("{{sample_rows}}", sample_rows) - ) + # Substitute compiled_sql first to avoid the compiled SQL accidentally + # containing the {{sample_rows}} placeholder. + content = template.replace("{{compiled_sql}}", compiled_sql) + content = content.replace("{{sample_rows}}", sample_rows) content += f"\n\nSource SQL:\n{source_sql}" ctx_path = os.path.join(ctx_dir, f"{model_name}__context.md") @@ -104,7 +104,6 @@ def get_available_models(): ) # Strip ANSI escape codes, then extract the first token of each line that # looks like a model ID (alphanumeric + hyphens/dots, before the " - " separator). - import re ansi_escape = re.compile(r"\x1b\[[0-9;]*[A-Za-z]|\x1b\[[0-9]*[A-Za-z]") clean = ansi_escape.sub("", result.stdout) models = set() @@ -147,12 +146,24 @@ def run_audit(model_name, context_path, llm, output_dir, root): "Output your complete findings as a well-structured markdown report." 
) - result = subprocess.run( - ["cursor-agent", "--print", "--force", "--model", llm, prompt], - capture_output=True, - text=True, - cwd=root, - ) + try: + result = subprocess.run( + ["cursor-agent", "--print", "--force", "--model", llm, prompt], + capture_output=True, + text=True, + cwd=root, + timeout=900, + ) + except subprocess.TimeoutExpired: + report = "(audit timed out after 900s)" + click.echo(f" [{model_name} × {llm}] Timed out", err=True) + safe_llm = llm.replace("/", "_").replace(" ", "_") + report_path = os.path.join(output_dir, f"{model_name}__{safe_llm}.md") + with open(report_path, "w") as f: + f.write(f"# Audit: {model_name} (LLM: {llm})\n\n{report}\n") + elapsed = time.monotonic() - start + click.echo(f" [{model_name} × {llm}] Done ({elapsed:.0f}s) → {report_path}") + return model_name, llm, report if result.returncode == 0: report = result.stdout.strip() @@ -235,12 +246,21 @@ def synthesize_reports(reports, synthesis_model, output_dir, root): "consolidated synthesis report in markdown." 
) - result = subprocess.run( - ["cursor-agent", "--print", "--force", "--model", synthesis_model, prompt], - capture_output=True, - text=True, - cwd=root, - ) + try: + result = subprocess.run( + ["cursor-agent", "--print", "--force", "--model", synthesis_model, prompt], + capture_output=True, + text=True, + cwd=root, + timeout=1200, + ) + except subprocess.TimeoutExpired: + synthesis = "(synthesis timed out after 1200s)" + click.echo("WARNING: synthesis timed out", err=True) + synthesis_path = os.path.join(output_dir, "final_synthesis.md") + with open(synthesis_path, "w") as f: + f.write(f"# dbt Model Audit — Final Synthesis\n\n{synthesis}\n") + return synthesis_path if result.returncode == 0: synthesis = result.stdout.strip() From bd959492154d11d2a3f35a8d16a3211091765d90 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 6 Mar 2026 22:43:30 +0000 Subject: [PATCH 21/26] Enrich dbt audit with lineage, test coverage, and structured prompts - Rewrite dbt_deep_analysis.md with a comprehensive 8-section audit checklist covering schema/types, join correctness, filters, grain, data quality, performance, test coverage gaps, and upstream risks - Add structured output format (findings table, evidence queries, suggested dbt test YAML snippets) - Add conditional template sections ({{#if lineage}}, {{#if existing_tests}}) with a lightweight Handlebars-style renderer in both scripts - Gather model lineage (parents/children) via `dbt ls` selectors - Scan schema.yml files for existing test definitions and include them so the LLM focuses on coverage gaps rather than redundant suggestions - Add pyyaml dependency to both scripts' PEP 723 metadata https://claude.ai/code/session_01RHSUYqsWy6xLfAC9tFTy66 --- dbt/dbt_analyse.py | 145 +++++++++++++++++++++++++++++++++------ dbt/dbt_batch_audit.py | 93 +++++++++++++++++++++++-- dbt/dbt_deep_analysis.md | 97 +++++++++++++++++++++++++- 3 files changed, 305 insertions(+), 30 deletions(-) diff --git a/dbt/dbt_analyse.py b/dbt/dbt_analyse.py 
index 4c9a3f4..91ba2e8 100755 --- a/dbt/dbt_analyse.py +++ b/dbt/dbt_analyse.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # /// script # requires-python = ">=3.10" -# dependencies = ["click"] +# dependencies = ["click", "pyyaml"] # /// """ dbt_analyse: compile a dbt model, gather context, then launch an interactive @@ -12,13 +12,16 @@ --filepath --prompt """ +import json import subprocess import sys import glob import os +import re import tempfile import click +import yaml def run(cmd, cwd=None, capture=False, check=True): @@ -37,6 +40,107 @@ def run(cmd, cwd=None, capture=False, check=True): return result +def get_lineage(model, root): + """Return a summary of immediate parents and children from dbt ls.""" + lines = [] + for direction, selector in [("parents", f"+{model},1+{model}"), ("children", f"{model}+,{model}1+")]: + result = subprocess.run( + ["uv", "run", "dbt", "ls", "-s", selector, "--output", "name", "--quiet"], + capture_output=True, text=True, cwd=root, + ) + if result.returncode == 0: + names = [n.strip() for n in result.stdout.strip().splitlines() if n.strip() and n.strip() != model] + if names: + lines.append(f"**{direction.title()}:** {', '.join(names)}") + return "\n".join(lines) if lines else "" + + +def get_existing_tests(model, root): + """Extract test definitions for a model from schema.yml files.""" + schema_files = glob.glob(os.path.join(root, "**", "schema.yml"), recursive=True) + schema_files += glob.glob(os.path.join(root, "**", "_schema.yml"), recursive=True) + schema_files += glob.glob(os.path.join(root, "**", f"*_models.yml"), recursive=True) + schema_files += glob.glob(os.path.join(root, "**", f"*.yml"), recursive=True) + # Deduplicate while preserving order + seen = set() + unique_files = [] + for f in schema_files: + if f not in seen: + seen.add(f) + unique_files.append(f) + + tests = [] + for schema_path in unique_files: + try: + with open(schema_path) as f: + doc = yaml.safe_load(f) + except (yaml.YAMLError, OSError): + continue + 
if not isinstance(doc, dict): + continue + for m in doc.get("models", []): + if not isinstance(m, dict) or m.get("name") != model: + continue + # Model-level tests + for t in m.get("tests", []): + tests.append(f"- model-level: {t}") + # Column-level tests + for col in m.get("columns", []): + if not isinstance(col, dict): + continue + col_name = col.get("name", "?") + for t in col.get("tests", []): + if isinstance(t, str): + tests.append(f"- {col_name}: {t}") + elif isinstance(t, dict): + tests.append(f"- {col_name}: {t}") + return "\n".join(tests) if tests else "" + + +def get_data_profile(model, root): + """Run a profiling query via dbt show to get column stats.""" + # Use an inline query that profiles the model's output + profile_sql = f""" + {{% set cols = adapter.get_columns_in_relation(ref('{model}')) %}} + SELECT + {{% for col in cols %}} + '{{{ col.name }}}' AS column_name_{{{{ loop.index }}}}, + COUNT(*) AS total_rows_{{{{ loop.index }}}}, + COUNT("{{{ col.name }}}") AS non_null_{{{{ loop.index }}}}, + COUNT(DISTINCT "{{{ col.name }}}") AS distinct_{{{{ loop.index }}}} + {{% if not loop.last %}},{{% endif %}} + {{% endfor %}} + FROM {{{{ ref('{model}') }}}} + """ + # Simpler approach: ask dbt to show the model with a higher limit and + # compute stats from sample rows in the prompt. The LLM has database + # access and can run profiling queries itself. We just nudge it. 
+ return "" + + +def render_template(template, replacements): + """Replace template placeholders, handling conditional {{#if}}/{{^if}} blocks.""" + for key, value in replacements.items(): + # Handle {{#if key}}...{{/if}} blocks + if_pattern = re.compile( + r"\{\{#if " + re.escape(key) + r"\}\}(.*?)\{\{/if\}\}", + re.DOTALL, + ) + not_pattern = re.compile( + r"\{\{\^if " + re.escape(key) + r"\}\}(.*?)\{\{/if\}\}", + re.DOTALL, + ) + if value: + template = if_pattern.sub(r"\1", template) + template = not_pattern.sub("", template) + else: + template = if_pattern.sub("", template) + template = not_pattern.sub(r"\1", template) + # Replace the simple placeholder + template = template.replace("{{" + key + "}}", value) + return template + + @click.command() @click.option("--model", required=True, help="dbt model name (no extension)") @click.option("--root", required=True, help="Path to dbt project root") @@ -65,18 +169,8 @@ def main(model, root, filepath, prompt, limit, model_flag): click.echo(f"Fetching sample rows (limit={limit})...") result = run( [ - "uv", - "run", - "dbt", - "show", - "-s", - model, - "--limit", - str(limit), - "--output", - "json", - "--log-format", - "json", + "uv", "run", "dbt", "show", "-s", model, + "--limit", str(limit), "--output", "json", "--log-format", "json", ], cwd=root, capture=True, @@ -98,21 +192,30 @@ def main(model, root, filepath, prompt, limit, model_flag): with open(filepath) as f: source_sql = f.read() - # --- 5. build prompt --- + # --- 5. gather lineage and existing tests --- + click.echo("Gathering model lineage...") + lineage = get_lineage(model, root) + + click.echo("Scanning for existing dbt tests...") + existing_tests = get_existing_tests(model, root) + + # --- 6. 
build prompt --- if not os.path.exists(prompt): click.echo(f"ERROR: prompt template not found: {prompt}", err=True) sys.exit(1) with open(prompt) as f: template = f.read() - # Substitute compiled_sql first; use a sentinel to avoid the compiled SQL - # accidentally containing the {{sample_rows}} placeholder. - full_prompt = template.replace("{{compiled_sql}}", compiled_sql) - full_prompt = full_prompt.replace("{{sample_rows}}", sample_rows) + + full_prompt = render_template(template, { + "compiled_sql": compiled_sql, + "sample_rows": sample_rows, + "existing_tests": existing_tests, + "lineage": lineage, + "data_profile": "", + }) full_prompt += f"\n\nSource SQL:\n{source_sql}" - # --- 6. write context to a temp file & launch cursor-agent --- - # Avoids OS arg-length limits (ARG_MAX) when compiled SQL + sample rows - # are large. + # --- 7. write context to a temp file & launch cursor-agent --- ctx = tempfile.NamedTemporaryFile( mode="w", suffix=".md", prefix=f"dbt_audit_{model}_", delete=False, ) diff --git a/dbt/dbt_batch_audit.py b/dbt/dbt_batch_audit.py index 15eb36b..6fca44a 100644 --- a/dbt/dbt_batch_audit.py +++ b/dbt/dbt_batch_audit.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # /// script # requires-python = ">=3.10" -# dependencies = ["click", "mdformat"] +# dependencies = ["click", "mdformat", "pyyaml"] # /// """ dbt_batch_audit: run dbt model audits across multiple models and LLMs in @@ -33,6 +33,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed import click +import yaml def run(cmd, cwd=None, capture=False, check=True): @@ -78,15 +79,90 @@ def get_sample_rows(model_name, root, limit): return result.stdout.strip() or "(no rows returned)" -def write_context_file(output_dir, model_name, template, compiled_sql, sample_rows, source_sql): +def get_lineage(model_name, root): + """Return a summary of immediate parents and children from dbt ls.""" + lines = [] + for direction, selector in [("parents", f"+{model_name},1+{model_name}"), ("children", 
f"{model_name}+,{model_name}1+")]: + result = subprocess.run( + ["uv", "run", "dbt", "ls", "-s", selector, "--output", "name", "--quiet"], + capture_output=True, text=True, cwd=root, + ) + if result.returncode == 0: + names = [n.strip() for n in result.stdout.strip().splitlines() if n.strip() and n.strip() != model_name] + if names: + lines.append(f"**{direction.title()}:** {', '.join(names)}") + return "\n".join(lines) if lines else "" + + +def get_existing_tests(model_name, root): + """Extract test definitions for a model from schema.yml files.""" + schema_files = glob.glob(os.path.join(root, "**", "*.yml"), recursive=True) + seen = set() + unique_files = [] + for f in schema_files: + if f not in seen: + seen.add(f) + unique_files.append(f) + + tests = [] + for schema_path in unique_files: + try: + with open(schema_path) as f: + doc = yaml.safe_load(f) + except (yaml.YAMLError, OSError): + continue + if not isinstance(doc, dict): + continue + for m in doc.get("models", []): + if not isinstance(m, dict) or m.get("name") != model_name: + continue + for t in m.get("tests", []): + tests.append(f"- model-level: {t}") + for col in m.get("columns", []): + if not isinstance(col, dict): + continue + col_name = col.get("name", "?") + for t in col.get("tests", []): + if isinstance(t, str): + tests.append(f"- {col_name}: {t}") + elif isinstance(t, dict): + tests.append(f"- {col_name}: {t}") + return "\n".join(tests) if tests else "" + + +def render_template(template, replacements): + """Replace template placeholders, handling conditional {{#if}}/{{^if}} blocks.""" + for key, value in replacements.items(): + if_pattern = re.compile( + r"\{\{#if " + re.escape(key) + r"\}\}(.*?)\{\{/if\}\}", + re.DOTALL, + ) + not_pattern = re.compile( + r"\{\{\^if " + re.escape(key) + r"\}\}(.*?)\{\{/if\}\}", + re.DOTALL, + ) + if value: + template = if_pattern.sub(r"\1", template) + template = not_pattern.sub("", template) + else: + template = if_pattern.sub("", template) + template = 
not_pattern.sub(r"\1", template) + template = template.replace("{{" + key + "}}", value) + return template + + +def write_context_file(output_dir, model_name, template, compiled_sql, sample_rows, source_sql, lineage="", existing_tests=""): """Write the full audit context to a file so cursor-agent can read it.""" ctx_dir = os.path.join(output_dir, ".context") os.makedirs(ctx_dir, exist_ok=True) - # Substitute compiled_sql first to avoid the compiled SQL accidentally - # containing the {{sample_rows}} placeholder. - content = template.replace("{{compiled_sql}}", compiled_sql) - content = content.replace("{{sample_rows}}", sample_rows) + content = render_template(template, { + "compiled_sql": compiled_sql, + "sample_rows": sample_rows, + "existing_tests": existing_tests, + "lineage": lineage, + "data_profile": "", + }) content += f"\n\nSource SQL:\n{source_sql}" ctx_path = os.path.join(ctx_dir, f"{model_name}__context.md") @@ -375,8 +451,13 @@ def main(paths, llms, root, prompt, output_dir, limit, synthesis_model, concurre sample_rows = get_sample_rows(name, root, limit) with open(filepath) as f: source_sql = f.read() + click.echo(f" Gathering lineage for {name}...") + lineage = get_lineage(name, root) + click.echo(f" Scanning tests for {name}...") + existing_tests = get_existing_tests(name, root) context_paths[name] = write_context_file( output_dir, name, template, compiled_sql, sample_rows, source_sql, + lineage=lineage, existing_tests=existing_tests, ) click.echo(f"\nAll models compiled. Launching {total} audit(s)...\n") diff --git a/dbt/dbt_deep_analysis.md b/dbt/dbt_deep_analysis.md index 397ca3d..fbc10d4 100644 --- a/dbt/dbt_deep_analysis.md +++ b/dbt/dbt_deep_analysis.md @@ -1,7 +1,98 @@ -You have access to a duckdb database. Interrogate the database to understand the schema and data, then cross-reference with this dbt model. Check for: data quality issues, join correctness, missing filters, column type mismatches, and potential improvements. 
Run queries to validate assumptions. +You have access to a duckdb database. You are auditing a dbt model for data quality, correctness, and best practices. Interrogate the database to validate every claim you make — do not speculate without running a query first. -Here is the compiled SQL: +## Audit checklist + +Work through each section. For every finding, run a query to confirm it. + +### 1. Schema & types +- Are column types appropriate (e.g. dates stored as DATE not VARCHAR, monetary values as DECIMAL not FLOAT)? +- Are there implicit casts in joins or WHERE clauses that could silently drop rows or change values? +- Do any columns contain mixed types or unexpected NULLs? + +### 2. Join correctness +- Is every join relationship correct (1:1, 1:N, M:N)? Run a query: does the join **fan out** (produce more rows than the driving table)? +- Are there orphaned rows (LEFT JOIN misses)? What fraction of rows have NULL foreign keys after the join? +- Are join keys unique on the side that should be unique? Query `COUNT(*) vs COUNT(DISTINCT key)`. + +### 3. Filters & business logic +- Are there WHERE / HAVING filters that could silently exclude valid records (e.g. filtering on a column that is sometimes NULL)? +- Is there business logic (CASE statements, date arithmetic, aggregations) that could produce wrong results on edge cases? +- Are date boundaries inclusive/exclusive as intended? + +### 4. Grain & uniqueness +- What is the intended grain of this model? Verify with `COUNT(*) vs COUNT(DISTINCT )`. +- Could the model produce duplicate rows under any upstream data condition? + +### 5. Data quality +- What percentage of each column is NULL? Flag any column where the NULL rate is suspicious. +- Are there unexpected duplicate values, negative numbers, future dates, or empty strings where there shouldn't be? +- Do value distributions look reasonable (run MIN, MAX, AVG, percentiles for numeric columns)? + +### 6. 
Performance & best practices +- Are there SELECT * or unnecessary columns being carried through? +- Could CTEs be simplified or combined? +- Are there window functions that could be replaced with simpler aggregations, or vice versa? +- Is the model incremental where it should be, or full-refresh where incremental would be better? + +### 7. Test coverage gaps +{{#if existing_tests}} +The following dbt tests are already defined for this model: +{{existing_tests}} + +Identify what is NOT covered by existing tests. Focus recommendations on gaps. +{{/if}} +{{^if existing_tests}} +No dbt tests were found for this model. Recommend the most important tests to add. +{{/if}} + +### 8. Upstream dependency risks +{{#if lineage}} +Model lineage (immediate upstream/downstream): +{{lineage}} + +Consider: if an upstream model delivers late, delivers duplicates, or changes its grain, how does this model behave? Are there defensive checks? +{{/if}} + +## Context + +### Compiled SQL {{compiled_sql}} -Here are the first 20 rows returned by this model: +### Sample rows {{sample_rows}} + +### Data profile +{{#if data_profile}} +{{data_profile}} +{{/if}} + +## Output format + +Structure your report as follows: + +``` +## Executive summary +(2-3 sentences: overall health, most critical finding) + +## Critical findings +(Issues that could produce wrong numbers in production) + +| # | Finding | Severity | Evidence query | Affected columns | +|---|---------|----------|---------------|-----------------| +| 1 | ... | ... | ... | ... 
| + +### Finding 1: +**Query:** <the SQL you ran> +**Result:** <what you found> +**Impact:** <what goes wrong downstream> +**Fix:** <specific SQL or config change> + +## Warnings +(Issues that aren't wrong today but are fragile) + +## Recommendations +(Best-practice improvements, ordered by impact) + +## Suggested dbt tests +(Specific test YAML snippets to add) +``` From 0c6912ba7759ec102b27a51225f5594e413f80f7 Mon Sep 17 00:00:00 2001 From: Claude <noreply@anthropic.com> Date: Fri, 6 Mar 2026 22:46:45 +0000 Subject: [PATCH 22/26] Clean up dead code in dbt_analyse.py - Remove unused `import json` - Remove stub `get_data_profile` that always returned empty string - Simplify redundant glob patterns in get_existing_tests (*.yml already covers schema.yml, _schema.yml, *_models.yml) https://claude.ai/code/session_01RHSUYqsWy6xLfAC9tFTy66 --- dbt/dbt_analyse.py | 33 +-------------------------------- 1 file changed, 1 insertion(+), 32 deletions(-) diff --git a/dbt/dbt_analyse.py b/dbt/dbt_analyse.py index 91ba2e8..f065d22 100755 --- a/dbt/dbt_analyse.py +++ b/dbt/dbt_analyse.py @@ -12,7 +12,6 @@ --filepath <source_sql_path> --prompt <prompt_template_path> """ -import json import subprocess import sys import glob @@ -57,17 +56,7 @@ def get_lineage(model, root): def get_existing_tests(model, root): """Extract test definitions for a model from schema.yml files.""" - schema_files = glob.glob(os.path.join(root, "**", "schema.yml"), recursive=True) - schema_files += glob.glob(os.path.join(root, "**", "_schema.yml"), recursive=True) - schema_files += glob.glob(os.path.join(root, "**", f"*_models.yml"), recursive=True) - schema_files += glob.glob(os.path.join(root, "**", f"*.yml"), recursive=True) - # Deduplicate while preserving order - seen = set() - unique_files = [] - for f in schema_files: - if f not in seen: - seen.add(f) - unique_files.append(f) + unique_files = glob.glob(os.path.join(root, "**", "*.yml"), recursive=True) tests = [] for schema_path in unique_files: @@ 
-97,26 +86,6 @@ def get_existing_tests(model, root): return "\n".join(tests) if tests else "" -def get_data_profile(model, root): - """Run a profiling query via dbt show to get column stats.""" - # Use an inline query that profiles the model's output - profile_sql = f""" - {{% set cols = adapter.get_columns_in_relation(ref('{model}')) %}} - SELECT - {{% for col in cols %}} - '{{{ col.name }}}' AS column_name_{{{{ loop.index }}}}, - COUNT(*) AS total_rows_{{{{ loop.index }}}}, - COUNT("{{{ col.name }}}") AS non_null_{{{{ loop.index }}}}, - COUNT(DISTINCT "{{{ col.name }}}") AS distinct_{{{{ loop.index }}}} - {{% if not loop.last %}},{{% endif %}} - {{% endfor %}} - FROM {{{{ ref('{model}') }}}} - """ - # Simpler approach: ask dbt to show the model with a higher limit and - # compute stats from sample rows in the prompt. The LLM has database - # access and can run profiling queries itself. We just nudge it. - return "" - def render_template(template, replacements): """Replace template placeholders, handling conditional {{#if}}/{{^if}} blocks.""" From 9eb9e439a3baee1dd9d751c49a814fafd80f0abc Mon Sep 17 00:00:00 2001 From: Claude <noreply@anthropic.com> Date: Fri, 6 Mar 2026 22:56:23 +0000 Subject: [PATCH 23/26] Split dbt nvim keymaps into their own file Move all dbt helpers and keymaps from keymaps.lua into a dedicated config/dbt.lua, loaded via require("config.dbt"). Keeps keymaps.lua focused on general-purpose bindings. 
https://claude.ai/code/session_01RHSUYqsWy6xLfAC9tFTy66 --- nvim/lua/config/dbt.lua | 409 ++++++++++++++++++++++++++++++++++++ nvim/lua/config/keymaps.lua | 409 +----------------------------------- 2 files changed, 411 insertions(+), 407 deletions(-) create mode 100644 nvim/lua/config/dbt.lua diff --git a/nvim/lua/config/dbt.lua b/nvim/lua/config/dbt.lua new file mode 100644 index 0000000..962891b --- /dev/null +++ b/nvim/lua/config/dbt.lua @@ -0,0 +1,409 @@ +-- dbt keymaps and helpers +-- Loaded from keymaps.lua + +-- Extract model name from current file path (e.g., models/staging/stg_orders.sql -> stg_orders) +local function dbt_model_name() + local filepath = vim.fn.expand("%:t:r") + if filepath == "" then + vim.notify("No file open", vim.log.levels.WARN) + return nil + end + return filepath +end + +-- Format the current SQL file, then send a dbt command to the terminal +local function dbt_cmd(cmd_template) + local model = dbt_model_name() + if not model then + return + end + + -- Format with conform (sqlfmt), then save + require("conform").format({ async = false, lsp_fallback = true }) + vim.cmd("write") + + -- Build the command + local cmd = string.format(cmd_template, model) + + -- Send to toggleterm (terminal 1), then return focus to the code window + local prev_win = vim.api.nvim_get_current_win() + local term = require("toggleterm.terminal").get(1) + if not term then + term = require("toggleterm.terminal").Terminal:new({ id = 1 }) + end + if not term:is_open() then + term:toggle() + end + term:send(cmd) + vim.api.nvim_set_current_win(prev_win) +end + +-- Find the project root (directory containing dbt_project.yml) +local function dbt_project_root() + local path = vim.fn.findfile("dbt_project.yml", ".;") + if path == "" then + return nil + end + return vim.fn.fnamemodify(path, ":p:h") +end + +-- Send a raw command string to toggleterm (used by picker actions) +local function dbt_cmd_raw(cmd) + local term = require("toggleterm.terminal").get(1) + if not term 
then + term = require("toggleterm.terminal").Terminal:new({ id = 1 }) + end + if not term:is_open() then + term:toggle() + end + term:send(cmd) +end + +-- Read a prompt template and substitute {{key}} placeholders +local function dbt_load_prompt(name, vars) + local prompt_dir = vim.fn.stdpath("config") .. "/dbt/" + local path = prompt_dir .. name .. ".md" + local lines = vim.fn.readfile(path) + if #lines == 0 then + vim.notify("Prompt not found: " .. path, vim.log.levels.ERROR) + return nil + end + local prompt = table.concat(lines, "\n") + for key, value in pairs(vars or {}) do + prompt = prompt:gsub("{{" .. key .. "}}", value) + end + return prompt +end + +-- Jump to model under cursor from {{ ref('model_name') }} or {{ source('src', 'table') }} +vim.keymap.set("n", "<leader>dg", function() + local line = vim.api.nvim_get_current_line() + local col = vim.api.nvim_win_get_cursor(0)[2] + 1 + + -- Try to find ref('model') or ref("model") around cursor + local ref_model = nil + for start_pos, name, end_pos in line:gmatch("()ref%(['\"]([^'\"]+)['\"]%)()" ) do + if col >= start_pos and col <= end_pos then + ref_model = name + break + end + end + + -- Try source('source_name', 'table_name') if no ref found + local source_name, source_table = nil, nil + if not ref_model then + for start_pos, src, tbl, end_pos in line:gmatch("()source%(['\"]([^'\"]+)['\"]%s*,%s*['\"]([^'\"]+)['\"]%)()" ) do + if col >= start_pos and col <= end_pos then + source_name, source_table = src, tbl + break + end + end + end + + if not ref_model and not source_table then + vim.notify("No ref() or source() under cursor", vim.log.levels.WARN) + return + end + + local root = dbt_project_root() + if not root then + vim.notify("No dbt_project.yml found", vim.log.levels.WARN) + return + end + + -- Search for the model file + local search_name = ref_model or source_table + local matches = vim.fn.globpath(root, "**/" .. search_name .. 
".sql", false, true) + if #matches == 0 then + -- Also try .yml for source definitions + matches = vim.fn.globpath(root, "**/" .. search_name .. ".yml", false, true) + end + + if #matches == 1 then + vim.cmd.edit(matches[1]) + elseif #matches > 1 then + vim.ui.select(matches, { prompt = "Multiple matches:" }, function(choice) + if choice then + vim.cmd.edit(choice) + end + end) + else + vim.notify("No file found for: " .. search_name, vim.log.levels.WARN) + end +end, { desc = "[D]bt [G]o to ref/source" }) + +-- Fuzzy model picker — select a model then choose an action +vim.keymap.set("n", "<leader>df", function() + local root = dbt_project_root() + if not root then + vim.notify("No dbt_project.yml found", vim.log.levels.WARN) + return + end + + local actions = require("telescope.actions") + local action_state = require("telescope.actions.state") + require("telescope.builtin").find_files({ + prompt_title = "dbt model (enter=open, C-r=run, C-b=build, C-t=test)", + cwd = root, + search_dirs = { "models" }, + find_command = { "fd", "-e", "sql" }, + attach_mappings = function(prompt_bufnr, map) + local function get_model_name() + local entry = action_state.get_selected_entry() + if entry then + return entry[1]:match("([^/]+)%.sql$") + end + end + map("i", "<C-r>", function() + local name = get_model_name() + actions.close(prompt_bufnr) + if name then dbt_cmd_raw("uv run dbt run -s " .. name) end + end) + map("i", "<C-b>", function() + local name = get_model_name() + actions.close(prompt_bufnr) + if name then dbt_cmd_raw("uv run dbt build -s " .. name) end + end) + map("i", "<C-t>", function() + local name = get_model_name() + actions.close(prompt_bufnr) + if name then dbt_cmd_raw("uv run dbt test -s " .. 
name) end + end) + return true + end, + }) +end, { desc = "[D]bt [F]ind model" }) + +-- Open the compiled SQL for the current model in a split +vim.keymap.set("n", "<leader>do", function() + local model = dbt_model_name() + if not model then + return + end + local root = dbt_project_root() + if not root then + vim.notify("No dbt_project.yml found", vim.log.levels.WARN) + return + end + local compiled = vim.fn.globpath(root, "target/compiled/**/" .. model .. ".sql", false, true) + if #compiled == 0 then + vim.notify("No compiled SQL found — run dbt compile first", vim.log.levels.WARN) + return + end + vim.cmd("vsplit " .. compiled[1]) + vim.bo.readonly = true + vim.bo.modifiable = false +end, { desc = "[D]bt [O]pen compiled SQL" }) + +-- Grep across all models (search for column names, CTEs, etc.) +vim.keymap.set("n", "<leader>d/", function() + local root = dbt_project_root() + if not root then + vim.notify("No dbt_project.yml found", vim.log.levels.WARN) + return + end + require("telescope.builtin").live_grep({ prompt_title = "dbt grep", cwd = root .. 
"/models" }) +end, { desc = "[D]bt search models" }) + +vim.keymap.set("n", "<leader>dr", function() + dbt_cmd("uv run dbt run -s %s") +end, { desc = "[D]bt [R]un current model" }) + +vim.keymap.set("n", "<leader>dR", function() + dbt_cmd("uv run dbt run -s %s+") +end, { desc = "[D]bt [R]un model + downstream" }) + +vim.keymap.set("n", "<leader>db", function() + dbt_cmd("uv run dbt build -s %s") +end, { desc = "[D]bt [B]uild current model (run + test)" }) + +vim.keymap.set("n", "<leader>dc", function() + dbt_cmd("uv run dbt compile -s %s") +end, { desc = "[D]bt [C]ompile current model" }) + +vim.keymap.set("n", "<leader>dt", function() + dbt_cmd("uv run dbt test -s %s") +end, { desc = "[D]bt [T]est current model" }) + +vim.keymap.set("n", "<leader>ds", function() + dbt_cmd("uv run dbt show -s %s") +end, { desc = "[D]bt [S]how preview results" }) + +-- Preview sample rows in a horizontal split +vim.keymap.set("n", "<leader>dp", function() + local model = dbt_model_name() + if not model then + return + end + vim.notify("Fetching preview for " .. model .. "...", vim.log.levels.INFO) + local cmd = { "uv", "run", "dbt", "show", "-s", model, "--limit", "20" } + local output = {} + vim.fn.jobstart(cmd, { + cwd = dbt_project_root(), + stdout_buffered = true, + stderr_buffered = true, + on_stdout = function(_, data) + if data then + vim.list_extend(output, data) + end + end, + on_exit = function(_, exit_code) + vim.schedule(function() + if exit_code ~= 0 then + vim.notify("dbt show failed (exit " .. exit_code .. 
")", vim.log.levels.ERROR) + return + end + -- Trim trailing empty lines + while #output > 0 and output[#output] == "" do + table.remove(output) + end + if #output == 0 then + vim.notify("No rows returned", vim.log.levels.WARN) + return + end + -- Open a scratch buffer in a horizontal split + vim.cmd("botright new") + local buf = vim.api.nvim_get_current_buf() + vim.bo[buf].buftype = "nofile" + vim.bo[buf].bufhidden = "wipe" + vim.bo[buf].swapfile = false + vim.bo[buf].filetype = "sql" + vim.api.nvim_buf_set_name(buf, "dbt-preview://" .. model) + vim.api.nvim_buf_set_lines(buf, 0, -1, false, output) + vim.bo[buf].modifiable = false + -- Resize to fit content (max 20 lines) + local height = math.min(#output, 20) + vim.api.nvim_win_set_height(0, height) + vim.keymap.set("n", "q", "<cmd>close<cr>", { buffer = buf, silent = true }) + end) + end, + }) +end, { desc = "[D]bt [P]review model rows" }) + +-- Show model output as CSV piped into visidata +vim.keymap.set("n", "<leader>dv", function() + local model = dbt_model_name() + if not model then + return + end + local root = dbt_project_root() + if not root then + vim.notify("No dbt_project.yml found", vim.log.levels.WARN) + return + end + require("conform").format({ async = false, lsp_fallback = true }) + vim.cmd("write") + local prev_win = vim.api.nvim_get_current_win() + local json_to_csv = [[python3 -c " +import sys, json, csv +raw = sys.stdin.read() +for line in raw.splitlines(): + try: + obj = json.loads(line) + except json.JSONDecodeError: + continue + preview = None + if 'data' in obj and 'preview' in obj['data']: + preview = obj['data']['preview'] + elif 'results' in obj: + preview = obj['results'][0].get('preview') + elif 'preview' in obj: + preview = obj['preview'] + if preview: + if isinstance(preview, str): + preview = json.loads(preview) + w = csv.DictWriter(sys.stdout, fieldnames=preview[0].keys()) + w.writeheader() + w.writerows(preview) + break +"]] + local cmd = "cd " .. root .. 
" && uv run dbt show -s " .. model .. " --limit 500 --output json --log-format json | " .. json_to_csv .. " | vd -f csv" + require("toggleterm.terminal").Terminal + :new({ + cmd = cmd, + close_on_exit = true, + on_exit = function() + vim.schedule(function() + if vim.api.nvim_win_is_valid(prev_win) then + vim.api.nvim_set_current_win(prev_win) + end + end) + end, + }) + :toggle() +end, { desc = "[D]bt [V]isidata preview" }) + +-- Run cursor-agent on current model (quick analysis with sonnet) +vim.keymap.set("n", "<leader>da", function() + local filepath = vim.fn.expand("%:p") + if filepath == "" then + vim.notify("No file open", vim.log.levels.WARN) + return + end + local prompt = dbt_load_prompt("dbt_quick_analysis", {}) + if not prompt then + return + end + local bufnr = vim.api.nvim_get_current_buf() + + vim.notify("Running quick analysis...", vim.log.levels.INFO) + + local file_content = table.concat(vim.fn.readfile(filepath), "\n") + local full_prompt = prompt .. "\n\nFile: " .. filepath .. "\n```sql\n" .. file_content .. "\n```" + local cmd = { "cursor-agent", "--print", "--model", "sonnet-4.6", full_prompt } + local output = {} + vim.fn.jobstart(cmd, { + stdout_buffered = true, + on_stdout = function(_, data) + if data then + output = data + end + end, + on_exit = function(_, exit_code) + vim.schedule(function() + if exit_code ~= 0 then + vim.notify("cursor-agent exited with code " .. 
exit_code, vim.log.levels.ERROR) + return + end + -- Remove trailing empty strings from jobstart output + while #output > 0 and output[#output] == "" do + table.remove(output) + end + if #output == 0 then + vim.notify("No output from cursor-agent", vim.log.levels.WARN) + return + end + vim.api.nvim_buf_set_lines(bufnr, 0, -1, false, output) + vim.notify("Inline comments added — review and :w to save, or :u to undo", vim.log.levels.INFO) + end) + end, + }) +end, { desc = "[D]bt [A]nalyse model (quick)" }) + +-- Open interactive cursor-agent session in a new tmux window with compiled SQL + sample rows as context +vim.keymap.set("n", "<leader>dA", function() + local filepath = vim.fn.expand("%:p") + if filepath == "" then + vim.notify("No file open", vim.log.levels.WARN) + return + end + local model = dbt_model_name() + if not model then + return + end + local root = dbt_project_root() + if not root then + vim.notify("No dbt_project.yml found", vim.log.levels.WARN) + return + end + + -- Open a new tmux window running the standalone dbt_analyse.py script + local prompt_path = vim.fn.stdpath("config") .. "/dbt/dbt_deep_analysis.md" + local script_path = vim.fn.stdpath("config") .. "/dbt/dbt_analyse.py" + local shell_script = string.format( + [[cd %s && uv run python3 %s --model %s --root %s --filepath %s --prompt %s || (echo "Press enter to close..." && read)]], + root, script_path, model, root, filepath, prompt_path + ) + vim.fn.jobstart({ "tmux", "new-window", "-n", "dbt:" .. model, shell_script }, { detach = true }) + vim.notify("Opened interactive cursor-agent session in tmux window 'dbt:" .. model .. 
"'", vim.log.levels.INFO) +end, { desc = "[D]bt [A]nalyse model (interactive)" }) diff --git a/nvim/lua/config/keymaps.lua b/nvim/lua/config/keymaps.lua index 2e13c82..6dab4bf 100644 --- a/nvim/lua/config/keymaps.lua +++ b/nvim/lua/config/keymaps.lua @@ -52,410 +52,5 @@ vim.keymap.set("n", "<leader>wi", function() }) end, { noremap = true, silent = true, desc = "[W]iki [I]nsert Link" }) --- dbt: extract model name from current file path (e.g., models/staging/stg_orders.sql -> stg_orders) -local function dbt_model_name() - local filepath = vim.fn.expand("%:t:r") - if filepath == "" then - vim.notify("No file open", vim.log.levels.WARN) - return nil - end - return filepath -end - --- dbt: format the current SQL file, then send a dbt command to the terminal -local function dbt_cmd(cmd_template) - local model = dbt_model_name() - if not model then - return - end - - -- Format with conform (sqlfmt), then save - require("conform").format({ async = false, lsp_fallback = true }) - vim.cmd("write") - - -- Build the command - local cmd = string.format(cmd_template, model) - - -- Send to toggleterm (terminal 1), then return focus to the code window - local prev_win = vim.api.nvim_get_current_win() - local term = require("toggleterm.terminal").get(1) - if not term then - term = require("toggleterm.terminal").Terminal:new({ id = 1 }) - end - if not term:is_open() then - term:toggle() - end - term:send(cmd) - vim.api.nvim_set_current_win(prev_win) -end - --- dbt: find the project root (directory containing dbt_project.yml) -local function dbt_project_root() - local path = vim.fn.findfile("dbt_project.yml", ".;") - if path == "" then - return nil - end - return vim.fn.fnamemodify(path, ":p:h") -end - --- dbt: jump to model under cursor from {{ ref('model_name') }} or {{ source('src', 'table') }} -vim.keymap.set("n", "<leader>dg", function() - local line = vim.api.nvim_get_current_line() - local col = vim.api.nvim_win_get_cursor(0)[2] + 1 - - -- Try to find ref('model') or 
ref("model") around cursor - local ref_model = nil - for start_pos, name, end_pos in line:gmatch("()ref%(['\"]([^'\"]+)['\"]%)()" ) do - if col >= start_pos and col <= end_pos then - ref_model = name - break - end - end - - -- Try source('source_name', 'table_name') if no ref found - local source_name, source_table = nil, nil - if not ref_model then - for start_pos, src, tbl, end_pos in line:gmatch("()source%(['\"]([^'\"]+)['\"]%s*,%s*['\"]([^'\"]+)['\"]%)()" ) do - if col >= start_pos and col <= end_pos then - source_name, source_table = src, tbl - break - end - end - end - - if not ref_model and not source_table then - vim.notify("No ref() or source() under cursor", vim.log.levels.WARN) - return - end - - local root = dbt_project_root() - if not root then - vim.notify("No dbt_project.yml found", vim.log.levels.WARN) - return - end - - -- Search for the model file - local search_name = ref_model or source_table - local matches = vim.fn.globpath(root, "**/" .. search_name .. ".sql", false, true) - if #matches == 0 then - -- Also try .yml for source definitions - matches = vim.fn.globpath(root, "**/" .. search_name .. ".yml", false, true) - end - - if #matches == 1 then - vim.cmd.edit(matches[1]) - elseif #matches > 1 then - vim.ui.select(matches, { prompt = "Multiple matches:" }, function(choice) - if choice then - vim.cmd.edit(choice) - end - end) - else - vim.notify("No file found for: " .. 
search_name, vim.log.levels.WARN) - end -end, { desc = "[D]bt [G]o to ref/source" }) - --- dbt: send a raw command string to toggleterm (used by picker actions) -local function dbt_cmd_raw(cmd) - local term = require("toggleterm.terminal").get(1) - if not term then - term = require("toggleterm.terminal").Terminal:new({ id = 1 }) - end - if not term:is_open() then - term:toggle() - end - term:send(cmd) -end - --- dbt: fuzzy model picker — select a model then choose an action -vim.keymap.set("n", "<leader>df", function() - local root = dbt_project_root() - if not root then - vim.notify("No dbt_project.yml found", vim.log.levels.WARN) - return - end - - local actions = require("telescope.actions") - local action_state = require("telescope.actions.state") - require("telescope.builtin").find_files({ - prompt_title = "dbt model (enter=open, C-r=run, C-b=build, C-t=test)", - cwd = root, - search_dirs = { "models" }, - find_command = { "fd", "-e", "sql" }, - attach_mappings = function(prompt_bufnr, map) - local function get_model_name() - local entry = action_state.get_selected_entry() - if entry then - return entry[1]:match("([^/]+)%.sql$") - end - end - map("i", "<C-r>", function() - local name = get_model_name() - actions.close(prompt_bufnr) - if name then dbt_cmd_raw("uv run dbt run -s " .. name) end - end) - map("i", "<C-b>", function() - local name = get_model_name() - actions.close(prompt_bufnr) - if name then dbt_cmd_raw("uv run dbt build -s " .. name) end - end) - map("i", "<C-t>", function() - local name = get_model_name() - actions.close(prompt_bufnr) - if name then dbt_cmd_raw("uv run dbt test -s " .. 
name) end - end) - return true - end, - }) -end, { desc = "[D]bt [F]ind model" }) - --- dbt: open the compiled SQL for the current model in a split -vim.keymap.set("n", "<leader>do", function() - local model = dbt_model_name() - if not model then - return - end - local root = dbt_project_root() - if not root then - vim.notify("No dbt_project.yml found", vim.log.levels.WARN) - return - end - local compiled = vim.fn.globpath(root, "target/compiled/**/" .. model .. ".sql", false, true) - if #compiled == 0 then - vim.notify("No compiled SQL found — run dbt compile first", vim.log.levels.WARN) - return - end - vim.cmd("vsplit " .. compiled[1]) - vim.bo.readonly = true - vim.bo.modifiable = false -end, { desc = "[D]bt [O]pen compiled SQL" }) - --- dbt: grep across all models (search for column names, CTEs, etc.) -vim.keymap.set("n", "<leader>d/", function() - local root = dbt_project_root() - if not root then - vim.notify("No dbt_project.yml found", vim.log.levels.WARN) - return - end - require("telescope.builtin").live_grep({ prompt_title = "dbt grep", cwd = root .. 
"/models" }) -end, { desc = "[D]bt search models" }) - -vim.keymap.set("n", "<leader>dr", function() - dbt_cmd("uv run dbt run -s %s") -end, { desc = "[D]bt [R]un current model" }) - -vim.keymap.set("n", "<leader>dR", function() - dbt_cmd("uv run dbt run -s %s+") -end, { desc = "[D]bt [R]un model + downstream" }) - -vim.keymap.set("n", "<leader>db", function() - dbt_cmd("uv run dbt build -s %s") -end, { desc = "[D]bt [B]uild current model (run + test)" }) - -vim.keymap.set("n", "<leader>dc", function() - dbt_cmd("uv run dbt compile -s %s") -end, { desc = "[D]bt [C]ompile current model" }) - -vim.keymap.set("n", "<leader>dt", function() - dbt_cmd("uv run dbt test -s %s") -end, { desc = "[D]bt [T]est current model" }) - -vim.keymap.set("n", "<leader>ds", function() - dbt_cmd("uv run dbt show -s %s") -end, { desc = "[D]bt [S]how preview results" }) - --- dbt: preview sample rows in a horizontal split -vim.keymap.set("n", "<leader>dp", function() - local model = dbt_model_name() - if not model then - return - end - vim.notify("Fetching preview for " .. model .. "...", vim.log.levels.INFO) - local cmd = { "uv", "run", "dbt", "show", "-s", model, "--limit", "20" } - local output = {} - vim.fn.jobstart(cmd, { - cwd = dbt_project_root(), - stdout_buffered = true, - stderr_buffered = true, - on_stdout = function(_, data) - if data then - vim.list_extend(output, data) - end - end, - on_exit = function(_, exit_code) - vim.schedule(function() - if exit_code ~= 0 then - vim.notify("dbt show failed (exit " .. exit_code .. 
")", vim.log.levels.ERROR) - return - end - -- Trim trailing empty lines - while #output > 0 and output[#output] == "" do - table.remove(output) - end - if #output == 0 then - vim.notify("No rows returned", vim.log.levels.WARN) - return - end - -- Open a scratch buffer in a horizontal split - vim.cmd("botright new") - local buf = vim.api.nvim_get_current_buf() - vim.bo[buf].buftype = "nofile" - vim.bo[buf].bufhidden = "wipe" - vim.bo[buf].swapfile = false - vim.bo[buf].filetype = "sql" - vim.api.nvim_buf_set_name(buf, "dbt-preview://" .. model) - vim.api.nvim_buf_set_lines(buf, 0, -1, false, output) - vim.bo[buf].modifiable = false - -- Resize to fit content (max 20 lines) - local height = math.min(#output, 20) - vim.api.nvim_win_set_height(0, height) - vim.keymap.set("n", "q", "<cmd>close<cr>", { buffer = buf, silent = true }) - end) - end, - }) -end, { desc = "[D]bt [P]review model rows" }) - --- dbt: show model output as CSV piped into visidata -vim.keymap.set("n", "<leader>dv", function() - local model = dbt_model_name() - if not model then - return - end - local root = dbt_project_root() - if not root then - vim.notify("No dbt_project.yml found", vim.log.levels.WARN) - return - end - require("conform").format({ async = false, lsp_fallback = true }) - vim.cmd("write") - local prev_win = vim.api.nvim_get_current_win() - local json_to_csv = [[python3 -c " -import sys, json, csv -raw = sys.stdin.read() -for line in raw.splitlines(): - try: - obj = json.loads(line) - except json.JSONDecodeError: - continue - preview = None - if 'data' in obj and 'preview' in obj['data']: - preview = obj['data']['preview'] - elif 'results' in obj: - preview = obj['results'][0].get('preview') - elif 'preview' in obj: - preview = obj['preview'] - if preview: - if isinstance(preview, str): - preview = json.loads(preview) - w = csv.DictWriter(sys.stdout, fieldnames=preview[0].keys()) - w.writeheader() - w.writerows(preview) - break -"]] - local cmd = "cd " .. root .. 
" && uv run dbt show -s " .. model .. " --limit 500 --output json --log-format json | " .. json_to_csv .. " | vd -f csv" - require("toggleterm.terminal").Terminal - :new({ - cmd = cmd, - close_on_exit = true, - on_exit = function() - vim.schedule(function() - if vim.api.nvim_win_is_valid(prev_win) then - vim.api.nvim_set_current_win(prev_win) - end - end) - end, - }) - :toggle() -end, { desc = "[D]bt [V]isidata preview" }) - --- dbt: read a prompt template from the llm/ directory and substitute {{key}} placeholders -local function dbt_load_prompt(name, vars) - local prompt_dir = vim.fn.stdpath("config") .. "/dbt/" - local path = prompt_dir .. name .. ".md" - local lines = vim.fn.readfile(path) - if #lines == 0 then - vim.notify("Prompt not found: " .. path, vim.log.levels.ERROR) - return nil - end - local prompt = table.concat(lines, "\n") - for key, value in pairs(vars or {}) do - prompt = prompt:gsub("{{" .. key .. "}}", value) - end - return prompt -end - --- dbt: run cursor-agent on current model (quick analysis with sonnet) --- Replaces buffer contents with the file + inline SQL comments -vim.keymap.set("n", "<leader>da", function() - local filepath = vim.fn.expand("%:p") - if filepath == "" then - vim.notify("No file open", vim.log.levels.WARN) - return - end - local prompt = dbt_load_prompt("dbt_quick_analysis", {}) - if not prompt then - return - end - local bufnr = vim.api.nvim_get_current_buf() - - vim.notify("Running quick analysis...", vim.log.levels.INFO) - - local file_content = table.concat(vim.fn.readfile(filepath), "\n") - local full_prompt = prompt .. "\n\nFile: " .. filepath .. "\n```sql\n" .. file_content .. 
"\n```" - local cmd = { "cursor-agent", "--print", "--model", "sonnet-4.6", full_prompt } - local output = {} - vim.fn.jobstart(cmd, { - stdout_buffered = true, - on_stdout = function(_, data) - if data then - output = data - end - end, - on_exit = function(_, exit_code) - vim.schedule(function() - if exit_code ~= 0 then - vim.notify("cursor-agent exited with code " .. exit_code, vim.log.levels.ERROR) - return - end - -- Remove trailing empty strings from jobstart output - while #output > 0 and output[#output] == "" do - table.remove(output) - end - if #output == 0 then - vim.notify("No output from cursor-agent", vim.log.levels.WARN) - return - end - vim.api.nvim_buf_set_lines(bufnr, 0, -1, false, output) - vim.notify("Inline comments added — review and :w to save, or :u to undo", vim.log.levels.INFO) - end) - end, - }) -end, { desc = "[D]bt [A]nalyse model (quick)" }) - --- dbt: open interactive cursor-agent session in a new tmux window with compiled SQL + sample rows as context -vim.keymap.set("n", "<leader>dA", function() - local filepath = vim.fn.expand("%:p") - if filepath == "" then - vim.notify("No file open", vim.log.levels.WARN) - return - end - local model = dbt_model_name() - if not model then - return - end - local root = dbt_project_root() - if not root then - vim.notify("No dbt_project.yml found", vim.log.levels.WARN) - return - end - - -- Open a new tmux window running the standalone dbt_analyse.py script - local prompt_path = vim.fn.stdpath("config") .. "/dbt/dbt_deep_analysis.md" - local script_path = vim.fn.stdpath("config") .. "/dbt/dbt_analyse.py" - local shell_script = string.format( - [[cd %s && uv run python3 %s --model %s --root %s --filepath %s --prompt %s || (echo "Press enter to close..." && read)]], - root, script_path, model, root, filepath, prompt_path - ) - vim.fn.jobstart({ "tmux", "new-window", "-n", "dbt:" .. model, shell_script }, { detach = true }) - vim.notify("Opened interactive cursor-agent session in tmux window 'dbt:" .. 
model .. "'", vim.log.levels.INFO) -end, { desc = "[D]bt [A]nalyse model (interactive)" }) +-- dbt keymaps (loaded from separate file) +require("config.dbt") From f31143b9bf2471c63d06e44818fa20a81d790377 Mon Sep 17 00:00:00 2001 From: Claude <noreply@anthropic.com> Date: Sun, 8 Mar 2026 22:43:46 +0000 Subject: [PATCH 24/26] Add full spec for data-audit Python package Covers artifact types (dbt models, notebooks, flat files, Quarto docs), the PM orchestrator loop, structured findings model, LLM backend abstraction, CLI/API design, and phased implementation plan. https://claude.ai/code/session_019YdrjfrB6Lgu5QTZzrnXdb --- dbt/SPEC.md | 549 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 549 insertions(+) create mode 100644 dbt/SPEC.md diff --git a/dbt/SPEC.md b/dbt/SPEC.md new file mode 100644 index 0000000..cdd97d5 --- /dev/null +++ b/dbt/SPEC.md @@ -0,0 +1,549 @@ +# data-audit: specification + +A Python package for LLM-powered auditing of data artifacts — dbt models, SQL +scripts, Jupyter notebooks, Quarto documents, and flat data files. Designed to +be run by a single analyst or orchestrated by an autonomous "project manager" +agent that can expand the scope of an audit based on initial findings. + +## Problem + +Data teams accumulate a mix of dbt models, ad-hoc notebooks, CSV/Parquet +exports, and glue scripts. Quality issues hide at the seams: a notebook reads a +stale CSV export instead of the dbt model, a model silently fans out on a join, +a Quarto report hardcodes a date filter that drifts. Today's tooling audits +each artifact type in isolation — if at all. + +## Goals + +1. Audit heterogeneous data artifacts (dbt SQL, notebooks, flat files) through + a unified interface. +2. Support an autonomous orchestration loop where an LLM "project manager" can + review initial findings and spawn follow-up audits when warranted. +3. 
Produce structured, machine-readable findings alongside human-readable + reports so that both people and agents can act on results. +4. Be LLM-backend agnostic — work with direct API calls, local models, or CLI + wrappers. +5. Ship as an installable Python package (`pip install data-audit`) with a CLI + entry point. + +## Non-goals + +- Replacing dbt test or Great Expectations for deterministic, rule-based + testing. This tool is for exploratory, LLM-driven analysis. +- Real-time or CI-blocking checks. This is an offline review tool. +- Supporting non-Python notebook kernels (R, Julia) in the first version. + +--- + +## Artifact types + +### dbt models (.sql within a dbt project) + +This is the existing capability. Context gathering includes: + +- Compile the model (`dbt compile`) +- Fetch sample rows (`dbt show --output json`) +- Extract lineage — immediate parents and children (`dbt ls`) +- Discover existing tests from `schema.yml` files +- Read the source SQL + +Audit focus: join correctness, grain/uniqueness, filter logic, type mismatches, +test coverage gaps, upstream dependency risks. See the existing +`dbt_deep_analysis.md` prompt template for the full checklist. + +### dbt schema files (.yml) + +Context gathering: + +- Parse the YAML +- Cross-reference with the models it describes (do all models have schema + entries? do all columns exist?) +- Check test coverage across models + +Audit focus: missing descriptions, undocumented columns, missing or weak tests, +inconsistent naming conventions, stale entries for deleted models. + +### Jupyter notebooks (.ipynb) + +Notebooks vary widely. 
An analyst might be: + +- Running SQL queries against a warehouse +- Reading CSV/Parquet/Excel files from disk or S3 +- Pulling data from an API +- Doing pandas/polars transformations +- Producing charts or summary statistics + +Context gathering: + +- Extract all code cells and their outputs (where present) +- Identify data sources: SQL connection strings, file paths + (`pd.read_csv(...)`, `pd.read_parquet(...)`), API calls +- Extract imported libraries to understand the toolchain +- Note cell execution order and whether outputs are stale (execution count + gaps, missing outputs) +- If the notebook references dbt models or known tables, cross-reference with + the dbt project + +Audit focus: +- **Data source hygiene**: is the notebook reading raw files that should come + from a managed source? Are file paths hardcoded or parameterized? Are + connection strings embedded in code? +- **Reproducibility**: are cells ordered logically? Are there cells that depend + on execution side effects? Are random seeds set? Are outputs present and + consistent with the code? +- **Transformation logic**: are there pandas/polars transformations that + duplicate or contradict dbt model logic? Could this logic be pushed into the + transformation layer? +- **Data quality in-notebook**: are there silent drops (inner joins, dropna) + that could hide problems? Are filters reasonable? Are aggregations correct? +- **Staleness**: do outputs reference dates/values that suggest the notebook + hasn't been re-run recently? 
+ +### Quarto documents (.qmd) + +Similar to notebooks but with additional structure: + +- YAML front matter (params, output format, execution options) +- Mixed prose and code chunks +- Cross-references and callouts + +Context gathering: + +- Parse YAML front matter for parameters and execution options +- Extract code chunks (```{python} blocks) +- Identify data sources same as notebooks +- Note `freeze: true` or other execution-control settings + +Audit focus: same as notebooks, plus parameter handling (are params used +consistently? are defaults reasonable?), and whether `freeze` settings mean +outputs could be stale. + +### Flat data files (.csv, .parquet, .json, .xlsx) + +Not code artifacts — these are data. An analyst might ask "audit this export" +or "check this CSV for issues." The PM agent might also flag a file referenced +by a notebook for closer inspection. + +Context gathering: + +- Read schema/column names and types +- Compute basic profiling: row count, null rates, cardinality, min/max/mean + for numerics, sample values for categoricals, date ranges for timestamps +- For Parquet: extract embedded schema metadata +- For CSV: detect delimiter, encoding, quoting issues +- If the file is referenced by a notebook or dbt model, note that relationship + +Audit focus: +- **Schema issues**: mixed types in columns, inconsistent date formats, + encoding problems +- **Completeness**: unexpected nulls, empty columns, truncated rows +- **Distribution anomalies**: outliers, impossible values (negative ages, + future dates), suspicious cardinality (a "country" column with 3 values) +- **Staleness**: if file metadata includes a timestamp, flag if it's old + relative to the project + +--- + +## Architecture + +### Core data model + +``` +Finding + id: str # unique within an audit run + artifact: str # path or identifier of the audited artifact + artifact_type: str # "dbt_model", "notebook", "data_file", etc. 
+ category: str # "join_correctness", "data_quality", "reproducibility", etc. + severity: Severity # critical | warning | info + title: str # one-line summary + description: str # full explanation + evidence: str | None # query, code snippet, or data sample that demonstrates the issue + suggested_fix: str | None + downstream_impact: list[str] # artifact IDs affected if this isn't fixed + metadata: dict # arbitrary extra context (column names, line numbers, etc.) + +AuditResult + artifact: str + artifact_type: str + findings: list[Finding] + context_summary: str # what context was gathered (for traceability) + llm_model: str # which LLM produced this result + duration_seconds: float + raw_report: str # the full markdown report as returned by the LLM + +AuditPlan + tasks: list[AuditTask] # the full set of tasks (initial + follow-ups) + +AuditTask + artifact: str + artifact_type: str + prompt_template: str # which prompt to use + reason: str # why this task exists ("initial scan" or "follow-up: NULL propagation from stg_orders") + priority: int + parent_task_id: str | None # if this is a follow-up, which task spawned it + status: pending | running | completed | failed +``` + +### Auditor interface + +Each artifact type has an auditor that implements: + +```python +class Auditor(Protocol): + """Gathers context for an artifact and produces an LLM-ready prompt.""" + + artifact_type: str + + def can_handle(self, path: Path) -> bool: + """Return True if this auditor knows how to handle the given path.""" + ... + + def gather_context(self, path: Path, config: AuditConfig) -> AuditContext: + """Read the artifact and its surroundings, return structured context.""" + ... + + def render_prompt(self, context: AuditContext, template: str) -> str: + """Fill the prompt template with gathered context.""" + ... + + def parse_findings(self, raw_response: str, context: AuditContext) -> list[Finding]: + """Extract structured findings from the LLM's raw response.""" + ... 
+``` + +Concrete implementations: + +- `DbtModelAuditor` — wraps the existing compile/show/lineage/test-discovery logic +- `DbtSchemaAuditor` — parses YAML, cross-references models +- `NotebookAuditor` — extracts cells, identifies data sources, checks reproducibility +- `QuartoAuditor` — extends NotebookAuditor with front-matter parsing +- `DataFileAuditor` — profiles CSVs, Parquet, JSON, Excel files + +### LLM backend interface + +```python +class LLMBackend(Protocol): + """Send a prompt to an LLM and get a response.""" + + def complete(self, prompt: str, system: str | None = None) -> str: + ... + + def list_models(self) -> list[str]: + ... +``` + +Concrete implementations: + +- `CursorAgentBackend` — wraps the current `cursor-agent` CLI (preserves + existing workflow) +- `AnthropicBackend` — direct Claude API calls via the Anthropic SDK +- `LiteLLMBackend` — any model supported by litellm (OpenAI, local, etc.) + +### Orchestrator (the "project manager") + +The orchestrator is the key new capability. It operates in a loop: + +``` +1. Build initial AuditPlan from user-provided paths +2. While there are pending tasks and budget remains: + a. Pick next task(s) by priority + b. Run auditor(s) in parallel → collect AuditResults + c. Pass results to PM agent with the question: + "Given these findings, should we investigate anything further?" + d. PM agent returns zero or more follow-up AuditTasks + (e.g., "audit downstream model X", "profile data file Y referenced + in notebook Z", "do a deeper join analysis on model W") + e. Add follow-up tasks to the plan +3. 
Synthesize all AuditResults into a final report +``` + +**Budget controls** — the orchestrator enforces limits to prevent runaway +analysis: + +- `max_depth`: how many rounds of follow-ups (default: 2) +- `max_tasks`: total task cap across all rounds (default: 20) +- `max_wall_clock`: overall time limit (default: 30 minutes) +- `max_tokens`: approximate token budget across all LLM calls + +The PM agent prompt instructs it to be conservative — only spawn follow-up +tasks when there's concrete evidence that a finding has broader implications, +not speculatively. + +### Task graph + +Tasks form a tree (not a DAG — a follow-up task has exactly one parent). The +graph is used for: + +- Tracing why an audit was run ("this file was audited because the PM agent + identified it as a downstream consumer of a model with a join fan-out") +- Reporting: the final synthesis groups findings by their provenance +- Budget tracking: depth = longest path from root to leaf + +### Prompt templates + +Ship as package data under `data_audit/prompts/`: + +``` +dbt_model_deep.md # existing dbt_deep_analysis.md +dbt_model_quick.md # existing dbt_quick_analysis.md +dbt_schema.md # schema.yml audit +notebook.md # Jupyter/Quarto notebook audit +data_file.md # flat file profiling + audit +orchestrator.md # PM agent system prompt +synthesis.md # final report synthesis prompt +``` + +The existing Handlebars-style template engine (`{{var}}`, `{{#if var}}`, +`{{^if var}}`) is retained. It's simple and sufficient. + +--- + +## CLI + +``` +data-audit [OPTIONS] PATHS... +``` + +**Arguments:** + +- `PATHS` — files, directories, or globs. The tool auto-detects artifact type + by extension (`.sql` in a dbt project → dbt model, `.ipynb` → notebook, + `.csv`/`.parquet` → data file, etc.). Directories are walked recursively. 
+ +**Options:** + +| Flag | Default | Description | +|------|---------|-------------| +| `--llm` | (required, repeatable) | LLM model to use for audits | +| `--backend` | `anthropic` | LLM backend: `anthropic`, `cursor-agent`, `litellm` | +| `--dbt-root` | auto-detect | Path to dbt project root (for dbt artifacts) | +| `--prompt` | built-in | Override the prompt template | +| `--output-dir` | `./audit_reports` | Where to write reports | +| `--output-format` | `markdown` | `markdown`, `json`, or `both` | +| `--limit` | `20` | Row limit for dbt show / data file sampling | +| `--synthesis-model` | same as `--llm` | LLM for synthesis step | +| `--orchestrate` | `false` | Enable the PM orchestration loop | +| `--max-depth` | `2` | Max follow-up rounds (requires `--orchestrate`) | +| `--max-tasks` | `20` | Max total audit tasks (requires `--orchestrate`) | +| `--concurrency` | `3` | Max parallel LLM calls | +| `--timeout` | `900` | Per-task timeout in seconds | +| `--verbose` | `false` | Show detailed progress | + +**Examples:** + +```bash +# Audit a few dbt models (current behavior, just packaged) +data-audit models/stg_orders.sql models/int_revenue.sql \ + --llm claude-sonnet-4-6 --dbt-root . 
+ +# Audit a notebook +data-audit analysis/revenue_deep_dive.ipynb --llm claude-sonnet-4-6 + +# Audit an export file +data-audit exports/monthly_summary.csv --llm claude-sonnet-4-6 + +# Audit an entire directory with PM orchestration +data-audit models/ notebooks/ exports/ \ + --llm claude-sonnet-4-6 \ + --orchestrate --max-depth 2 --max-tasks 15 + +# JSON output for programmatic consumption +data-audit models/stg_orders.sql --llm claude-sonnet-4-6 --output-format json +``` + +--- + +## Programmatic API + +```python +from data_audit import audit, AuditConfig +from data_audit.backends import AnthropicBackend + +config = AuditConfig( + llm="claude-sonnet-4-6", + backend=AnthropicBackend(), + concurrency=3, + orchestrate=True, + max_depth=2, +) + +# Audit specific files +results = audit(["models/stg_orders.sql", "analysis/deep_dive.ipynb"], config) + +for finding in results.all_findings(): + print(f"[{finding.severity}] {finding.artifact}: {finding.title}") + +# Access the structured data +results.to_json("audit_output.json") +results.to_markdown("audit_report.md") +``` + +--- + +## Package structure + +``` +data-audit/ + pyproject.toml + src/data_audit/ + __init__.py # public API: audit(), AuditConfig + cli.py # click CLI entry point + + # Core data model + models.py # Finding, AuditResult, AuditTask, AuditPlan + + # Auditors — one per artifact type + auditors/ + __init__.py + base.py # Auditor protocol + dbt_model.py # compile, show, lineage, test discovery + dbt_schema.py # schema.yml cross-referencing + notebook.py # .ipynb extraction and analysis + quarto.py # .qmd front-matter + code chunks + data_file.py # CSV, Parquet, JSON, Excel profiling + registry.py # maps file extensions/patterns → auditors + + # Context gathering utilities + context/ + __init__.py + dbt.py # dbt compile/show/ls wrappers + notebook_parser.py # cell extraction, data source detection + file_profiler.py # column stats, null rates, distributions + template.py # Handlebars-style template 
rendering + + # LLM backends + backends/ + __init__.py + base.py # LLMBackend protocol + anthropic.py # direct Anthropic API + cursor_agent.py # subprocess wrapper for cursor-agent CLI + litellm.py # litellm pass-through + + # Orchestration + orchestrator/ + __init__.py + pm_agent.py # project manager LLM logic + task_graph.py # task tree, budget tracking + executor.py # parallel task execution + synthesizer.py # final report generation + + # Shipped prompt templates + prompts/ + dbt_model_deep.md + dbt_model_quick.md + dbt_schema.md + notebook.md + quarto.md + data_file.md + orchestrator.md + synthesis.md +``` + +--- + +## Migration path from current code + +The existing scripts (`dbt_batch_audit.py`, `dbt_analyse.py`) become thin +wrappers or are replaced entirely by the package CLI. Mapping: + +| Current | Package equivalent | +|---------|--------------------| +| `dbt_batch_audit.py` | `data-audit models/*.sql --llm ... --dbt-root .` | +| `dbt_analyse.py` (interactive tmux) | `data-audit --interactive models/stg_orders.sql` (or keep as a separate nvim-specific script that calls the package API) | +| `dbt_deep_analysis.md` | `src/data_audit/prompts/dbt_model_deep.md` | +| `dbt_quick_analysis.md` | `src/data_audit/prompts/dbt_model_quick.md` | +| `render_template()` | `src/data_audit/context/template.py` | +| `compile_model()`, `get_sample_rows()`, etc. | `src/data_audit/context/dbt.py` | +| `run_audit()` | `src/data_audit/orchestrator/executor.py` | +| `synthesize_reports()` | `src/data_audit/orchestrator/synthesizer.py` | +| cursor-agent hardcoding | `src/data_audit/backends/cursor_agent.py` (one of N backends) | + +The nvim integration (`nvim/lua/config/dbt.lua`) continues to work — it just +calls `data-audit` instead of the raw Python scripts. 
+ +--- + +## Dependencies + +**Required:** + +- `click` — CLI framework +- `pyyaml` — YAML parsing (dbt schema files, Quarto front matter) + +**Optional (extras):** + +- `anthropic` — for the Anthropic backend (`pip install data-audit[anthropic]`) +- `litellm` — for the litellm backend (`pip install data-audit[litellm]`) +- `mdformat` — for markdown formatting of output reports +- `nbformat` — for robust Jupyter notebook parsing (fallback: raw JSON parsing) +- `pyarrow` or `polars` — for Parquet file profiling +- `openpyxl` — for Excel file profiling + +--- + +## Implementation phases + +### Phase 1: Extract and package + +Move the existing dbt model audit logic into the package structure. Get +`data-audit models/*.sql --llm ... --dbt-root .` working identically to the +current `dbt_batch_audit.py`. Ship the Auditor protocol, the dbt model auditor, +the cursor-agent backend, and the existing prompt templates. No new +capabilities — just packaging. + +### Phase 2: LLM backends + structured output + +Add the Anthropic and litellm backends. Implement the `Finding` data model and +`parse_findings()` so that results are available as structured data (JSON +output). This unblocks the orchestrator since the PM agent needs machine-readable +findings to reason over. + +### Phase 3: Notebook and data file auditors + +Implement `NotebookAuditor`, `QuartoAuditor`, and `DataFileAuditor` with their +context gatherers and prompt templates. At this point `data-audit` can audit +any artifact type, but only in a flat batch (no orchestration loop). + +### Phase 4: Orchestrator + +Implement the PM agent loop: initial plan → audit → review findings → spawn +follow-ups → synthesize. This is the most architecturally complex phase and +benefits from phases 2-3 being stable first. 
+ +### Phase 5: Cross-artifact analysis + +Teach the orchestrator to reason across artifact types: "this notebook reads +the same table that dbt model X writes to — flag if findings in X affect the +notebook's conclusions." This requires a lightweight project graph (dbt lineage ++ notebook data source detection + file references). + +--- + +## Open questions + +1. **Package name**: `data-audit` is clean but generic. Alternatives: + `llm-data-audit`, `data-artifact-audit`, `dbt-audit-llm`. The scope beyond + dbt suggests avoiding `dbt-` as a prefix. + +2. **Structured finding extraction**: should the LLM be asked to return JSON + directly (fragile), or should we post-process the markdown response with a + second LLM call or regex-based parser? A hybrid approach (ask for markdown + with a machine-readable YAML block at the end) may be most robust. + +3. **Database access for data file profiling**: profiling flat files requires + reading them. For large files, should the tool spin up a temporary DuckDB + instance to run profiling queries, or rely on pandas/polars sampling? DuckDB + is likely the right answer since it handles CSV/Parquet/JSON natively and + the audit prompts already assume SQL-based evidence queries. + +4. **Interactive mode**: the current `dbt_analyse.py` launches an interactive + agent session in tmux. Should the package support this, or should it remain + a thin wrapper in the dotfiles that calls the package API? Leaning toward + the latter — interactive UX is editor-specific and doesn't belong in the + package. + +5. **Notebook output handling**: should the auditor include cell outputs + (which can be large — images, dataframes, tracebacks) in the LLM context? + Probably: include text outputs and truncated dataframe representations, + skip images unless the LLM supports vision, always note when outputs are + missing. 
From 01a367ba5ffda662248d6fef5fabd9ad0ff462b2 Mon Sep 17 00:00:00 2001 From: Claude <noreply@anthropic.com> Date: Tue, 10 Mar 2026 01:44:59 +0000 Subject: [PATCH 25/26] Fix black formatting in dbt Python scripts Merge master and reformat dbt_analyse.py and dbt_batch_audit.py to pass the CI formatting checks. https://claude.ai/code/session_01BPxJNKYipwwrx17jvyv6rM --- dbt/dbt_analyse.py | 52 +++++++++++++----- dbt/dbt_batch_audit.py | 117 ++++++++++++++++++++++++++++------------- 2 files changed, 118 insertions(+), 51 deletions(-) diff --git a/dbt/dbt_analyse.py b/dbt/dbt_analyse.py index f065d22..dd92fa3 100755 --- a/dbt/dbt_analyse.py +++ b/dbt/dbt_analyse.py @@ -42,13 +42,22 @@ def run(cmd, cwd=None, capture=False, check=True): def get_lineage(model, root): """Return a summary of immediate parents and children from dbt ls.""" lines = [] - for direction, selector in [("parents", f"+{model},1+{model}"), ("children", f"{model}+,{model}1+")]: + for direction, selector in [ + ("parents", f"+{model},1+{model}"), + ("children", f"{model}+,{model}1+"), + ]: result = subprocess.run( ["uv", "run", "dbt", "ls", "-s", selector, "--output", "name", "--quiet"], - capture_output=True, text=True, cwd=root, + capture_output=True, + text=True, + cwd=root, ) if result.returncode == 0: - names = [n.strip() for n in result.stdout.strip().splitlines() if n.strip() and n.strip() != model] + names = [ + n.strip() + for n in result.stdout.strip().splitlines() + if n.strip() and n.strip() != model + ] if names: lines.append(f"**{direction.title()}:** {', '.join(names)}") return "\n".join(lines) if lines else "" @@ -86,7 +95,6 @@ def get_existing_tests(model, root): return "\n".join(tests) if tests else "" - def render_template(template, replacements): """Replace template placeholders, handling conditional {{#if}}/{{^if}} blocks.""" for key, value in replacements.items(): @@ -138,8 +146,18 @@ def main(model, root, filepath, prompt, limit, model_flag): click.echo(f"Fetching sample rows 
(limit={limit})...") result = run( [ - "uv", "run", "dbt", "show", "-s", model, - "--limit", str(limit), "--output", "json", "--log-format", "json", + "uv", + "run", + "dbt", + "show", + "-s", + model, + "--limit", + str(limit), + "--output", + "json", + "--log-format", + "json", ], cwd=root, capture=True, @@ -175,18 +193,24 @@ def main(model, root, filepath, prompt, limit, model_flag): with open(prompt) as f: template = f.read() - full_prompt = render_template(template, { - "compiled_sql": compiled_sql, - "sample_rows": sample_rows, - "existing_tests": existing_tests, - "lineage": lineage, - "data_profile": "", - }) + full_prompt = render_template( + template, + { + "compiled_sql": compiled_sql, + "sample_rows": sample_rows, + "existing_tests": existing_tests, + "lineage": lineage, + "data_profile": "", + }, + ) full_prompt += f"\n\nSource SQL:\n{source_sql}" # --- 7. write context to a temp file & launch cursor-agent --- ctx = tempfile.NamedTemporaryFile( - mode="w", suffix=".md", prefix=f"dbt_audit_{model}_", delete=False, + mode="w", + suffix=".md", + prefix=f"dbt_audit_{model}_", + delete=False, ) ctx.write(full_prompt) ctx.close() diff --git a/dbt/dbt_batch_audit.py b/dbt/dbt_batch_audit.py index 6fca44a..36a8683 100644 --- a/dbt/dbt_batch_audit.py +++ b/dbt/dbt_batch_audit.py @@ -66,8 +66,18 @@ def get_sample_rows(model_name, root, limit): click.echo(f" Fetching sample rows for {model_name} (limit={limit})...") result = run( [ - "uv", "run", "dbt", "show", "-s", model_name, - "--limit", str(limit), "--output", "json", "--log-format", "json", + "uv", + "run", + "dbt", + "show", + "-s", + model_name, + "--limit", + str(limit), + "--output", + "json", + "--log-format", + "json", ], cwd=root, capture=True, @@ -82,13 +92,22 @@ def get_sample_rows(model_name, root, limit): def get_lineage(model_name, root): """Return a summary of immediate parents and children from dbt ls.""" lines = [] - for direction, selector in [("parents", f"+{model_name},1+{model_name}"), 
("children", f"{model_name}+,{model_name}1+")]: + for direction, selector in [ + ("parents", f"+{model_name},1+{model_name}"), + ("children", f"{model_name}+,{model_name}1+"), + ]: result = subprocess.run( ["uv", "run", "dbt", "ls", "-s", selector, "--output", "name", "--quiet"], - capture_output=True, text=True, cwd=root, + capture_output=True, + text=True, + cwd=root, ) if result.returncode == 0: - names = [n.strip() for n in result.stdout.strip().splitlines() if n.strip() and n.strip() != model_name] + names = [ + n.strip() + for n in result.stdout.strip().splitlines() + if n.strip() and n.strip() != model_name + ] if names: lines.append(f"**{direction.title()}:** {', '.join(names)}") return "\n".join(lines) if lines else "" @@ -151,18 +170,30 @@ def render_template(template, replacements): return template -def write_context_file(output_dir, model_name, template, compiled_sql, sample_rows, source_sql, lineage="", existing_tests=""): +def write_context_file( + output_dir, + model_name, + template, + compiled_sql, + sample_rows, + source_sql, + lineage="", + existing_tests="", +): """Write the full audit context to a file so cursor-agent can read it.""" ctx_dir = os.path.join(output_dir, ".context") os.makedirs(ctx_dir, exist_ok=True) - content = render_template(template, { - "compiled_sql": compiled_sql, - "sample_rows": sample_rows, - "existing_tests": existing_tests, - "lineage": lineage, - "data_profile": "", - }) + content = render_template( + template, + { + "compiled_sql": compiled_sql, + "sample_rows": sample_rows, + "existing_tests": existing_tests, + "lineage": lineage, + "data_profile": "", + }, + ) content += f"\n\nSource SQL:\n{source_sql}" ctx_path = os.path.join(ctx_dir, f"{model_name}__context.md") @@ -205,7 +236,8 @@ def validate_llms(llms): click.echo( f"ERROR: the following model(s) are not available in cursor-agent:\n" + "\n".join(f" - {m}" for m in invalid) - + f"\n\nAvailable models:\n " + "\n ".join(sorted(available)), + + f"\n\nAvailable 
models:\n " + + "\n ".join(sorted(available)), err=True, ) sys.exit(1) @@ -262,9 +294,7 @@ def synthesize_reports(reports, synthesis_model, output_dir, root): combined_parts = [] for model_name, llm, report in reports: - combined_parts.append( - f"---\n## Model: {model_name} | Reviewer: {llm}\n\n{report}\n" - ) + combined_parts.append(f"---\n## Model: {model_name} | Reviewer: {llm}\n\n{report}\n") combined_text = "\n".join(combined_parts) combined_path = os.path.join(output_dir, "all_individual_reports.md") @@ -274,9 +304,7 @@ def synthesize_reports(reports, synthesis_model, output_dir, root): # Write synthesis context to a file to avoid command-line length limits ctx_dir = os.path.join(output_dir, ".context") os.makedirs(ctx_dir, exist_ok=True) - synthesis_ctx_path = os.path.abspath( - os.path.join(ctx_dir, "synthesis_context.md") - ) + synthesis_ctx_path = os.path.abspath(os.path.join(ctx_dir, "synthesis_context.md")) synthesis_instructions = f"""\ You are a senior analytics engineer reviewing multiple dbt model audit reports. 
@@ -341,9 +369,7 @@ def synthesize_reports(reports, synthesis_model, output_dir, root): if result.returncode == 0: synthesis = result.stdout.strip() else: - synthesis = ( - f"(synthesis failed: exit {result.returncode})\n{result.stderr}" - ) + synthesis = f"(synthesis failed: exit {result.returncode})\n{result.stderr}" synthesis_path = os.path.join(output_dir, "final_synthesis.md") with open(synthesis_path, "w") as f: @@ -361,9 +387,7 @@ def resolve_sql_paths(paths): for p in paths: p = os.path.abspath(p) if os.path.isdir(p): - children = sorted( - f for f in glob.glob(os.path.join(p, "**", "*.sql"), recursive=True) - ) + children = sorted(f for f in glob.glob(os.path.join(p, "**", "*.sql"), recursive=True)) if not children: click.echo(f"WARNING: no .sql files found in {p}", err=True) resolved.extend(children) @@ -385,22 +409,31 @@ def model_name_from_path(filepath): @click.command() @click.argument("paths", nargs=-1, required=True) @click.option( - "--llm", "llms", required=True, multiple=True, + "--llm", + "llms", + required=True, + multiple=True, help="LLM model name for cursor-agent (repeatable)", ) @click.option("--root", required=True, help="Path to dbt project root") @click.option("--prompt", required=True, help="Path to the prompt template .md file") @click.option( - "--output-dir", default="./audit_reports", show_default=True, + "--output-dir", + default="./audit_reports", + show_default=True, help="Directory for output reports", ) @click.option("--limit", default=20, show_default=True, help="Row limit for dbt show") @click.option( - "--synthesis-model", default="sonnet-4.6-thinking", show_default=True, + "--synthesis-model", + default="sonnet-4.6-thinking", + show_default=True, help="LLM for the final synthesis step", ) @click.option( - "--concurrency", default=3, show_default=True, + "--concurrency", + default=3, + show_default=True, help="Max parallel cursor-agent invocations", ) def main(paths, llms, root, prompt, output_dir, limit, synthesis_model, 
concurrency): @@ -422,7 +455,8 @@ def main(paths, llms, root, prompt, output_dir, limit, synthesis_model, concurre name = model_name_from_path(filepath) if name in seen: click.echo( - f"ERROR: duplicate model name '{name}' from {filepath}", err=True, + f"ERROR: duplicate model name '{name}' from {filepath}", + err=True, ) sys.exit(1) seen.add(name) @@ -439,9 +473,7 @@ def main(paths, llms, root, prompt, output_dir, limit, synthesis_model, concurre os.makedirs(output_dir, exist_ok=True) total = len(model_specs) * len(llms) - click.echo( - f"Auditing {len(model_specs)} model(s) × {len(llms)} LLM(s) = {total} audit(s)" - ) + click.echo(f"Auditing {len(model_specs)} model(s) × {len(llms)} LLM(s) = {total} audit(s)") click.echo(f"Concurrency: {concurrency} | Synthesis model: {synthesis_model}\n") context_paths = {} @@ -456,8 +488,14 @@ def main(paths, llms, root, prompt, output_dir, limit, synthesis_model, concurre click.echo(f" Scanning tests for {name}...") existing_tests = get_existing_tests(name, root) context_paths[name] = write_context_file( - output_dir, name, template, compiled_sql, sample_rows, source_sql, - lineage=lineage, existing_tests=existing_tests, + output_dir, + name, + template, + compiled_sql, + sample_rows, + source_sql, + lineage=lineage, + existing_tests=existing_tests, ) click.echo(f"\nAll models compiled. 
Launching {total} audit(s)...\n") @@ -468,7 +506,12 @@ def main(paths, llms, root, prompt, output_dir, limit, synthesis_model, concurre for name, _ in model_specs: for llm in llms: fut = pool.submit( - run_audit, name, context_paths[name], llm, output_dir, root, + run_audit, + name, + context_paths[name], + llm, + output_dir, + root, ) futures[fut] = (name, llm) From e1269e96d283636eb84656945cca9f79d2d4054c Mon Sep 17 00:00:00 2001 From: Michael Barton <mail@michaelbarton.me.uk> Date: Mon, 9 Mar 2026 18:57:35 -0700 Subject: [PATCH 26/26] Delete dbt/SPEC.md --- dbt/SPEC.md | 549 ---------------------------------------------------- 1 file changed, 549 deletions(-) delete mode 100644 dbt/SPEC.md diff --git a/dbt/SPEC.md b/dbt/SPEC.md deleted file mode 100644 index cdd97d5..0000000 --- a/dbt/SPEC.md +++ /dev/null @@ -1,549 +0,0 @@ -# data-audit: specification - -A Python package for LLM-powered auditing of data artifacts — dbt models, SQL -scripts, Jupyter notebooks, Quarto documents, and flat data files. Designed to -be run by a single analyst or orchestrated by an autonomous "project manager" -agent that can expand the scope of an audit based on initial findings. - -## Problem - -Data teams accumulate a mix of dbt models, ad-hoc notebooks, CSV/Parquet -exports, and glue scripts. Quality issues hide at the seams: a notebook reads a -stale CSV export instead of the dbt model, a model silently fans out on a join, -a Quarto report hardcodes a date filter that drifts. Today's tooling audits -each artifact type in isolation — if at all. - -## Goals - -1. Audit heterogeneous data artifacts (dbt SQL, notebooks, flat files) through - a unified interface. -2. Support an autonomous orchestration loop where an LLM "project manager" can - review initial findings and spawn follow-up audits when warranted. -3. Produce structured, machine-readable findings alongside human-readable - reports so that both people and agents can act on results. -4. 
Be LLM-backend agnostic — work with direct API calls, local models, or CLI - wrappers. -5. Ship as an installable Python package (`pip install data-audit`) with a CLI - entry point. - -## Non-goals - -- Replacing dbt test or Great Expectations for deterministic, rule-based - testing. This tool is for exploratory, LLM-driven analysis. -- Real-time or CI-blocking checks. This is an offline review tool. -- Supporting non-Python notebook kernels (R, Julia) in the first version. - ---- - -## Artifact types - -### dbt models (.sql within a dbt project) - -This is the existing capability. Context gathering includes: - -- Compile the model (`dbt compile`) -- Fetch sample rows (`dbt show --output json`) -- Extract lineage — immediate parents and children (`dbt ls`) -- Discover existing tests from `schema.yml` files -- Read the source SQL - -Audit focus: join correctness, grain/uniqueness, filter logic, type mismatches, -test coverage gaps, upstream dependency risks. See the existing -`dbt_deep_analysis.md` prompt template for the full checklist. - -### dbt schema files (.yml) - -Context gathering: - -- Parse the YAML -- Cross-reference with the models it describes (do all models have schema - entries? do all columns exist?) -- Check test coverage across models - -Audit focus: missing descriptions, undocumented columns, missing or weak tests, -inconsistent naming conventions, stale entries for deleted models. - -### Jupyter notebooks (.ipynb) - -Notebooks vary widely. 
An analyst might be: - -- Running SQL queries against a warehouse -- Reading CSV/Parquet/Excel files from disk or S3 -- Pulling data from an API -- Doing pandas/polars transformations -- Producing charts or summary statistics - -Context gathering: - -- Extract all code cells and their outputs (where present) -- Identify data sources: SQL connection strings, file paths - (`pd.read_csv(...)`, `pd.read_parquet(...)`), API calls -- Extract imported libraries to understand the toolchain -- Note cell execution order and whether outputs are stale (execution count - gaps, missing outputs) -- If the notebook references dbt models or known tables, cross-reference with - the dbt project - -Audit focus: -- **Data source hygiene**: is the notebook reading raw files that should come - from a managed source? Are file paths hardcoded or parameterized? Are - connection strings embedded in code? -- **Reproducibility**: are cells ordered logically? Are there cells that depend - on execution side effects? Are random seeds set? Are outputs present and - consistent with the code? -- **Transformation logic**: are there pandas/polars transformations that - duplicate or contradict dbt model logic? Could this logic be pushed into the - transformation layer? -- **Data quality in-notebook**: are there silent drops (inner joins, dropna) - that could hide problems? Are filters reasonable? Are aggregations correct? -- **Staleness**: do outputs reference dates/values that suggest the notebook - hasn't been re-run recently? 
- -### Quarto documents (.qmd) - -Similar to notebooks but with additional structure: - -- YAML front matter (params, output format, execution options) -- Mixed prose and code chunks -- Cross-references and callouts - -Context gathering: - -- Parse YAML front matter for parameters and execution options -- Extract code chunks (```{python} blocks) -- Identify data sources same as notebooks -- Note `freeze: true` or other execution-control settings - -Audit focus: same as notebooks, plus parameter handling (are params used -consistently? are defaults reasonable?), and whether `freeze` settings mean -outputs could be stale. - -### Flat data files (.csv, .parquet, .json, .xlsx) - -Not code artifacts — these are data. An analyst might ask "audit this export" -or "check this CSV for issues." The PM agent might also flag a file referenced -by a notebook for closer inspection. - -Context gathering: - -- Read schema/column names and types -- Compute basic profiling: row count, null rates, cardinality, min/max/mean - for numerics, sample values for categoricals, date ranges for timestamps -- For Parquet: extract embedded schema metadata -- For CSV: detect delimiter, encoding, quoting issues -- If the file is referenced by a notebook or dbt model, note that relationship - -Audit focus: -- **Schema issues**: mixed types in columns, inconsistent date formats, - encoding problems -- **Completeness**: unexpected nulls, empty columns, truncated rows -- **Distribution anomalies**: outliers, impossible values (negative ages, - future dates), suspicious cardinality (a "country" column with 3 values) -- **Staleness**: if file metadata includes a timestamp, flag if it's old - relative to the project - ---- - -## Architecture - -### Core data model - -``` -Finding - id: str # unique within an audit run - artifact: str # path or identifier of the audited artifact - artifact_type: str # "dbt_model", "notebook", "data_file", etc. 
- category: str # "join_correctness", "data_quality", "reproducibility", etc. - severity: Severity # critical | warning | info - title: str # one-line summary - description: str # full explanation - evidence: str | None # query, code snippet, or data sample that demonstrates the issue - suggested_fix: str | None - downstream_impact: list[str] # artifact IDs affected if this isn't fixed - metadata: dict # arbitrary extra context (column names, line numbers, etc.) - -AuditResult - artifact: str - artifact_type: str - findings: list[Finding] - context_summary: str # what context was gathered (for traceability) - llm_model: str # which LLM produced this result - duration_seconds: float - raw_report: str # the full markdown report as returned by the LLM - -AuditPlan - tasks: list[AuditTask] # the full set of tasks (initial + follow-ups) - -AuditTask - artifact: str - artifact_type: str - prompt_template: str # which prompt to use - reason: str # why this task exists ("initial scan" or "follow-up: NULL propagation from stg_orders") - priority: int - parent_task_id: str | None # if this is a follow-up, which task spawned it - status: pending | running | completed | failed -``` - -### Auditor interface - -Each artifact type has an auditor that implements: - -```python -class Auditor(Protocol): - """Gathers context for an artifact and produces an LLM-ready prompt.""" - - artifact_type: str - - def can_handle(self, path: Path) -> bool: - """Return True if this auditor knows how to handle the given path.""" - ... - - def gather_context(self, path: Path, config: AuditConfig) -> AuditContext: - """Read the artifact and its surroundings, return structured context.""" - ... - - def render_prompt(self, context: AuditContext, template: str) -> str: - """Fill the prompt template with gathered context.""" - ... - - def parse_findings(self, raw_response: str, context: AuditContext) -> list[Finding]: - """Extract structured findings from the LLM's raw response.""" - ... 
-``` - -Concrete implementations: - -- `DbtModelAuditor` — wraps the existing compile/show/lineage/test-discovery logic -- `DbtSchemaAuditor` — parses YAML, cross-references models -- `NotebookAuditor` — extracts cells, identifies data sources, checks reproducibility -- `QuartoAuditor` — extends NotebookAuditor with front-matter parsing -- `DataFileAuditor` — profiles CSVs, Parquet, JSON, Excel files - -### LLM backend interface - -```python -class LLMBackend(Protocol): - """Send a prompt to an LLM and get a response.""" - - def complete(self, prompt: str, system: str | None = None) -> str: - ... - - def list_models(self) -> list[str]: - ... -``` - -Concrete implementations: - -- `CursorAgentBackend` — wraps the current `cursor-agent` CLI (preserves - existing workflow) -- `AnthropicBackend` — direct Claude API calls via the Anthropic SDK -- `LiteLLMBackend` — any model supported by litellm (OpenAI, local, etc.) - -### Orchestrator (the "project manager") - -The orchestrator is the key new capability. It operates in a loop: - -``` -1. Build initial AuditPlan from user-provided paths -2. While there are pending tasks and budget remains: - a. Pick next task(s) by priority - b. Run auditor(s) in parallel → collect AuditResults - c. Pass results to PM agent with the question: - "Given these findings, should we investigate anything further?" - d. PM agent returns zero or more follow-up AuditTasks - (e.g., "audit downstream model X", "profile data file Y referenced - in notebook Z", "do a deeper join analysis on model W") - e. Add follow-up tasks to the plan -3. 
Synthesize all AuditResults into a final report -``` - -**Budget controls** — the orchestrator enforces limits to prevent runaway -analysis: - -- `max_depth`: how many rounds of follow-ups (default: 2) -- `max_tasks`: total task cap across all rounds (default: 20) -- `max_wall_clock`: overall time limit (default: 30 minutes) -- `max_tokens`: approximate token budget across all LLM calls - -The PM agent prompt instructs it to be conservative — only spawn follow-up -tasks when there's concrete evidence that a finding has broader implications, -not speculatively. - -### Task graph - -Tasks form a tree (not a DAG — a follow-up task has exactly one parent). The -graph is used for: - -- Tracing why an audit was run ("this file was audited because the PM agent - identified it as a downstream consumer of a model with a join fan-out") -- Reporting: the final synthesis groups findings by their provenance -- Budget tracking: depth = longest path from root to leaf - -### Prompt templates - -Ship as package data under `data_audit/prompts/`: - -``` -dbt_model_deep.md # existing dbt_deep_analysis.md -dbt_model_quick.md # existing dbt_quick_analysis.md -dbt_schema.md # schema.yml audit -notebook.md # Jupyter/Quarto notebook audit -data_file.md # flat file profiling + audit -orchestrator.md # PM agent system prompt -synthesis.md # final report synthesis prompt -``` - -The existing Handlebars-style template engine (`{{var}}`, `{{#if var}}`, -`{{^if var}}`) is retained. It's simple and sufficient. - ---- - -## CLI - -``` -data-audit [OPTIONS] PATHS... -``` - -**Arguments:** - -- `PATHS` — files, directories, or globs. The tool auto-detects artifact type - by extension (`.sql` in a dbt project → dbt model, `.ipynb` → notebook, - `.csv`/`.parquet` → data file, etc.). Directories are walked recursively. 
- -**Options:** - -| Flag | Default | Description | -|------|---------|-------------| -| `--llm` | (required, repeatable) | LLM model to use for audits | -| `--backend` | `anthropic` | LLM backend: `anthropic`, `cursor-agent`, `litellm` | -| `--dbt-root` | auto-detect | Path to dbt project root (for dbt artifacts) | -| `--prompt` | built-in | Override the prompt template | -| `--output-dir` | `./audit_reports` | Where to write reports | -| `--output-format` | `markdown` | `markdown`, `json`, or `both` | -| `--limit` | `20` | Row limit for dbt show / data file sampling | -| `--synthesis-model` | same as `--llm` | LLM for synthesis step | -| `--orchestrate` | `false` | Enable the PM orchestration loop | -| `--max-depth` | `2` | Max follow-up rounds (requires `--orchestrate`) | -| `--max-tasks` | `20` | Max total audit tasks (requires `--orchestrate`) | -| `--concurrency` | `3` | Max parallel LLM calls | -| `--timeout` | `900` | Per-task timeout in seconds | -| `--verbose` | `false` | Show detailed progress | - -**Examples:** - -```bash -# Audit a few dbt models (current behavior, just packaged) -data-audit models/stg_orders.sql models/int_revenue.sql \ - --llm claude-sonnet-4-6 --dbt-root . 
- -# Audit a notebook -data-audit analysis/revenue_deep_dive.ipynb --llm claude-sonnet-4-6 - -# Audit an export file -data-audit exports/monthly_summary.csv --llm claude-sonnet-4-6 - -# Audit an entire directory with PM orchestration -data-audit models/ notebooks/ exports/ \ - --llm claude-sonnet-4-6 \ - --orchestrate --max-depth 2 --max-tasks 15 - -# JSON output for programmatic consumption -data-audit models/stg_orders.sql --llm claude-sonnet-4-6 --output-format json -``` - ---- - -## Programmatic API - -```python -from data_audit import audit, AuditConfig -from data_audit.backends import AnthropicBackend - -config = AuditConfig( - llm="claude-sonnet-4-6", - backend=AnthropicBackend(), - concurrency=3, - orchestrate=True, - max_depth=2, -) - -# Audit specific files -results = audit(["models/stg_orders.sql", "analysis/deep_dive.ipynb"], config) - -for finding in results.all_findings(): - print(f"[{finding.severity}] {finding.artifact}: {finding.title}") - -# Access the structured data -results.to_json("audit_output.json") -results.to_markdown("audit_report.md") -``` - ---- - -## Package structure - -``` -data-audit/ - pyproject.toml - src/data_audit/ - __init__.py # public API: audit(), AuditConfig - cli.py # click CLI entry point - - # Core data model - models.py # Finding, AuditResult, AuditTask, AuditPlan - - # Auditors — one per artifact type - auditors/ - __init__.py - base.py # Auditor protocol - dbt_model.py # compile, show, lineage, test discovery - dbt_schema.py # schema.yml cross-referencing - notebook.py # .ipynb extraction and analysis - quarto.py # .qmd front-matter + code chunks - data_file.py # CSV, Parquet, JSON, Excel profiling - registry.py # maps file extensions/patterns → auditors - - # Context gathering utilities - context/ - __init__.py - dbt.py # dbt compile/show/ls wrappers - notebook_parser.py # cell extraction, data source detection - file_profiler.py # column stats, null rates, distributions - template.py # Handlebars-style template 
rendering - - # LLM backends - backends/ - __init__.py - base.py # LLMBackend protocol - anthropic.py # direct Anthropic API - cursor_agent.py # subprocess wrapper for cursor-agent CLI - litellm.py # litellm pass-through - - # Orchestration - orchestrator/ - __init__.py - pm_agent.py # project manager LLM logic - task_graph.py # task tree, budget tracking - executor.py # parallel task execution - synthesizer.py # final report generation - - # Shipped prompt templates - prompts/ - dbt_model_deep.md - dbt_model_quick.md - dbt_schema.md - notebook.md - quarto.md - data_file.md - orchestrator.md - synthesis.md -``` - ---- - -## Migration path from current code - -The existing scripts (`dbt_batch_audit.py`, `dbt_analyse.py`) become thin -wrappers or are replaced entirely by the package CLI. Mapping: - -| Current | Package equivalent | -|---------|--------------------| -| `dbt_batch_audit.py` | `data-audit models/*.sql --llm ... --dbt-root .` | -| `dbt_analyse.py` (interactive tmux) | `data-audit --interactive models/stg_orders.sql` (or keep as a separate nvim-specific script that calls the package API) | -| `dbt_deep_analysis.md` | `src/data_audit/prompts/dbt_model_deep.md` | -| `dbt_quick_analysis.md` | `src/data_audit/prompts/dbt_model_quick.md` | -| `render_template()` | `src/data_audit/context/template.py` | -| `compile_model()`, `get_sample_rows()`, etc. | `src/data_audit/context/dbt.py` | -| `run_audit()` | `src/data_audit/orchestrator/executor.py` | -| `synthesize_reports()` | `src/data_audit/orchestrator/synthesizer.py` | -| cursor-agent hardcoding | `src/data_audit/backends/cursor_agent.py` (one of N backends) | - -The nvim integration (`nvim/lua/config/dbt.lua`) continues to work — it just -calls `data-audit` instead of the raw Python scripts. 
- ---- - -## Dependencies - -**Required:** - -- `click` — CLI framework -- `pyyaml` — YAML parsing (dbt schema files, Quarto front matter) - -**Optional (extras):** - -- `anthropic` — for the Anthropic backend (`pip install data-audit[anthropic]`) -- `litellm` — for the litellm backend (`pip install data-audit[litellm]`) -- `mdformat` — for markdown formatting of output reports -- `nbformat` — for robust Jupyter notebook parsing (fallback: raw JSON parsing) -- `pyarrow` or `polars` — for Parquet file profiling -- `openpyxl` — for Excel file profiling - ---- - -## Implementation phases - -### Phase 1: Extract and package - -Move the existing dbt model audit logic into the package structure. Get -`data-audit models/*.sql --llm ... --dbt-root .` working identically to the -current `dbt_batch_audit.py`. Ship the Auditor protocol, the dbt model auditor, -the cursor-agent backend, and the existing prompt templates. No new -capabilities — just packaging. - -### Phase 2: LLM backends + structured output - -Add the Anthropic and litellm backends. Implement the `Finding` data model and -`parse_findings()` so that results are available as structured data (JSON -output). This unblocks the orchestrator since the PM agent needs machine-readable -findings to reason over. - -### Phase 3: Notebook and data file auditors - -Implement `NotebookAuditor`, `QuartoAuditor`, and `DataFileAuditor` with their -context gatherers and prompt templates. At this point `data-audit` can audit -any artifact type, but only in a flat batch (no orchestration loop). - -### Phase 4: Orchestrator - -Implement the PM agent loop: initial plan → audit → review findings → spawn -follow-ups → synthesize. This is the most architecturally complex phase and -benefits from phases 2-3 being stable first. 
- -### Phase 5: Cross-artifact analysis - -Teach the orchestrator to reason across artifact types: "this notebook reads -the same table that dbt model X writes to — flag if findings in X affect the -notebook's conclusions." This requires a lightweight project graph (dbt lineage -+ notebook data source detection + file references). - ---- - -## Open questions - -1. **Package name**: `data-audit` is clean but generic. Alternatives: - `llm-data-audit`, `data-artifact-audit`, `dbt-audit-llm`. The scope beyond - dbt suggests avoiding `dbt-` as a prefix. - -2. **Structured finding extraction**: should the LLM be asked to return JSON - directly (fragile), or should we post-process the markdown response with a - second LLM call or regex-based parser? A hybrid approach (ask for markdown - with a machine-readable YAML block at the end) may be most robust. - -3. **Database access for data file profiling**: profiling flat files requires - reading them. For large files, should the tool spin up a temporary DuckDB - instance to run profiling queries, or rely on pandas/polars sampling? DuckDB - is likely the right answer since it handles CSV/Parquet/JSON natively and - the audit prompts already assume SQL-based evidence queries. - -4. **Interactive mode**: the current `dbt_analyse.py` launches an interactive - agent session in tmux. Should the package support this, or should it remain - a thin wrapper in the dotfiles that calls the package API? Leaning toward - the latter — interactive UX is editor-specific and doesn't belong in the - package. - -5. **Notebook output handling**: should the auditor include cell outputs - (which can be large — images, dataframes, tracebacks) in the LLM context? - Probably: include text outputs and truncated dataframe representations, - skip images unless the LLM supports vision, always note when outputs are - missing.