From b76845f66f69d22696bfefa87d983b30a8dc5850 Mon Sep 17 00:00:00 2001
From: arieradle <arie.radle@gmail.com>
Date: Sun, 15 Mar 2026 16:29:53 +0200
Subject: [PATCH 1/4] =?UTF-8?q?feat:=20shekel=20run=20=E2=80=94=20non-inva?=
 =?UTF-8?q?sive=20CLI=20budget=20enforcement=20(v0.2.9)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds `shekel run agent.py --budget 5` as a drop-in wrapper for any Python
agent script: zero code changes required, exit 1 on budget exceeded (CI-friendly).

New features:
- `shekel run`: wraps scripts via runpy in-process so monkey-patches are active
- `--budget / AGENT_BUDGET_USD`: USD cap with Docker/CI env-var support
- `--warn-at`, `--max-llm-calls`, `--max-tool-calls`: full Budget param parity
- `--output json`: machine-readable spend summary for log pipelines
- `--warn-only`: log warning but never exit 1 (soft guardrail)
- `--dry-run`: track costs only, implies --warn-only
- `--budget-file shekel.toml`: operator-supplied TOML config
- `Budget(warn_only=True)`: new parameter — suppresses raises, fires warn callback
- `.github/actions/enforce/action.yml`: GitHub Actions composite action
- `docs/docker.md`: Docker entrypoint patterns and shell script examples

Tests: 85 new tests (TDD), 100% coverage on _cli.py, _run_utils.py, _run_config.py.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .github/actions/enforce/action.yml     |  86 ++++++
 docs/cli.md                            |  85 +++++-
 docs/docker.md                         | 154 ++++++++++
 pyproject.toml                         |   2 +-
 shekel/__init__.py                     |   2 +-
 shekel/_budget.py                      |  39 ++-
 shekel/_cli.py                         | 244 ++++++++++++++++
 shekel/_run_config.py                  |  50 ++++
 shekel/_run_utils.py                   |  56 ++++
 tests/performance/test_run_overhead.py |  51 ++++
 tests/test_budget_warn_only.py         |  98 +++++++
 tests/test_cli.py                      |   9 +
 tests/test_cli_run.py                  | 374 +++++++++++++++++++++++++
 tests/test_cli_run_config.py           | 147 ++++++++++
 tests/test_cli_run_output.py           | 198 +++++++++++++
 15 files changed, 1577 insertions(+), 18 deletions(-)
 create mode 100644 .github/actions/enforce/action.yml
 create mode 100644 docs/docker.md
 create mode 100644 shekel/_run_config.py
 create mode 100644 shekel/_run_utils.py
 create mode 100644 tests/performance/test_run_overhead.py
 create mode 100644 tests/test_budget_warn_only.py
 create mode 100644 tests/test_cli_run.py
 create mode 100644 tests/test_cli_run_config.py
 create mode 100644 tests/test_cli_run_output.py

diff --git a/.github/actions/enforce/action.yml b/.github/actions/enforce/action.yml
new file mode 100644
index 0000000..7e3ca75
--- /dev/null
+++ b/.github/actions/enforce/action.yml
@@ -0,0 +1,86 @@
+name: "shekel — LLM Budget Enforcement"
+description: "Run a Python agent script with a USD budget cap. Exits 1 if the budget is exceeded."
+author: "shekel"
+
+branding:
+  icon: "dollar-sign"
+  color: "green"
+
+inputs:
+  script:
+    description: "Path to the Python agent script to run"
+    required: true
+  budget:
+    description: "Maximum spend in USD (e.g. 5 for $5)"
+    required: false
+    default: ""
+  warn-at:
+    description: "Warn fraction 0.0–1.0 of budget (e.g. 0.8 for 80%)"
+    required: false
+    default: ""
+  max-llm-calls:
+    description: "Maximum number of LLM API calls"
+    required: false
+    default: ""
+  max-tool-calls:
+    description: "Maximum number of tool invocations"
+    required: false
+    default: ""
+  warn-only:
+    description: "If true, warn but do not exit 1 on budget exceeded"
+    required: false
+    default: "false"
+  output:
+    description: "Output format: text or json"
+    required: false
+    default: "text"
+  budget-file:
+    description: "Path to a TOML budget config file (shekel.toml)"
+    required: false
+    default: ""
+  shekel-version:
+    description: "shekel package version to install (e.g. '>=0.2.9')"
+    required: false
+    default: ">=0.2.9"
+
+outputs:
+  spent:
+    description: "Total USD spent (populated when --output json is used)"
+  status:
+    description: "Budget status: ok | warn | exceeded"
+
+runs:
+  using: "composite"
+  steps:
+    - name: Install shekel
+      shell: bash
+      run: pip install "shekel[cli]${{ inputs.shekel-version }}" --quiet
+
+    - name: Run agent with budget enforcement
+      shell: bash
+      run: |
+        ARGS="${{ inputs.script }}"
+
+        if [ -n "${{ inputs.budget }}" ]; then
+          ARGS="$ARGS --budget ${{ inputs.budget }}"
+        fi
+        if [ -n "${{ inputs.warn-at }}" ]; then
+          ARGS="$ARGS --warn-at ${{ inputs.warn-at }}"
+        fi
+        if [ -n "${{ inputs.max-llm-calls }}" ]; then
+          ARGS="$ARGS --max-llm-calls ${{ inputs.max-llm-calls }}"
+        fi
+        if [ -n "${{ inputs.max-tool-calls }}" ]; then
+          ARGS="$ARGS --max-tool-calls ${{ inputs.max-tool-calls }}"
+        fi
+        if [ "${{ inputs.warn-only }}" = "true" ]; then
+          ARGS="$ARGS --warn-only"
+        fi
+        if [ -n "${{ inputs.output }}" ]; then
+          ARGS="$ARGS --output ${{ inputs.output }}"
+        fi
+        if [ -n "${{ inputs.budget-file }}" ]; then
+          ARGS="$ARGS --budget-file ${{ inputs.budget-file }}"
+        fi
+
+        shekel run $ARGS
diff --git a/docs/cli.md b/docs/cli.md
index d703e80..9eb5f9e 100644
--- a/docs/cli.md
+++ b/docs/cli.md
@@ -1,6 +1,6 @@
 # CLI Tools
 
-Shekel provides command-line tools for cost estimation and model information.
+Shekel provides command-line tools for budget enforcement, cost estimation, and model information.
 
 ## Installation
 
@@ -12,6 +12,88 @@ This installs the `shekel` command with Click support.
 
 ## Commands
 
+### `shekel run`
+
+Run a Python script with budget enforcement. Equivalent to wrapping your script
+in `with budget(max_usd=N):` — zero code changes required.
+
+#### Usage
+
+```bash
+shekel run SCRIPT [OPTIONS] [-- SCRIPT_ARGS...]
+```
+
+#### Options
+
+| Option | Description |
+|--------|-------------|
+| `--budget N` | Max spend in USD. Equivalent to `AGENT_BUDGET_USD=N`. |
+| `--warn-at F` | Warn fraction 0.0–1.0 (e.g. `0.8` = warn at 80% of budget). |
+| `--max-llm-calls N` | Cap on LLM API calls. |
+| `--max-tool-calls N` | Cap on tool invocations. |
+| `--warn-only` | Warn but never exit 1 when budget exceeded. |
+| `--dry-run` | Track costs only — no enforcement. Implies `--warn-only`. |
+| `--output text\|json` | Output format (default: `text`). |
+| `--budget-file PATH` | Path to a `shekel.toml` config file. |
+| `--fallback-model M` | Cheaper model to switch to at threshold. |
+| `--fallback-at F` | Fallback activation threshold (default: `0.8`). |
+
+#### Exit codes
+
+| Code | Meaning |
+|------|---------|
+| `0` | Script completed within budget |
+| `1` | Budget exceeded (unless `--warn-only`) |
+| `2` | Configuration error (missing script, bad TOML, etc.) |
+
+#### Environment variables
+
+| Variable | Description |
+|----------|-------------|
+| `AGENT_BUDGET_USD` | Fallback for `--budget`. Ideal for Docker/CI operator control. |
+
+#### Examples
+
+```bash
+# Enforce a $5 cap
+shekel run agent.py --budget 5
+
+# Warn at 80%, hard-stop at $5
+shekel run agent.py --budget 5 --warn-at 0.8
+
+# Cap LLM calls instead of spend
+shekel run agent.py --max-llm-calls 20
+
+# JSON output for CI log parsing
+shekel run agent.py --budget 5 --output json
+
+# Warn but don't fail the pipeline
+shekel run agent.py --budget 5 --warn-only
+
+# Dry-run: track costs without enforcement
+shekel run agent.py --budget 5 --dry-run
+
+# Load limits from TOML file
+shekel run agent.py --budget-file shekel.toml
+
+# Set budget via env var (Docker / CI)
+AGENT_BUDGET_USD=5 shekel run agent.py
+```
+
+#### `shekel.toml` format
+
+```toml
+[budget]
+max_usd       = 5.0
+warn_at       = 0.8
+max_llm_calls = 50
+max_tool_calls = 200
+```
+
+See [Docker & Container Guardrails](docker.md) for container-specific patterns.
+
+---
+
 ### `shekel estimate`
 
 Estimate API call costs without making actual requests.
@@ -244,6 +326,7 @@ print(f"Available models: {models}")
 
 ## Next Steps
 
+- [Docker & Container Guardrails](docker.md) - Using `shekel run` in Docker
 - [Supported Models](models.md) - Full model list with pricing
 - [Installation](installation.md) - Installing CLI tools
 - [Basic Usage](usage/basic-usage.md) - Using budgets in code
diff --git a/docs/docker.md b/docs/docker.md
new file mode 100644
index 0000000..e9740c0
--- /dev/null
+++ b/docs/docker.md
@@ -0,0 +1,154 @@
+# Docker & Container Guardrails
+
+Use `shekel run` as an entrypoint wrapper to enforce LLM cost limits on any agent
+running inside a Docker container — zero code changes required.
+
+## Quick start
+
+```dockerfile
+FROM python:3.12-slim
+
+WORKDIR /app
+
+# Install your agent and shekel CLI
+COPY requirements.txt .
+RUN pip install -r requirements.txt shekel[cli]
+
+COPY agent.py .
+
+# shekel run becomes the entrypoint; AGENT_BUDGET_USD sets the cap at runtime
+ENTRYPOINT ["shekel", "run", "agent.py"]
+```
+
+Run with a $5 cap:
+
+```bash
+docker run -e AGENT_BUDGET_USD=5 my-agent-image
+```
+
+The container exits with code 1 if the budget is exceeded, so your orchestration
+layer (ECS, Kubernetes, Compose) can detect it as a failed task.
+
+---
+
+## Patterns
+
+### Budget via environment variable
+
+The `AGENT_BUDGET_USD` env var is equivalent to `--budget N`. This is the
+preferred pattern for containers because the budget can be set by the operator
+without rebuilding the image.
+
+```bash
+# docker run
+docker run -e AGENT_BUDGET_USD=10 my-agent-image
+
+# docker-compose
+services:
+  agent:
+    image: my-agent-image
+    environment:
+      AGENT_BUDGET_USD: "10"
+```
+
+### Budget via CLI flag (baked into image)
+
+```dockerfile
+ENTRYPOINT ["shekel", "run", "agent.py", "--budget", "5"]
+```
+
+### TOML config file
+
+Mount a `shekel.toml` at runtime for fine-grained control:
+
+```bash
+docker run -v $(pwd)/shekel.toml:/app/shekel.toml \
+  my-agent-image --budget-file /app/shekel.toml
+```
+
+```toml
+# shekel.toml
+[budget]
+max_usd       = 5.0
+warn_at       = 0.8
+max_llm_calls = 50
+max_tool_calls = 200
+```
+
+### Warn-only mode (log but don't kill)
+
+```dockerfile
+ENTRYPOINT ["shekel", "run", "agent.py", "--warn-only"]
+```
+
+With `--warn-only`, the container exits 0 even if the budget is exceeded.
+Use this during development to observe spend without blocking the run.
+
+### JSON output for structured logging
+
+```bash
+docker run my-agent-image --budget 5 --output json \
+  | tee /logs/spend.json
+```
+
+The JSON summary is emitted as a single line at the end (pretty-printed here):
+
+```json
+{
+  "spent": 1.23,
+  "limit": 5.0,
+  "calls": 12,
+  "tool_calls": 4,
+  "status": "ok",
+  "model": "gpt-4o"
+}
+```
+
+---
+
+## Exit codes
+
+| Code | Meaning |
+|------|---------|
+| `0`  | Script completed within budget (or `--warn-only` mode) |
+| `1`  | Budget exceeded (default mode) |
+| `2`  | Configuration error (missing script, bad TOML, etc.) |
+
+---
+
+## Shell script wrapper
+
+For non-Docker environments (e.g. bare VMs, `.sh` CI scripts):
+
+```bash
+#!/usr/bin/env bash
+set -euo pipefail
+
+BUDGET="${AGENT_BUDGET_USD:-5}"
+
+shekel run agent.py \
+  --budget "$BUDGET" \
+  --warn-at 0.8 \
+  --output json \
+  | tee spend.json
+
+status=$(jq -r '.status' spend.json)
+if [ "$status" = "exceeded" ]; then
+  echo "Budget exceeded — check spend.json for details" >&2
+  exit 1
+fi
+```
+
+---
+
+## GitHub Actions
+
+See the [CLI reference](cli.md) or use the bundled composite action:
+
+```yaml
+- uses: ./.github/actions/enforce
+  with:
+    script: agent.py
+    budget: "5"
+    warn-at: "0.8"
+```
diff --git a/pyproject.toml b/pyproject.toml
index 3f825c2..629f4bb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "shekel"
-version = "0.2.8"
+version = "0.2.9"
 description = "LLM budget enforcement and cost tracking. Zero config — with budget(max_usd=1.00): run_agent(). Works with LangGraph, CrewAI, raw OpenAI/Anthropic/Gemini."
 readme = "README.md"
 license = { file = "LICENSE" }
diff --git a/shekel/__init__.py b/shekel/__init__.py
index 6995929..828c6ee 100644
--- a/shekel/__init__.py
+++ b/shekel/__init__.py
@@ -7,7 +7,7 @@
 from shekel._tool import tool
 from shekel.exceptions import BudgetExceededError, ToolBudgetExceededError
 
-__version__ = "0.2.8"
+__version__ = "0.2.9"
 __all__ = [
     "budget",
     "Budget",
diff --git a/shekel/_budget.py b/shekel/_budget.py
index 67fbd51..c44edf0 100644
--- a/shekel/_budget.py
+++ b/shekel/_budget.py
@@ -74,6 +74,7 @@ def __init__(
         max_llm_calls: int | None = None,
         max_tool_calls: int | None = None,
         tool_prices: dict[str, float] | None = None,
+        warn_only: bool = False,
     ) -> None:
 
         if max_usd is not None and max_usd <= 0:
@@ -146,6 +147,7 @@ def __init__(
         self.price_per_1k_tokens = price_per_1k_tokens
         self.fallback: dict[str, Any] | None = fallback
         self.on_fallback = on_fallback
+        self.warn_only: bool = warn_only
 
         # --- Nested budget support (v0.2.3) ---
         self.name: str | None = name
@@ -596,8 +598,11 @@ def _check_limit(self) -> None:
             return  # Fallback just activated — keep running on cheaper model
 
         if budget_exceeded and (self.fallback is None or self._using_fallback):
-            # No fallback available, or already on fallback and still exceeded — raise
+            # No fallback available, or already on fallback and still exceeded
             self._emit_budget_exceeded_event()
+            if self.warn_only:
+                self._check_warn()  # fire warning callback if threshold set
+                return
             raise BudgetExceededError(
                 self._spent, effective_limit, self._last_model, self._last_tokens
             )
@@ -623,6 +628,8 @@ def _check_call_limit(self) -> None:
 
         if self._calls_made > effective_call_limit:
             self._emit_budget_exceeded_event()
+            if self.warn_only:
+                return
             raise BudgetExceededError(
                 self._calls_made,
                 effective_call_limit,
@@ -643,20 +650,7 @@ def _check_tool_limit(self, tool_name: str, framework: str) -> None:
         limit = self._effective_tool_call_limit
         if limit is not None and self._tool_calls_made >= limit:
             self._emit_tool_budget_exceeded_event(tool_name, framework)
-            raise ToolBudgetExceededError(
-                tool_name=tool_name,
-                calls_used=self._tool_calls_made,
-                calls_limit=limit,
-                usd_spent=self._tool_spent,
-                usd_limit=self.max_usd,
-                framework=framework,
-            )
-
-        # Also check USD limit if tool_prices configured for this tool
-        if self.max_usd is not None and self.tool_prices is not None:
-            price = self.tool_prices.get(tool_name)
-            if price is not None and self._tool_spent + price > self.max_usd:
-                self._emit_tool_budget_exceeded_event(tool_name, framework)
+            if not self.warn_only:
                 raise ToolBudgetExceededError(
                     tool_name=tool_name,
                     calls_used=self._tool_calls_made,
@@ -666,6 +660,21 @@ def _check_tool_limit(self, tool_name: str, framework: str) -> None:
                     framework=framework,
                 )
 
+        # Also check USD limit if tool_prices configured for this tool
+        if self.max_usd is not None and self.tool_prices is not None:
+            price = self.tool_prices.get(tool_name)
+            if price is not None and self._tool_spent + price > self.max_usd:
+                self._emit_tool_budget_exceeded_event(tool_name, framework)
+                if not self.warn_only:
+                    raise ToolBudgetExceededError(
+                        tool_name=tool_name,
+                        calls_used=self._tool_calls_made,
+                        calls_limit=limit,
+                        usd_spent=self._tool_spent,
+                        usd_limit=self.max_usd,
+                        framework=framework,
+                    )
+
     def _record_tool_call(self, tool_name: str, cost: float, framework: str) -> None:
         """Post-dispatch: record the tool call and emit events."""
         self._tool_calls_made += 1
diff --git a/shekel/_cli.py b/shekel/_cli.py
index bbb2375..4144a81 100644
--- a/shekel/_cli.py
+++ b/shekel/_cli.py
@@ -1,5 +1,9 @@
 from __future__ import annotations
 
+import os
+import runpy
+import sys
+
 try:
     import click
 except ImportError:  # pragma: no cover
@@ -59,3 +63,243 @@ def models(provider: str | None) -> None:
     click.echo("-" * len(header))
     for name, inp, out in rows:
         click.echo(f"{name:<{col1}}  ${inp:>11.6f}  ${out:>11.6f}")
+
+
+@cli.command(context_settings={"ignore_unknown_options": True, "allow_extra_args": True})
+@click.argument("script")
+@click.argument("args", nargs=-1, type=click.UNPROCESSED)
+@click.option(
+    "--budget", "max_usd", type=float, default=None, help="Max spend in USD (maps to max_usd)."
+)
+@click.option(
+    "--warn-at", type=float, default=None, help="Warn fraction 0.0–1.0 (maps to warn_at)."
+)
+@click.option(
+    "--max-llm-calls", type=int, default=None, help="Cap on LLM API calls (maps to max_llm_calls)."
+)
+@click.option(
+    "--max-tool-calls",
+    type=int,
+    default=None,
+    help="Cap on tool invocations (maps to max_tool_calls).",
+)
+@click.option(
+    "--fallback-model",
+    type=str,
+    default=None,
+    help="Fallback model name (maps to fallback['model']).",
+)
+@click.option(
+    "--fallback-at",
+    type=float,
+    default=0.8,
+    show_default=True,
+    help="Fallback activation threshold 0.0–1.0 (maps to fallback['at_pct']).",
+)
+@click.option(
+    "--output",
+    type=click.Choice(["text", "json"]),
+    default="text",
+    show_default=True,
+    help="Output format.",
+)
+@click.option(
+    "--warn-only",
+    is_flag=True,
+    default=False,
+    help="Never exit 1; warn but continue when budget exceeded.",
+)
+@click.option(
+    "--dry-run",
+    is_flag=True,
+    default=False,
+    help="Track costs without enforcement. Implies --warn-only.",
+)
+@click.option(
+    "--budget-file", type=str, default=None, help="Path to TOML budget config file (shekel.toml)."
+)
+def run(
+    script: str,
+    args: tuple[str, ...],
+    max_usd: float | None,
+    warn_at: float | None,
+    max_llm_calls: int | None,
+    max_tool_calls: int | None,
+    fallback_model: str | None,
+    fallback_at: float,
+    output: str,
+    warn_only: bool,
+    dry_run: bool,
+    budget_file: str | None,
+) -> None:
+    """Run a Python script with budget enforcement. No code changes required.
+
+    Equivalent to wrapping your script in ``with budget(max_usd=N):``.
+    Exits with code 1 if the budget is exceeded (CI-friendly).
+
+    \b
+    Examples:
+      shekel run agent.py --budget 5
+      shekel run agent.py --budget 5 --warn-at 0.8
+      shekel run agent.py --max-llm-calls 20
+      shekel run agent.py --budget 5 --output json
+      shekel run agent.py --budget 5 --warn-only
+      shekel run agent.py --budget 5 --dry-run
+      shekel run agent.py --budget-file shekel.toml
+      AGENT_BUDGET_USD=5 shekel run agent.py
+    """
+    import json as _json
+
+    from shekel import budget as make_budget
+    from shekel._run_utils import detect_patched_providers, format_spend_summary
+    from shekel.exceptions import BudgetExceededError, ToolBudgetExceededError
+
+    # --dry-run implies --warn-only
+    if dry_run:
+        warn_only = True
+
+    # Load budget file if specified (CLI flags take precedence)
+    file_kwargs: dict[str, object] = {}
+    if budget_file is not None:
+        from shekel._run_config import load_budget_file
+
+        try:
+            file_kwargs = load_budget_file(budget_file)
+        except FileNotFoundError:
+            click.echo(f"shekel: budget file not found: {budget_file}", err=True)
+            sys.exit(2)
+        except Exception as exc:
+            click.echo(f"shekel: invalid budget file — {exc}", err=True)
+            sys.exit(2)
+
+    # Env var fallback for --budget
+    if max_usd is None:
+        env_val = os.environ.get("AGENT_BUDGET_USD")
+        if env_val is not None:
+            try:
+                max_usd = float(env_val)
+            except ValueError:
+                click.echo(
+                    f"shekel: invalid AGENT_BUDGET_USD={env_val!r} — must be a number",
+                    err=True,
+                )
+                sys.exit(2)
+
+    # Build budget kwargs: file values first, then explicit CLI flags override
+    budget_kwargs: dict[str, object] = {"name": "shekel-run", **file_kwargs}
+    if max_usd is not None:
+        budget_kwargs["max_usd"] = max_usd
+    if warn_at is not None:
+        budget_kwargs["warn_at"] = warn_at
+    if max_llm_calls is not None:
+        budget_kwargs["max_llm_calls"] = max_llm_calls
+    if max_tool_calls is not None:
+        budget_kwargs["max_tool_calls"] = max_tool_calls
+    if fallback_model is not None:
+        budget_kwargs["fallback"] = {"model": fallback_model, "at_pct": fallback_at}
+    if warn_only:
+        budget_kwargs["warn_only"] = True
+
+    has_limit = (
+        budget_kwargs.get("max_usd") is not None
+        or budget_kwargs.get("max_llm_calls") is not None
+        or budget_kwargs.get("max_tool_calls") is not None
+    )
+
+    original_argv = sys.argv[:]
+    sys.argv = [script, *args]
+
+    script_exit_code = 0
+    exceeded = False
+    b = make_budget(**budget_kwargs)  # type: ignore[arg-type]
+    try:
+        with b:
+            if dry_run and output == "text":
+                click.echo("[dry-run] cost tracking only — budget limits will not be enforced")
+            if output == "text":
+                providers = detect_patched_providers()
+                if providers:
+                    click.echo(f"Patching: {', '.join(providers)}")
+            runpy.run_path(script, run_name="__main__")
+    except BudgetExceededError as exc:
+        exceeded = True
+        if output == "text":
+            if warn_only:
+                click.echo(
+                    f"⚠ Budget limit reached (warn-only): {exc.model}"
+                    f" · ${exc.spent:.4f} / ${exc.limit:.2f}",
+                    err=True,
+                )
+            else:
+                click.echo(
+                    f"✗ Budget exceeded: {exc.model} · {b.calls_used} calls"
+                    f" · ${exc.spent:.4f} / ${exc.limit:.2f}",
+                    err=True,
+                )
+        if not warn_only:
+            script_exit_code = 1
+    except ToolBudgetExceededError as exc:
+        exceeded = True
+        if output == "text":
+            limit_str = str(exc.calls_limit) if exc.calls_limit is not None else "∞"
+            if warn_only:
+                click.echo(
+                    f"⚠ Tool limit reached (warn-only): {exc.tool_name}"
+                    f" · {exc.calls_used}/{limit_str} calls",
+                    err=True,
+                )
+            else:
+                click.echo(
+                    f"✗ Tool budget exceeded: {exc.tool_name}"
+                    f" · {exc.calls_used}/{limit_str} calls",
+                    err=True,
+                )
+        if not warn_only:
+            script_exit_code = 1
+    except FileNotFoundError:
+        click.echo(f"shekel: script not found: {script}", err=True)
+        script_exit_code = 2
+    except SystemExit as exc:
+        code = exc.code
+        if isinstance(code, int):
+            script_exit_code = code
+        elif code is None:
+            script_exit_code = 0
+        else:
+            script_exit_code = 1
+    finally:
+        sys.argv = original_argv
+        # Determine status for both text and JSON output
+        if exceeded or (has_limit and b.max_usd is not None and b.spent > b.max_usd):
+            status = "exceeded"
+        elif b._warn_fired:
+            status = "warn"
+        else:
+            status = "ok"
+
+        if output == "json":
+            data = b.summary_data()
+            by_model: dict[str, object] = data["by_model"]  # type: ignore[assignment]
+            json_out: dict[str, object] = {
+                "spent": data["total_spent"],
+                "limit": data["limit"],
+                "calls": data["calls_used"],
+                "tool_calls": data["tool_calls_used"],
+                "status": status,
+            }
+            if by_model:
+                top_model = max(
+                    by_model.items(),
+                    key=lambda kv: kv[1]["calls"],  # type: ignore[index]
+                )[0]
+                json_out["model"] = top_model
+            click.echo(_json.dumps(json_out))
+        else:
+            click.echo(format_spend_summary(b))
+            if has_limit and b.calls_used == 0 and b.tool_calls_used == 0:
+                click.echo(
+                    "Warning: 0 LLM calls intercepted — budget may not be enforced.",
+                    err=True,
+                )
+
+    sys.exit(script_exit_code)
diff --git a/shekel/_run_config.py b/shekel/_run_config.py
new file mode 100644
index 0000000..d1f451e
--- /dev/null
+++ b/shekel/_run_config.py
@@ -0,0 +1,50 @@
+from __future__ import annotations
+
+import sys
+from typing import Any
+
+
+def load_budget_file(path: str) -> dict[str, object]:
+    """Parse a TOML budget config file and return budget kwargs.
+
+    Supports the following keys under ``[budget]``:
+
+    .. code-block:: toml
+
+        [budget]
+        max_usd = 5.0
+        warn_at = 0.8
+        max_llm_calls = 20
+        max_tool_calls = 50
+
+    Requires Python 3.11+ (uses stdlib ``tomllib``) or the optional
+    ``tomli`` package for Python 3.9/3.10.
+
+    Raises:
+        FileNotFoundError: If *path* does not exist.
+        SystemExit: If ``tomllib``/``tomli`` is unavailable on Python < 3.11.
+    """
+    if sys.version_info >= (3, 11):
+        import tomllib
+    else:  # pragma: no cover
+        try:
+            import tomli as tomllib  # type: ignore[import-not-found]
+        except ImportError:
+            raise SystemExit("shekel: --budget-file requires Python 3.11+ or: pip install tomli")
+
+    with open(path, "rb") as f:
+        data = tomllib.load(f)
+
+    section: dict[str, Any] = data.get("budget", {})
+    kwargs: dict[str, object] = {}
+
+    if "max_usd" in section:
+        kwargs["max_usd"] = float(section["max_usd"])
+    if "warn_at" in section:
+        kwargs["warn_at"] = float(section["warn_at"])
+    if "max_llm_calls" in section:
+        kwargs["max_llm_calls"] = int(section["max_llm_calls"])
+    if "max_tool_calls" in section:
+        kwargs["max_tool_calls"] = int(section["max_tool_calls"])
+
+    return kwargs
diff --git a/shekel/_run_utils.py b/shekel/_run_utils.py
new file mode 100644
index 0000000..5bb5a9a
--- /dev/null
+++ b/shekel/_run_utils.py
@@ -0,0 +1,56 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from shekel._budget import Budget
+
+# Maps _patch._originals key prefixes to human-readable provider names.
+_KEY_PREFIX_TO_PROVIDER: dict[str, str] = {
+    "openai": "openai",
+    "anthropic": "anthropic",
+    "litellm": "litellm",
+    "gemini": "gemini",
+    "huggingface": "huggingface",
+    "langchain": "langchain",
+    "mcp": "mcp",
+    "crewai": "crewai",
+    "openai_agents": "openai-agents",
+}
+
+
+def detect_patched_providers() -> list[str]:
+    """Return sorted list of provider names whose patches are currently active.
+
+    Must be called after budget.__enter__() (i.e. inside ``with budget():``).
+    """
+    import shekel._patch as _patch_module
+
+    seen: set[str] = set()
+    for key in _patch_module._originals:
+        for prefix, provider in _KEY_PREFIX_TO_PROVIDER.items():
+            if key.startswith(prefix):
+                seen.add(provider)
+    return sorted(seen)
+
+
+def format_spend_summary(b: Budget) -> str:
+    """Return a compact one-line spend summary for CLI output."""
+    data = b.summary_data()
+    spent: float = float(data["total_spent"])  # type: ignore[arg-type]
+    calls: int = int(data["calls_used"])  # type: ignore[call-overload]
+    limit: float | None = data["limit"]  # type: ignore[assignment]
+
+    by_model: dict[str, object] = data["by_model"]  # type: ignore[assignment]
+    model_part = ""
+    if by_model:
+        top_model = max(by_model.items(), key=lambda kv: kv[1]["calls"])[0]  # type: ignore[index]
+        model_part = f" · {top_model}"
+
+    if limit is not None:
+        pct = (spent / limit * 100) if limit > 0 else 0.0
+        limit_part = f" / ${limit:.2f} ({pct:.0f}%)"
+    else:
+        limit_part = ""
+
+    return f"${spent:.4f} spent{limit_part} · {calls} calls{model_part}"
diff --git a/tests/performance/test_run_overhead.py b/tests/performance/test_run_overhead.py
new file mode 100644
index 0000000..d76e82b
--- /dev/null
+++ b/tests/performance/test_run_overhead.py
@@ -0,0 +1,51 @@
+"""Performance test: shekel run overhead should be sub-100 ms for a no-op script.
+
+The wall-clock test needs no plugins. The benchmark test uses the ``benchmark``
+fixture from pytest-benchmark and cannot run in environments without that plugin.
+"""
+from __future__ import annotations
+
+import time
+from pathlib import Path
+
+import pytest
+from click.testing import CliRunner
+
+from shekel._cli import cli
+
+
+@pytest.fixture
+def noop_script(tmp_path: Path) -> Path:
+    p = tmp_path / "noop.py"
+    p.write_text("pass")
+    return p
+
+
+def _run_once(noop_script: Path) -> float:
+    runner = CliRunner()
+    t0 = time.perf_counter()
+    result = runner.invoke(cli, ["run", str(noop_script)])
+    elapsed = time.perf_counter() - t0
+    assert result.exit_code == 0
+    return elapsed
+
+
+def test_run_overhead_under_100ms(noop_script: Path) -> None:
+    """shekel run on a no-op script must complete in under 100 ms (wall clock)."""
+    # Warm-up: first call may pay import costs
+    _run_once(noop_script)
+    # Measure: take the minimum of 5 runs to reduce noise
+    times = [_run_once(noop_script) for _ in range(5)]
+    best = min(times)
+    assert best < 0.1, f"shekel run overhead too high: best={best*1000:.1f} ms (limit 100 ms)"
+
+
+def test_run_overhead_benchmark(benchmark: pytest.fixture, noop_script: Path) -> None:  # type: ignore[type-arg]
+    """Benchmark version — requires the pytest-benchmark ``benchmark`` fixture."""
+    runner = CliRunner()
+
+    def _invoke() -> None:
+        result = runner.invoke(cli, ["run", str(noop_script)])
+        assert result.exit_code == 0
+
+    benchmark(_invoke)
diff --git a/tests/test_budget_warn_only.py b/tests/test_budget_warn_only.py
new file mode 100644
index 0000000..0d78ead
--- /dev/null
+++ b/tests/test_budget_warn_only.py
@@ -0,0 +1,98 @@
+from __future__ import annotations
+
+import pytest
+
+from shekel._budget import Budget
+from shekel.exceptions import BudgetExceededError, ToolBudgetExceededError
+
+# ---------------------------------------------------------------------------
+# Story 11: Budget(warn_only=True) — enforce silently, never raise
+# ---------------------------------------------------------------------------
+
+
+def test_warn_only_does_not_raise_on_usd_exceeded() -> None:
+    b = Budget(max_usd=0.001, warn_only=True)
+    with b:
+        b._record_spend(1.0, "gpt-4o", {"input": 100, "output": 50})
+    # No BudgetExceededError raised
+
+
+def test_warn_only_false_still_raises_on_usd_exceeded() -> None:
+    b = Budget(max_usd=0.001, warn_only=False)
+    with pytest.raises(BudgetExceededError):
+        with b:
+            b._record_spend(1.0, "gpt-4o", {"input": 100, "output": 50})
+
+
+def test_warn_only_does_not_raise_on_call_limit_exceeded() -> None:
+    b = Budget(max_usd=10.0, max_llm_calls=1, warn_only=True)
+    with b:
+        # First call — within limit
+        b._record_spend(0.001, "gpt-4o", {"input": 10, "output": 5})
+        # Second call — exceeds max_llm_calls=1, but warn_only so no raise
+        b._record_spend(0.001, "gpt-4o", {"input": 10, "output": 5})
+
+
+def test_warn_only_false_raises_on_call_limit_exceeded() -> None:
+    b = Budget(max_usd=10.0, max_llm_calls=1, warn_only=False)
+    with pytest.raises(BudgetExceededError):
+        with b:
+            b._record_spend(0.001, "gpt-4o", {"input": 10, "output": 5})
+            b._record_spend(0.001, "gpt-4o", {"input": 10, "output": 5})
+
+
+def test_warn_only_does_not_raise_on_tool_limit_exceeded() -> None:
+    b = Budget(max_tool_calls=1, warn_only=True)
+    with b:
+        b._check_tool_limit("web_search", "manual")
+        b._record_tool_call("web_search", 0.0, "manual")
+        # Second call — exceeds max_tool_calls=1, but warn_only so no raise
+        b._check_tool_limit("web_search", "manual")
+
+
+def test_warn_only_false_raises_on_tool_limit_exceeded() -> None:
+    b = Budget(max_tool_calls=1, warn_only=False)
+    with pytest.raises(ToolBudgetExceededError):
+        with b:
+            b._check_tool_limit("web_search", "manual")
+            b._record_tool_call("web_search", 0.0, "manual")
+            b._check_tool_limit("web_search", "manual")
+
+
+def test_warn_only_still_fires_warn_callback_when_exceeded() -> None:
+    fired: list[tuple[float, float]] = []
+    b = Budget(
+        max_usd=1.0,
+        warn_at=0.5,
+        on_warn=lambda s, l: fired.append((s, l)),
+        warn_only=True,
+    )
+    with b:
+        b._record_spend(2.0, "gpt-4o", {"input": 100, "output": 50})
+    assert len(fired) == 1
+    assert fired[0][1] == 1.0  # limit
+
+
+def test_warn_only_spent_is_tracked_correctly() -> None:
+    b = Budget(max_usd=0.001, warn_only=True)
+    with b:
+        b._record_spend(1.5, "gpt-4o", {"input": 100, "output": 50})
+    assert b.spent == pytest.approx(1.5)
+
+
+def test_warn_only_tool_usd_limit_does_not_raise() -> None:
+    """Tool USD limit check also respects warn_only."""
+    b = Budget(max_usd=0.01, tool_prices={"web_search": 0.05}, warn_only=True)
+    with b:
+        # This would raise ToolBudgetExceededError without warn_only
+        b._check_tool_limit("web_search", "manual")
+
+
+def test_warn_only_default_is_false() -> None:
+    b = Budget(max_usd=0.001)
+    assert b.warn_only is False
+
+
+def test_warn_only_stored_on_budget() -> None:
+    b = Budget(max_usd=1.0, warn_only=True)
+    assert b.warn_only is True
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 5dd263f..4e752d2 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -105,3 +105,12 @@ def test_models_filter_google(runner: CliRunner) -> None:
 def test_models_invalid_provider(runner: CliRunner) -> None:
     result = runner.invoke(cli, ["models", "--provider", "invalid"])
     assert result.exit_code != 0
+
+
+def test_models_no_results(runner: CliRunner) -> None:
+    from unittest.mock import patch
+
+    with patch("shekel._cli._PRICES", {}):
+        result = runner.invoke(cli, ["models"])
+    assert result.exit_code == 0
+    assert "No models found" in result.output
diff --git a/tests/test_cli_run.py b/tests/test_cli_run.py
new file mode 100644
index 0000000..6a6eb07
--- /dev/null
+++ b/tests/test_cli_run.py
@@ -0,0 +1,374 @@
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+from click.testing import CliRunner
+
+from shekel._cli import cli
+from shekel.exceptions import BudgetExceededError, ToolBudgetExceededError
+
+
+@pytest.fixture
+def runner() -> CliRunner:
+    return CliRunner()
+
+
+@pytest.fixture
+def script(tmp_path: Path):
+    """Helper to write a temp Python script and return its path."""
+
+    def _make(content: str, name: str = "agent.py") -> Path:
+        p = tmp_path / name
+        p.write_text(content)
+        return p
+
+    return _make
+
+
+# ---------------------------------------------------------------------------
+# Story 1: bare shekel run (no flags)
+# ---------------------------------------------------------------------------
+
+
+def test_run_executes_script_and_exits_zero(runner: CliRunner, script) -> None:
+    s = script("x = 1 + 1")
+    result = runner.invoke(cli, ["run", str(s)])
+    assert result.exit_code == 0
+
+
+def test_run_prints_spend_summary_on_exit(runner: CliRunner, script) -> None:
+    s = script("pass")
+    result = runner.invoke(cli, ["run", str(s)])
+    assert result.exit_code == 0
+    assert "$" in result.output
+    assert "spent" in result.output
+
+
+def test_run_passes_script_args_via_argv(runner: CliRunner, script, tmp_path: Path) -> None:
+    """Extra CLI args reach the script via sys.argv, with argv[0] set to the script path."""
+    out = tmp_path / "argv.txt"
+    s = script(f"import sys, pathlib; pathlib.Path({str(out)!r}).write_text(' '.join(sys.argv))")
+    result = runner.invoke(cli, ["run", str(s), "arg1", "arg2"])
+    assert result.exit_code == 0
+    content = out.read_text()
+    assert "arg1" in content and "arg2" in content
+    # Prefix check rather than split()[0]: the temp path itself may contain spaces
+    assert content.startswith(f"{s} ")
+
+
+def test_run_script_not_found_exits_nonzero(runner: CliRunner) -> None:
+    result = runner.invoke(cli, ["run", "/nonexistent/agent.py"])
+    assert result.exit_code != 0
+    assert "not found" in result.output.lower()
+
+
+def test_run_script_nonzero_exit_propagated(runner: CliRunner, script) -> None:
+    s = script("import sys; sys.exit(42)")
+    result = runner.invoke(cli, ["run", str(s)])
+    assert result.exit_code == 42
+
+
+def test_run_script_exit_zero_explicit(runner: CliRunner, script) -> None:
+    s = script("import sys; sys.exit(0)")
+    result = runner.invoke(cli, ["run", str(s)])
+    assert result.exit_code == 0
+
+
+def test_run_script_exit_none_is_zero(runner: CliRunner, script) -> None:
+    s = script("import sys; sys.exit()")
+    result = runner.invoke(cli, ["run", str(s)])
+    assert result.exit_code == 0
+
+
+def test_run_spend_summary_always_printed_on_nonzero_exit(runner: CliRunner, script) -> None:
+    s = script("import sys; sys.exit(1)")
+    result = runner.invoke(cli, ["run", str(s)])
+    assert result.exit_code == 1
+    assert "$" in result.output
+
+
+# ---------------------------------------------------------------------------
+# Story 2: budget flags
+# ---------------------------------------------------------------------------
+
+
+def test_run_no_budget_flag_is_tracking_only(runner: CliRunner, script) -> None:
+    """Without --budget, a no-op script runs to completion and exits 0."""
+    s = script("pass")
+    result = runner.invoke(cli, ["run", str(s)])
+    assert result.exit_code == 0
+
+
+def test_run_budget_exceeded_exits_one(runner: CliRunner, script) -> None:
+    s = script("pass")
+    exc = BudgetExceededError(spent=1.50, limit=1.00, model="gpt-4o")
+    with patch("runpy.run_path", side_effect=exc):
+        result = runner.invoke(cli, ["run", str(s), "--budget", "1"])
+    assert result.exit_code == 1
+
+
+def test_run_budget_exceeded_no_stacktrace(runner: CliRunner, script) -> None:
+    s = script("pass")
+    exc = BudgetExceededError(spent=1.50, limit=1.00, model="gpt-4o")
+    with patch("runpy.run_path", side_effect=exc):
+        result = runner.invoke(cli, ["run", str(s), "--budget", "1"])
+    full_output = result.output
+    assert "Traceback" not in full_output
+
+
+def test_run_budget_exceeded_shows_spent_and_limit(runner: CliRunner, script) -> None:
+    s = script("pass")
+    exc = BudgetExceededError(spent=1.50, limit=1.00, model="gpt-4o")
+    with patch("runpy.run_path", side_effect=exc):
+        result = runner.invoke(cli, ["run", str(s), "--budget", "1"])
+    full_output = result.output
+    assert "1.50" in full_output or "1.5" in full_output
+    assert "1.00" in full_output or "1.0" in full_output
+
+
+def test_run_budget_exceeded_shows_model(runner: CliRunner, script) -> None:
+    s = script("pass")
+    exc = BudgetExceededError(spent=1.50, limit=1.00, model="gpt-4o")
+    with patch("runpy.run_path", side_effect=exc):
+        result = runner.invoke(cli, ["run", str(s), "--budget", "1"])
+    full_output = result.output
+    assert "gpt-4o" in full_output
+
+
+def test_run_tool_budget_exceeded_clean_output(runner: CliRunner, script) -> None:
+    s = script("pass")
+    exc = ToolBudgetExceededError(
+        tool_name="web_search",
+        calls_used=10,
+        calls_limit=10,
+        usd_spent=0.10,
+        usd_limit=None,
+    )
+    with patch("runpy.run_path", side_effect=exc):
+        result = runner.invoke(cli, ["run", str(s), "--max-tool-calls", "10"])
+    assert result.exit_code == 1
+    full_output = result.output
+    assert "Traceback" not in full_output
+    assert "web_search" in full_output
+
+
+def test_run_warn_at_flag_accepted(runner: CliRunner, script) -> None:
+    s = script("pass")
+    result = runner.invoke(cli, ["run", str(s), "--budget", "5", "--warn-at", "0.8"])
+    assert result.exit_code == 0
+
+
+def test_run_max_llm_calls_flag_accepted(runner: CliRunner, script) -> None:
+    s = script("pass")
+    result = runner.invoke(cli, ["run", str(s), "--max-llm-calls", "10"])
+    assert result.exit_code == 0
+
+
+def test_run_max_tool_calls_flag_accepted(runner: CliRunner, script) -> None:
+    s = script("pass")
+    result = runner.invoke(cli, ["run", str(s), "--max-tool-calls", "50"])
+    assert result.exit_code == 0
+
+
+def test_run_fallback_model_flag_accepted(runner: CliRunner, script) -> None:
+    s = script("pass")
+    result = runner.invoke(cli, ["run", str(s), "--budget", "5", "--fallback-model", "gpt-4o-mini"])
+    assert result.exit_code == 0
+
+
+def test_run_fallback_at_default_is_point_eight(runner: CliRunner, script) -> None:
+    """Explicit --fallback-at 0.5 is accepted (the default path is not exercised here)."""
+    s = script("pass")
+    result = runner.invoke(
+        cli,
+        ["run", str(s), "--budget", "5", "--fallback-model", "gpt-4o-mini", "--fallback-at", "0.5"],
+    )
+    assert result.exit_code == 0
+
+
+# ---------------------------------------------------------------------------
+# Story 3: AGENT_BUDGET_USD env var
+# ---------------------------------------------------------------------------
+
+
+def test_run_env_var_sets_budget(runner: CliRunner, script) -> None:
+    s = script("pass")
+    with runner.isolated_filesystem():
+        result = runner.invoke(cli, ["run", str(s)], env={"AGENT_BUDGET_USD": "10.0"})
+    assert result.exit_code == 0
+
+
+def test_run_flag_overrides_env_var(runner: CliRunner, script) -> None:
+    """--budget flag takes precedence over AGENT_BUDGET_USD env var."""
+    s = script("pass")
+    exc = BudgetExceededError(spent=2.0, limit=1.0, model="gpt-4o")
+    with patch("runpy.run_path", side_effect=exc):
+        result = runner.invoke(
+            cli,
+            ["run", str(s), "--budget", "1"],
+            env={"AGENT_BUDGET_USD": "999"},
+        )
+    assert result.exit_code == 1
+
+
+def test_run_env_var_invalid_value_exits_with_error(runner: CliRunner, script) -> None:
+    s = script("pass")
+    result = runner.invoke(cli, ["run", str(s)], env={"AGENT_BUDGET_USD": "notanumber"})
+    assert result.exit_code != 0
+    full_output = result.output
+    assert "AGENT_BUDGET_USD" in full_output
+
+
+def test_run_no_flag_no_env_is_tracking_only(runner: CliRunner, script, monkeypatch) -> None:
+    monkeypatch.delenv("AGENT_BUDGET_USD", raising=False)
+    s = script("pass")
+    result = runner.invoke(cli, ["run", str(s)])
+    assert result.exit_code == 0
+
+
+# ---------------------------------------------------------------------------
+# Story 4: provider detection message
+# ---------------------------------------------------------------------------
+
+
+def test_run_provider_detection_shown_when_providers_patched(runner: CliRunner, script) -> None:
+    s = script("pass")
+    with patch(
+        "shekel._run_utils.detect_patched_providers",
+        return_value=["anthropic", "openai"],
+    ):
+        result = runner.invoke(cli, ["run", str(s)])
+    assert "Patching:" in result.output
+    assert "openai" in result.output
+    assert "anthropic" in result.output
+
+
+def test_run_no_provider_detection_when_nothing_patched(runner: CliRunner, script) -> None:
+    s = script("pass")
+    with patch("shekel._run_utils.detect_patched_providers", return_value=[]):
+        result = runner.invoke(cli, ["run", str(s)])
+    assert "Patching:" not in result.output
+
+
+# ---------------------------------------------------------------------------
+# Story 5: zero calls intercepted warning
+# ---------------------------------------------------------------------------
+
+
+def test_run_zero_calls_with_budget_prints_warning(runner: CliRunner, script) -> None:
+    s = script("pass")
+    result = runner.invoke(cli, ["run", str(s), "--budget", "5"])
+    full_output = result.output
+    assert "0" in full_output and "intercept" in full_output.lower()
+
+
+def test_run_calls_made_no_zero_intercept_warning(runner: CliRunner, script, tmp_path) -> None:
+    """When calls are made, no zero-intercept warning should appear."""
+    s = script("pass")
+    # Simulate a budget that already recorded a call by presetting _calls_made in __init__
+    from shekel._budget import Budget
+
+    original_init = Budget.__init__
+
+    def patched_init(self, **kwargs):
+        original_init(self, **kwargs)
+        self._calls_made = 1  # simulate a call was made
+
+    with patch.object(Budget, "__init__", patched_init):
+        result = runner.invoke(cli, ["run", str(s), "--budget", "5"])
+    full_output = result.output
+    assert "0 LLM calls intercepted" not in full_output
+
+
+def test_run_no_budget_no_zero_intercept_warning(runner: CliRunner, script, monkeypatch) -> None:
+    monkeypatch.delenv("AGENT_BUDGET_USD", raising=False)
+    s = script("pass")
+    result = runner.invoke(cli, ["run", str(s)])
+    full_output = result.output
+    assert "0 LLM calls intercepted" not in full_output
+
+
+# ---------------------------------------------------------------------------
+# Story 6: clean budget-exceeded output (attribution)
+# ---------------------------------------------------------------------------
+
+
+def test_run_budget_exceeded_message_attributes_to_agent(runner: CliRunner, script) -> None:
+    """The error message should show agent spend, not shekel error."""
+    s = script("pass")
+    exc = BudgetExceededError(
+        spent=3.14,
+        limit=2.00,
+        model="claude-3-5-sonnet",
+        tokens={"input": 1000, "output": 500},
+    )
+    with patch("runpy.run_path", side_effect=exc):
+        result = runner.invoke(cli, ["run", str(s), "--budget", "2"])
+    full_output = result.output
+    assert "Budget exceeded" in full_output
+    assert "claude-3-5-sonnet" in full_output
+    assert "Traceback" not in full_output
+
+
+def test_run_spend_summary_printed_even_on_budget_exceeded(runner: CliRunner, script) -> None:
+    s = script("pass")
+    exc = BudgetExceededError(spent=1.50, limit=1.00, model="gpt-4o")
+    with patch("runpy.run_path", side_effect=exc):
+        result = runner.invoke(cli, ["run", str(s), "--budget", "1"])
+    assert "$" in result.output
+
+
+# ---------------------------------------------------------------------------
+# Story 7: naming / nesting
+# ---------------------------------------------------------------------------
+
+
+def test_run_nested_budget_in_script_works(runner: CliRunner, script) -> None:
+    """A script using with budget(name='inner') should not raise due to naming conflicts."""
+    s = script(
+        "from shekel import budget\n" "with budget(max_usd=1.0, name='inner'):\n" "    pass\n"
+    )
+    result = runner.invoke(cli, ["run", str(s), "--budget", "5"])
+    assert result.exit_code == 0
+
+
+def test_run_sys_argv_restored_after_run(runner: CliRunner, script) -> None:
+    original_argv = sys.argv[:]
+    s = script("pass")
+    runner.invoke(cli, ["run", str(s)])
+    # shekel run swaps sys.argv for the script; it must be restored once the run finishes
+    assert sys.argv == original_argv
+
+
+def test_run_script_exit_string_message_is_one(runner: CliRunner, script) -> None:
+    """sys.exit("error msg") — non-int, non-None code — should map to exit code 1."""
+    s = script('import sys; sys.exit("fatal error")')
+    result = runner.invoke(cli, ["run", str(s)])
+    assert result.exit_code == 1
+
+
+# ---------------------------------------------------------------------------
+# _run_utils unit tests
+# ---------------------------------------------------------------------------
+
+
+def test_format_spend_summary_with_model(runner: CliRunner, script, tmp_path: Path) -> None:
+    """format_spend_summary includes model name when calls were made."""
+    from shekel._budget import Budget
+    from shekel._run_utils import format_spend_summary
+
+    b = Budget(name="test")
+    # Simulate a recorded call by directly manipulating internal state
+    from shekel._budget import CallRecord
+
+    b._calls.append(CallRecord(model="gpt-4o", cost=0.05, input_tokens=100, output_tokens=50))
+    b._calls_made = 1
+    b._spent = 0.05
+    b._spent_direct = 0.05
+
+    summary = format_spend_summary(b)
+    assert "gpt-4o" in summary
+    assert "$0.0500" in summary
diff --git a/tests/test_cli_run_config.py b/tests/test_cli_run_config.py
new file mode 100644
index 0000000..01905fb
--- /dev/null
+++ b/tests/test_cli_run_config.py
@@ -0,0 +1,147 @@
+from __future__ import annotations
+
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+from click.testing import CliRunner
+
+from shekel._cli import cli
+from shekel.exceptions import BudgetExceededError
+
+
+@pytest.fixture
+def runner() -> CliRunner:
+    return CliRunner()
+
+
+@pytest.fixture
+def script(tmp_path: Path):
+    def _make(content: str, name: str = "agent.py") -> Path:
+        p = tmp_path / name
+        p.write_text(content)
+        return p
+
+    return _make
+
+
+@pytest.fixture
+def budget_toml(tmp_path: Path):
+    def _make(content: str) -> Path:
+        p = tmp_path / "shekel.toml"
+        p.write_text(content)
+        return p
+
+    return _make
+
+
+# ---------------------------------------------------------------------------
+# Story 10: --budget-file shekel.toml
+# ---------------------------------------------------------------------------
+
+
+def test_budget_file_loaded_max_usd(runner: CliRunner, script, budget_toml) -> None:
+    s = script("pass")
+    cfg = budget_toml("[budget]\nmax_usd = 5.0\n")
+    result = runner.invoke(cli, ["run", str(s), "--budget-file", str(cfg)])
+    assert result.exit_code == 0
+
+
+def test_budget_file_loaded_warn_at(runner: CliRunner, script, budget_toml) -> None:
+    s = script("pass")
+    cfg = budget_toml("[budget]\nmax_usd = 5.0\nwarn_at = 0.8\n")
+    result = runner.invoke(cli, ["run", str(s), "--budget-file", str(cfg)])
+    assert result.exit_code == 0
+
+
+def test_budget_file_loaded_max_llm_calls(runner: CliRunner, script, budget_toml) -> None:
+    s = script("pass")
+    cfg = budget_toml("[budget]\nmax_llm_calls = 20\n")
+    result = runner.invoke(cli, ["run", str(s), "--budget-file", str(cfg)])
+    assert result.exit_code == 0
+
+
+def test_budget_file_loaded_max_tool_calls(runner: CliRunner, script, budget_toml) -> None:
+    s = script("pass")
+    cfg = budget_toml("[budget]\nmax_tool_calls = 50\n")
+    result = runner.invoke(cli, ["run", str(s), "--budget-file", str(cfg)])
+    assert result.exit_code == 0
+
+
+def test_budget_file_not_found_exits_with_error(runner: CliRunner, script) -> None:
+    s = script("pass")
+    result = runner.invoke(cli, ["run", str(s), "--budget-file", "/nonexistent/shekel.toml"])
+    assert result.exit_code != 0
+    assert "not found" in result.output.lower() or "no such" in result.output.lower()
+
+
+def test_budget_file_invalid_toml_exits_with_error(runner: CliRunner, script, budget_toml) -> None:
+    s = script("pass")
+    cfg = budget_toml("this is not [ valid toml !!!")
+    result = runner.invoke(cli, ["run", str(s), "--budget-file", str(cfg)])
+    assert result.exit_code != 0
+    assert "invalid" in result.output.lower() or "toml" in result.output.lower()
+
+
+def test_budget_flag_overrides_file_max_usd(runner: CliRunner, script, budget_toml) -> None:
+    """--budget CLI flag takes precedence over max_usd in config file."""
+    s = script("pass")
+    cfg = budget_toml("[budget]\nmax_usd = 10.0\n")
+    exc = BudgetExceededError(spent=2.0, limit=1.0, model="gpt-4o")
+    with patch("runpy.run_path", side_effect=exc):
+        result = runner.invoke(cli, ["run", str(s), "--budget", "1", "--budget-file", str(cfg)])
+    # --budget 1 wins over file's max_usd=10; script exceeds $1 cap
+    assert result.exit_code == 1
+
+
+def test_budget_file_missing_budget_section_is_ok(runner: CliRunner, script, budget_toml) -> None:
+    """A TOML file with no [budget] section is valid — just no budget constraints."""
+    s = script("pass")
+    cfg = budget_toml("[other_section]\nfoo = 'bar'\n")
+    result = runner.invoke(cli, ["run", str(s), "--budget-file", str(cfg)])
+    assert result.exit_code == 0
+
+
+def test_budget_file_enforces_limit(runner: CliRunner, script, budget_toml) -> None:
+    """Budget from file is actually enforced."""
+    s = script("pass")
+    cfg = budget_toml("[budget]\nmax_usd = 1.0\n")
+    exc = BudgetExceededError(spent=2.0, limit=1.0, model="gpt-4o")
+    with patch("runpy.run_path", side_effect=exc):
+        result = runner.invoke(cli, ["run", str(s), "--budget-file", str(cfg)])
+    assert result.exit_code == 1
+
+
+# ---------------------------------------------------------------------------
+# load_budget_file unit tests (_run_config)
+# ---------------------------------------------------------------------------
+
+
+def test_load_budget_file_returns_empty_for_no_section(tmp_path: Path) -> None:
+    from shekel._run_config import load_budget_file
+
+    f = tmp_path / "shekel.toml"
+    f.write_text("[other]\nfoo = 1\n")
+    result = load_budget_file(str(f))
+    assert result == {}
+
+
+def test_load_budget_file_parses_all_keys(tmp_path: Path) -> None:
+    from shekel._run_config import load_budget_file
+
+    f = tmp_path / "shekel.toml"
+    f.write_text(
+        "[budget]\nmax_usd = 5.0\nwarn_at = 0.8\nmax_llm_calls = 20\nmax_tool_calls = 50\n"
+    )
+    result = load_budget_file(str(f))
+    assert result["max_usd"] == pytest.approx(5.0)
+    assert result["warn_at"] == pytest.approx(0.8)
+    assert result["max_llm_calls"] == 20
+    assert result["max_tool_calls"] == 50
+
+
+def test_load_budget_file_file_not_found_raises(tmp_path: Path) -> None:
+    from shekel._run_config import load_budget_file
+
+    with pytest.raises(FileNotFoundError):
+        load_budget_file(str(tmp_path / "missing.toml"))
diff --git a/tests/test_cli_run_output.py b/tests/test_cli_run_output.py
new file mode 100644
index 0000000..b0e85d7
--- /dev/null
+++ b/tests/test_cli_run_output.py
@@ -0,0 +1,198 @@
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+from click.testing import CliRunner
+
+from shekel._cli import cli
+from shekel.exceptions import BudgetExceededError, ToolBudgetExceededError
+
+
+@pytest.fixture
+def runner() -> CliRunner:
+    return CliRunner()
+
+
+@pytest.fixture
+def script(tmp_path: Path):
+    def _make(content: str, name: str = "agent.py") -> Path:
+        p = tmp_path / name
+        p.write_text(content)
+        return p
+
+    return _make
+
+
+# ---------------------------------------------------------------------------
+# Story 8: --output json
+# ---------------------------------------------------------------------------
+
+
+def test_output_json_emits_valid_json(runner: CliRunner, script) -> None:
+    s = script("pass")
+    result = runner.invoke(cli, ["run", str(s), "--output", "json"])
+    assert result.exit_code == 0
+    data = json.loads(result.output)
+    assert isinstance(data, dict)
+
+
+def test_output_json_contains_required_keys(runner: CliRunner, script) -> None:
+    s = script("pass")
+    result = runner.invoke(cli, ["run", str(s), "--output", "json"])
+    data = json.loads(result.output)
+    for key in ("spent", "limit", "calls", "tool_calls", "status"):
+        assert key in data, f"missing key: {key}"
+
+
+def test_output_json_ok_status_on_normal_exit(runner: CliRunner, script) -> None:
+    s = script("pass")
+    result = runner.invoke(cli, ["run", str(s), "--budget", "5", "--output", "json"])
+    data = json.loads(result.output)
+    assert data["status"] == "ok"
+    assert data["limit"] == pytest.approx(5.0)
+
+
+def test_output_json_no_limit_when_no_budget_flag(runner: CliRunner, script, monkeypatch) -> None:
+    monkeypatch.delenv("AGENT_BUDGET_USD", raising=False)
+    s = script("pass")
+    result = runner.invoke(cli, ["run", str(s), "--output", "json"])
+    data = json.loads(result.output)
+    assert data["limit"] is None
+
+
+def test_output_json_exceeded_status_on_budget_exceeded(runner: CliRunner, script) -> None:
+    s = script("pass")
+    exc = BudgetExceededError(spent=1.50, limit=1.00, model="gpt-4o")
+    with patch("runpy.run_path", side_effect=exc):
+        result = runner.invoke(cli, ["run", str(s), "--budget", "1", "--output", "json"])
+    assert result.exit_code == 1
+    data = json.loads(result.output)
+    assert data["status"] == "exceeded"
+
+
+def test_output_json_hides_human_spend_summary(runner: CliRunner, script) -> None:
+    """In JSON mode, the human 'spent · calls' line should not appear."""
+    s = script("pass")
+    result = runner.invoke(cli, ["run", str(s), "--output", "json"])
+    assert "spent" not in result.output.replace('"spent"', "")  # only JSON key, not label
+
+
+def test_output_json_with_model_field(runner: CliRunner, script) -> None:
+    """When LLM calls are recorded, model field appears in JSON."""
+    s = script("pass")
+    from shekel._budget import Budget, CallRecord
+
+    original_init = Budget.__init__
+
+    def patched_init(self, **kwargs):
+        original_init(self, **kwargs)
+        self._calls.append(
+            CallRecord(model="claude-3-5-sonnet", cost=0.02, input_tokens=100, output_tokens=50)
+        )
+        self._calls_made = 1
+        self._spent = 0.02
+
+    with patch.object(Budget, "__init__", patched_init):
+        result = runner.invoke(cli, ["run", str(s), "--output", "json"])
+    data = json.loads(result.output)
+    assert data.get("model") == "claude-3-5-sonnet"
+
+
+# ---------------------------------------------------------------------------
+# Story 11: --warn-only
+# ---------------------------------------------------------------------------
+
+
+def test_warn_only_exits_zero_on_budget_exceeded(runner: CliRunner, script) -> None:
+    s = script("pass")
+    exc = BudgetExceededError(spent=1.50, limit=1.00, model="gpt-4o")
+    with patch("runpy.run_path", side_effect=exc):
+        result = runner.invoke(cli, ["run", str(s), "--budget", "1", "--warn-only"])
+    assert result.exit_code == 0
+
+
+def test_warn_only_prints_warning_on_budget_exceeded(runner: CliRunner, script) -> None:
+    s = script("pass")
+    exc = BudgetExceededError(spent=1.50, limit=1.00, model="gpt-4o")
+    with patch("runpy.run_path", side_effect=exc):
+        result = runner.invoke(cli, ["run", str(s), "--budget", "1", "--warn-only"])
+    assert "warn" in result.output.lower() or "limit" in result.output.lower()
+
+
+def test_warn_only_exits_zero_on_tool_budget_exceeded(runner: CliRunner, script) -> None:
+    s = script("pass")
+    exc = ToolBudgetExceededError(
+        tool_name="web_search", calls_used=5, calls_limit=5, usd_spent=0.05, usd_limit=None
+    )
+    with patch("runpy.run_path", side_effect=exc):
+        result = runner.invoke(cli, ["run", str(s), "--max-tool-calls", "5", "--warn-only"])
+    assert result.exit_code == 0
+
+
+def test_warn_only_json_status_exceeded(runner: CliRunner, script) -> None:
+    """In warn-only + json mode, status should reflect that budget was exceeded."""
+    s = script("pass")
+    exc = BudgetExceededError(spent=1.50, limit=1.00, model="gpt-4o")
+    with patch("runpy.run_path", side_effect=exc):
+        result = runner.invoke(
+            cli, ["run", str(s), "--budget", "1", "--warn-only", "--output", "json"]
+        )
+    assert result.exit_code == 0
+    data = json.loads(result.output)
+    assert data["status"] == "exceeded"
+
+
+# ---------------------------------------------------------------------------
+# Story 9: --dry-run
+# ---------------------------------------------------------------------------
+
+
+def test_dry_run_exits_zero(runner: CliRunner, script) -> None:
+    s = script("pass")
+    result = runner.invoke(cli, ["run", str(s), "--budget", "5", "--dry-run"])
+    assert result.exit_code == 0
+
+
+def test_dry_run_prints_dry_run_indicator(runner: CliRunner, script) -> None:
+    s = script("pass")
+    result = runner.invoke(cli, ["run", str(s), "--budget", "5", "--dry-run"])
+    assert "dry-run" in result.output.lower() or "dry run" in result.output.lower()
+
+
+def test_dry_run_exits_zero_even_when_budget_would_be_exceeded(runner: CliRunner, script) -> None:
+    s = script("pass")
+    exc = BudgetExceededError(spent=1.50, limit=1.00, model="gpt-4o")
+    with patch("runpy.run_path", side_effect=exc):
+        result = runner.invoke(cli, ["run", str(s), "--budget", "1", "--dry-run"])
+    assert result.exit_code == 0
+
+
+def test_dry_run_implies_warn_only(runner: CliRunner, script) -> None:
+    """--dry-run + budget exceeded should not exit 1."""
+    s = script("pass")
+    exc = BudgetExceededError(spent=99.0, limit=1.00, model="gpt-4o")
+    with patch("runpy.run_path", side_effect=exc):
+        result = runner.invoke(cli, ["run", str(s), "--budget", "1", "--dry-run"])
+    assert result.exit_code == 0
+
+
+def test_output_json_warn_status_when_warn_threshold_fired(runner: CliRunner, script) -> None:
+    """JSON status is 'warn' when warn_at threshold fired but budget not exceeded."""
+    from shekel._budget import Budget
+
+    s = script("pass")
+    original_init = Budget.__init__
+
+    def patched_init(self, **kwargs):
+        original_init(self, **kwargs)
+        self._warn_fired = True  # simulate warn threshold fired
+
+    # Patch only for the duration of the CLI invocation so other tests see the real __init__
+    with patch.object(Budget, "__init__", patched_init):
+        result = runner.invoke(cli, ["run", str(s), "--budget", "5", "--output", "json"])
+
+    data = json.loads(result.output)
+    assert data["status"] == "warn"

From a355c69b75ecd6f59ddeb3c8bf2eb95acaf56ec0 Mon Sep 17 00:00:00 2001
From: arieradle <arie.radle@gmail.com>
Date: Sun, 15 Mar 2026 16:37:35 +0200
Subject: [PATCH 2/4] style: black formatting fix on test_run_overhead.py

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 tests/performance/test_run_overhead.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/performance/test_run_overhead.py b/tests/performance/test_run_overhead.py
index d76e82b..2ff4236 100644
--- a/tests/performance/test_run_overhead.py
+++ b/tests/performance/test_run_overhead.py
@@ -3,6 +3,7 @@
 Uses pytest-benchmark when available (CI), falls back to a wall-clock assertion
 so the test still passes in environments without the benchmark plugin.
 """
+
 from __future__ import annotations
 
 import time

From ad48f97cde558e98affc31197ce792e35a1e3efe Mon Sep 17 00:00:00 2001
From: arieradle <arie.radle@gmail.com>
Date: Sun, 15 Mar 2026 16:39:34 +0200
Subject: [PATCH 3/4] =?UTF-8?q?fix:=20rename=20ambiguous=20lambda=20param?=
 =?UTF-8?q?=20l=20=E2=86=92=20lim=20(ruff=20E741)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 tests/test_budget_warn_only.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_budget_warn_only.py b/tests/test_budget_warn_only.py
index 0d78ead..bda530a 100644
--- a/tests/test_budget_warn_only.py
+++ b/tests/test_budget_warn_only.py
@@ -64,7 +64,7 @@ def test_warn_only_still_fires_warn_callback_when_exceeded() -> None:
     b = Budget(
         max_usd=1.0,
         warn_at=0.5,
-        on_warn=lambda s, l: fired.append((s, l)),
+        on_warn=lambda s, lim: fired.append((s, lim)),
         warn_only=True,
     )
     with b:

From b2c12f3d92e63e5e884bb75321bdbb3823f485ad Mon Sep 17 00:00:00 2001
From: arieradle <arie.radle@gmail.com>
Date: Sun, 15 Mar 2026 16:44:15 +0200
Subject: [PATCH 4/4] fix: use mypy override for tomli instead of per-line
 type: ignore

Avoids environment-dependent unused-ignore errors: CI has tomli installed
(no import-not-found), local dev does not. The [[tool.mypy.overrides]] for
tomli handles both cases without needing a fragile inline comment.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 pyproject.toml        | 4 ++++
 shekel/_run_config.py | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 629f4bb..9b43e77 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -142,6 +142,10 @@ warn_return_any = true
 warn_unused_configs = true
 exclude = ["/_pytest/"]
 
+[[tool.mypy.overrides]]
+module = "tomli"
+ignore_missing_imports = true
+
 [[tool.mypy.overrides]]
 module = "tokencost"
 ignore_missing_imports = true
diff --git a/shekel/_run_config.py b/shekel/_run_config.py
index d1f451e..9341cc6 100644
--- a/shekel/_run_config.py
+++ b/shekel/_run_config.py
@@ -28,7 +28,7 @@ def load_budget_file(path: str) -> dict[str, object]:
         import tomllib
     else:  # pragma: no cover
         try:
-            import tomli as tomllib  # type: ignore[import-not-found]
+            import tomli as tomllib  # noqa: F401
         except ImportError:
             raise SystemExit("shekel: --budget-file requires Python 3.11+ or: pip install tomli")