From b76845f66f69d22696bfefa87d983b30a8dc5850 Mon Sep 17 00:00:00 2001 From: arieradle Date: Sun, 15 Mar 2026 16:29:53 +0200 Subject: [PATCH 1/4] =?UTF-8?q?feat:=20shekel=20run=20=E2=80=94=20non-inva?= =?UTF-8?q?sive=20CLI=20budget=20enforcement=20(v0.2.9)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds `shekel run agent.py --budget 5` as a drop-in wrapper for any Python agent script: zero code changes required, exit 1 on budget exceeded (CI-friendly). New features: - `shekel run`: wraps scripts via runpy in-process so monkey-patches are active - `--budget / AGENT_BUDGET_USD`: USD cap with Docker/CI env-var support - `--warn-at`, `--max-llm-calls`, `--max-tool-calls`: full Budget param parity - `--output json`: machine-readable spend summary for log pipelines - `--warn-only`: log warning but never exit 1 (soft guardrail) - `--dry-run`: track costs only, implies --warn-only - `--budget-file shekel.toml`: operator-supplied TOML config - `Budget(warn_only=True)`: new parameter — suppresses raises, fires warn callback - `.github/actions/enforce/action.yml`: GitHub Actions composite action - `docs/docker.md`: Docker entrypoint patterns and shell script examples Tests: 85 new tests (TDD), 100% coverage on _cli.py, _run_utils.py, _run_config.py. 
Co-Authored-By: Claude Sonnet 4.6 --- .github/actions/enforce/action.yml | 86 ++++++ docs/cli.md | 85 +++++- docs/docker.md | 154 ++++++++++ pyproject.toml | 2 +- shekel/__init__.py | 2 +- shekel/_budget.py | 39 ++- shekel/_cli.py | 244 ++++++++++++++++ shekel/_run_config.py | 50 ++++ shekel/_run_utils.py | 56 ++++ tests/performance/test_run_overhead.py | 51 ++++ tests/test_budget_warn_only.py | 98 +++++++ tests/test_cli.py | 9 + tests/test_cli_run.py | 374 +++++++++++++++++++++++++ tests/test_cli_run_config.py | 147 ++++++++++ tests/test_cli_run_output.py | 198 +++++++++++++ 15 files changed, 1577 insertions(+), 18 deletions(-) create mode 100644 .github/actions/enforce/action.yml create mode 100644 docs/docker.md create mode 100644 shekel/_run_config.py create mode 100644 shekel/_run_utils.py create mode 100644 tests/performance/test_run_overhead.py create mode 100644 tests/test_budget_warn_only.py create mode 100644 tests/test_cli_run.py create mode 100644 tests/test_cli_run_config.py create mode 100644 tests/test_cli_run_output.py diff --git a/.github/actions/enforce/action.yml b/.github/actions/enforce/action.yml new file mode 100644 index 0000000..7e3ca75 --- /dev/null +++ b/.github/actions/enforce/action.yml @@ -0,0 +1,86 @@ +name: "shekel — LLM Budget Enforcement" +description: "Run a Python agent script with a USD budget cap. Exits 1 if the budget is exceeded." +author: "shekel" + +branding: + icon: "dollar-sign" + color: "green" + +inputs: + script: + description: "Path to the Python agent script to run" + required: true + budget: + description: "Maximum spend in USD (e.g. 5 for $5)" + required: false + default: "" + warn-at: + description: "Warn fraction 0.0–1.0 of budget (e.g. 
0.8 for 80%)" + required: false + default: "" + max-llm-calls: + description: "Maximum number of LLM API calls" + required: false + default: "" + max-tool-calls: + description: "Maximum number of tool invocations" + required: false + default: "" + warn-only: + description: "If true, warn but do not exit 1 on budget exceeded" + required: false + default: "false" + output: + description: "Output format: text or json" + required: false + default: "text" + budget-file: + description: "Path to a TOML budget config file (shekel.toml)" + required: false + default: "" + shekel-version: + description: "shekel package version to install (e.g. '>=0.3.0')" + required: false + default: ">=0.3.0" + +outputs: + spent: + description: "Total USD spent (populated when --output json is used)" + status: + description: "Budget status: ok | warn | exceeded" + +runs: + using: "composite" + steps: + - name: Install shekel + shell: bash + run: pip install "shekel[cli]${{ inputs.shekel-version }}" --quiet + + - name: Run agent with budget enforcement + shell: bash + run: | + ARGS="${{ inputs.script }}" + + if [ -n "${{ inputs.budget }}" ]; then + ARGS="$ARGS --budget ${{ inputs.budget }}" + fi + if [ -n "${{ inputs.warn-at }}" ]; then + ARGS="$ARGS --warn-at ${{ inputs.warn-at }}" + fi + if [ -n "${{ inputs.max-llm-calls }}" ]; then + ARGS="$ARGS --max-llm-calls ${{ inputs.max-llm-calls }}" + fi + if [ -n "${{ inputs.max-tool-calls }}" ]; then + ARGS="$ARGS --max-tool-calls ${{ inputs.max-tool-calls }}" + fi + if [ "${{ inputs.warn-only }}" = "true" ]; then + ARGS="$ARGS --warn-only" + fi + if [ -n "${{ inputs.output }}" ]; then + ARGS="$ARGS --output ${{ inputs.output }}" + fi + if [ -n "${{ inputs.budget-file }}" ]; then + ARGS="$ARGS --budget-file ${{ inputs.budget-file }}" + fi + + shekel run $ARGS diff --git a/docs/cli.md b/docs/cli.md index d703e80..9eb5f9e 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -1,6 +1,6 @@ # CLI Tools -Shekel provides command-line tools for cost estimation 
and model information. +Shekel provides command-line tools for budget enforcement, cost estimation, and model information. ## Installation @@ -12,6 +12,88 @@ This installs the `shekel` command with Click support. ## Commands +### `shekel run` + +Run a Python script with budget enforcement. Equivalent to wrapping your script +in `with budget(max_usd=N):` — zero code changes required. + +#### Usage + +```bash +shekel run SCRIPT [OPTIONS] [-- SCRIPT_ARGS...] +``` + +#### Options + +| Option | Description | +|--------|-------------| +| `--budget N` | Max spend in USD. Equivalent to `AGENT_BUDGET_USD=N`. | +| `--warn-at F` | Warn fraction 0.0–1.0 (e.g. `0.8` = warn at 80% of budget). | +| `--max-llm-calls N` | Cap on LLM API calls. | +| `--max-tool-calls N` | Cap on tool invocations. | +| `--warn-only` | Warn but never exit 1 when budget exceeded. | +| `--dry-run` | Track costs only — no enforcement. Implies `--warn-only`. | +| `--output text\|json` | Output format (default: `text`). | +| `--budget-file PATH` | Path to a `shekel.toml` config file. | +| `--fallback-model M` | Cheaper model to switch to at threshold. | +| `--fallback-at F` | Fallback activation threshold (default: `0.8`). | + +#### Exit codes + +| Code | Meaning | +|------|---------| +| `0` | Script completed within budget | +| `1` | Budget exceeded (unless `--warn-only`) | +| `2` | Configuration error (missing script, bad TOML, etc.) | + +#### Environment variables + +| Variable | Description | +|----------|-------------| +| `AGENT_BUDGET_USD` | Fallback for `--budget`. Ideal for Docker/CI operator control. 
| + +#### Examples + +```bash +# Enforce a $5 cap +shekel run agent.py --budget 5 + +# Warn at 80%, hard-stop at $5 +shekel run agent.py --budget 5 --warn-at 0.8 + +# Cap LLM calls instead of spend +shekel run agent.py --max-llm-calls 20 + +# JSON output for CI log parsing +shekel run agent.py --budget 5 --output json + +# Warn but don't fail the pipeline +shekel run agent.py --budget 5 --warn-only + +# Dry-run: track costs without enforcement +shekel run agent.py --budget 5 --dry-run + +# Load limits from TOML file +shekel run agent.py --budget-file shekel.toml + +# Set budget via env var (Docker / CI) +AGENT_BUDGET_USD=5 shekel run agent.py +``` + +#### `shekel.toml` format + +```toml +[budget] +max_usd = 5.0 +warn_at = 0.8 +max_llm_calls = 50 +max_tool_calls = 200 +``` + +See [Docker & Container Guardrails](docker.md) for container-specific patterns. + +--- + ### `shekel estimate` Estimate API call costs without making actual requests. @@ -244,6 +326,7 @@ print(f"Available models: {models}") ## Next Steps +- [Docker & Container Guardrails](docker.md) - Using `shekel run` in Docker - [Supported Models](models.md) - Full model list with pricing - [Installation](installation.md) - Installing CLI tools - [Basic Usage](usage/basic-usage.md) - Using budgets in code diff --git a/docs/docker.md b/docs/docker.md new file mode 100644 index 0000000..e9740c0 --- /dev/null +++ b/docs/docker.md @@ -0,0 +1,154 @@ +# Docker & Container Guardrails + +Use `shekel run` as an entrypoint wrapper to enforce LLM cost limits on any agent +running inside a Docker container — zero code changes required. + +## Quick start + +```dockerfile +FROM python:3.12-slim + +WORKDIR /app + +# Install your agent and shekel CLI +COPY requirements.txt . +RUN pip install -r requirements.txt shekel[cli] + +COPY agent.py . 
+ +# shekel run becomes the entrypoint; AGENT_BUDGET_USD sets the cap at runtime +ENTRYPOINT ["shekel", "run", "agent.py"] +``` + +Run with a $5 cap: + +```bash +docker run -e AGENT_BUDGET_USD=5 my-agent-image +``` + +The container exits with code 1 if the budget is exceeded, so your orchestration +layer (ECS, Kubernetes, Compose) can detect it as a failed task. + +--- + +## Patterns + +### Budget via environment variable + +The `AGENT_BUDGET_USD` env var is equivalent to `--budget N`. This is the +preferred pattern for containers because the budget can be set by the operator +without rebuilding the image. + +```bash +# docker run +docker run -e AGENT_BUDGET_USD=10 my-agent-image + +# docker-compose +services: + agent: + image: my-agent-image + environment: + AGENT_BUDGET_USD: "10" +``` + +### Budget via CLI flag (baked into image) + +```dockerfile +ENTRYPOINT ["shekel", "run", "agent.py", "--budget", "5"] +``` + +### TOML config file + +Mount a `shekel.toml` at runtime for fine-grained control: + +```bash +docker run -v $(pwd)/shekel.toml:/app/shekel.toml \ + my-agent-image --budget-file /app/shekel.toml +``` + +```toml +# shekel.toml +[budget] +max_usd = 5.0 +warn_at = 0.8 +max_llm_calls = 50 +max_tool_calls = 200 +``` + +### Warn-only mode (log but don't kill) + +```dockerfile +ENTRYPOINT ["shekel", "run", "agent.py", "--warn-only"] +``` + +With `--warn-only`, the container exits 0 even if the budget is exceeded. +Use this during development to observe spend without blocking the run. 
+ +### JSON output for structured logging + +```bash +docker run my-agent-image --budget 5 --output json \ + | tee /logs/spend.json +``` + +The JSON emitted at the end (a single line; pretty-printed here for readability): + +```json +{ + "spent": 1.23, + "limit": 5.0, + "calls": 12, + "tool_calls": 4, + "status": "ok", + "model": "gpt-4o" +} +``` + +--- + +## Exit codes + +| Code | Meaning | +|------|---------| +| `0` | Script completed within budget (or `--warn-only` mode) | +| `1` | Budget exceeded (default mode) | +| `2` | Configuration error (missing script, bad TOML, etc.) | + +--- + +## Shell script wrapper + +For non-Docker environments (e.g. bare VMs, `.sh` CI scripts): + +```bash +#!/usr/bin/env bash +set -euo pipefail + +BUDGET="${AGENT_BUDGET_USD:-5}" + +shekel run agent.py \ + --budget "$BUDGET" \ + --warn-at 0.8 \ + --output json \ + | tee spend.json + +status=$(jq -r '.status' spend.json) +if [ "$status" = "exceeded" ]; then + echo "Budget exceeded — check spend.json for details" >&2 + exit 1 +fi +``` + +--- + +## GitHub Actions + +See the [CLI reference](cli.md) or use the bundled composite action: + +```yaml +- uses: ./.github/actions/enforce + with: + script: agent.py + budget: "5" + warn-at: "0.8" +``` diff --git a/pyproject.toml b/pyproject.toml index 3f825c2..629f4bb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "shekel" -version = "0.2.8" +version = "0.2.9" description = "LLM budget enforcement and cost tracking. Zero config — with budget(max_usd=1.00): run_agent(). Works with LangGraph, CrewAI, raw OpenAI/Anthropic/Gemini." 
readme = "README.md" license = { file = "LICENSE" } diff --git a/shekel/__init__.py b/shekel/__init__.py index 6995929..828c6ee 100644 --- a/shekel/__init__.py +++ b/shekel/__init__.py @@ -7,7 +7,7 @@ from shekel._tool import tool from shekel.exceptions import BudgetExceededError, ToolBudgetExceededError -__version__ = "0.2.8" +__version__ = "0.2.9" __all__ = [ "budget", "Budget", diff --git a/shekel/_budget.py b/shekel/_budget.py index 67fbd51..c44edf0 100644 --- a/shekel/_budget.py +++ b/shekel/_budget.py @@ -74,6 +74,7 @@ def __init__( max_llm_calls: int | None = None, max_tool_calls: int | None = None, tool_prices: dict[str, float] | None = None, + warn_only: bool = False, ) -> None: if max_usd is not None and max_usd <= 0: @@ -146,6 +147,7 @@ def __init__( self.price_per_1k_tokens = price_per_1k_tokens self.fallback: dict[str, Any] | None = fallback self.on_fallback = on_fallback + self.warn_only: bool = warn_only # --- Nested budget support (v0.2.3) --- self.name: str | None = name @@ -596,8 +598,11 @@ def _check_limit(self) -> None: return # Fallback just activated — keep running on cheaper model if budget_exceeded and (self.fallback is None or self._using_fallback): - # No fallback available, or already on fallback and still exceeded — raise + # No fallback available, or already on fallback and still exceeded self._emit_budget_exceeded_event() + if self.warn_only: + self._check_warn() # fire warning callback if threshold set + return raise BudgetExceededError( self._spent, effective_limit, self._last_model, self._last_tokens ) @@ -623,6 +628,8 @@ def _check_call_limit(self) -> None: if self._calls_made > effective_call_limit: self._emit_budget_exceeded_event() + if self.warn_only: + return raise BudgetExceededError( self._calls_made, effective_call_limit, @@ -643,20 +650,7 @@ def _check_tool_limit(self, tool_name: str, framework: str) -> None: limit = self._effective_tool_call_limit if limit is not None and self._tool_calls_made >= limit: 
self._emit_tool_budget_exceeded_event(tool_name, framework) - raise ToolBudgetExceededError( - tool_name=tool_name, - calls_used=self._tool_calls_made, - calls_limit=limit, - usd_spent=self._tool_spent, - usd_limit=self.max_usd, - framework=framework, - ) - - # Also check USD limit if tool_prices configured for this tool - if self.max_usd is not None and self.tool_prices is not None: - price = self.tool_prices.get(tool_name) - if price is not None and self._tool_spent + price > self.max_usd: - self._emit_tool_budget_exceeded_event(tool_name, framework) + if not self.warn_only: raise ToolBudgetExceededError( tool_name=tool_name, calls_used=self._tool_calls_made, @@ -666,6 +660,21 @@ def _check_tool_limit(self, tool_name: str, framework: str) -> None: framework=framework, ) + # Also check USD limit if tool_prices configured for this tool + if self.max_usd is not None and self.tool_prices is not None: + price = self.tool_prices.get(tool_name) + if price is not None and self._tool_spent + price > self.max_usd: + self._emit_tool_budget_exceeded_event(tool_name, framework) + if not self.warn_only: + raise ToolBudgetExceededError( + tool_name=tool_name, + calls_used=self._tool_calls_made, + calls_limit=limit, + usd_spent=self._tool_spent, + usd_limit=self.max_usd, + framework=framework, + ) + def _record_tool_call(self, tool_name: str, cost: float, framework: str) -> None: """Post-dispatch: record the tool call and emit events.""" self._tool_calls_made += 1 diff --git a/shekel/_cli.py b/shekel/_cli.py index bbb2375..4144a81 100644 --- a/shekel/_cli.py +++ b/shekel/_cli.py @@ -1,5 +1,9 @@ from __future__ import annotations +import os +import runpy +import sys + try: import click except ImportError: # pragma: no cover @@ -59,3 +63,243 @@ def models(provider: str | None) -> None: click.echo("-" * len(header)) for name, inp, out in rows: click.echo(f"{name:<{col1}} ${inp:>11.6f} ${out:>11.6f}") + + +@cli.command(context_settings={"ignore_unknown_options": True, 
"allow_extra_args": True}) +@click.argument("script") +@click.argument("args", nargs=-1, type=click.UNPROCESSED) +@click.option( + "--budget", "max_usd", type=float, default=None, help="Max spend in USD (maps to max_usd)." +) +@click.option( + "--warn-at", type=float, default=None, help="Warn fraction 0.0–1.0 (maps to warn_at)." +) +@click.option( + "--max-llm-calls", type=int, default=None, help="Cap on LLM API calls (maps to max_llm_calls)." +) +@click.option( + "--max-tool-calls", + type=int, + default=None, + help="Cap on tool invocations (maps to max_tool_calls).", +) +@click.option( + "--fallback-model", + type=str, + default=None, + help="Fallback model name (maps to fallback['model']).", +) +@click.option( + "--fallback-at", + type=float, + default=0.8, + show_default=True, + help="Fallback activation threshold 0.0–1.0 (maps to fallback['at_pct']).", +) +@click.option( + "--output", + type=click.Choice(["text", "json"]), + default="text", + show_default=True, + help="Output format.", +) +@click.option( + "--warn-only", + is_flag=True, + default=False, + help="Never exit 1; warn but continue when budget exceeded.", +) +@click.option( + "--dry-run", + is_flag=True, + default=False, + help="Track costs without enforcement. Implies --warn-only.", +) +@click.option( + "--budget-file", type=str, default=None, help="Path to TOML budget config file (shekel.toml)." +) +def run( + script: str, + args: tuple[str, ...], + max_usd: float | None, + warn_at: float | None, + max_llm_calls: int | None, + max_tool_calls: int | None, + fallback_model: str | None, + fallback_at: float, + output: str, + warn_only: bool, + dry_run: bool, + budget_file: str | None, +) -> None: + """Run a Python script with budget enforcement. No code changes required. + + Equivalent to wrapping your script in ``with budget(max_usd=N):``. + Exits with code 1 if the budget is exceeded (CI-friendly). 
+ + \b + Examples: + shekel run agent.py --budget 5 + shekel run agent.py --budget 5 --warn-at 0.8 + shekel run agent.py --max-llm-calls 20 + shekel run agent.py --budget 5 --output json + shekel run agent.py --budget 5 --warn-only + shekel run agent.py --budget 5 --dry-run + shekel run agent.py --budget-file shekel.toml + AGENT_BUDGET_USD=5 shekel run agent.py + """ + import json as _json + + from shekel import budget as make_budget + from shekel._run_utils import detect_patched_providers, format_spend_summary + from shekel.exceptions import BudgetExceededError, ToolBudgetExceededError + + # --dry-run implies --warn-only + if dry_run: + warn_only = True + + # Load budget file if specified (CLI flags take precedence) + file_kwargs: dict[str, object] = {} + if budget_file is not None: + from shekel._run_config import load_budget_file + + try: + file_kwargs = load_budget_file(budget_file) + except FileNotFoundError: + click.echo(f"shekel: budget file not found: {budget_file}", err=True) + sys.exit(2) + except Exception as exc: + click.echo(f"shekel: invalid budget file — {exc}", err=True) + sys.exit(2) + + # Env var fallback for --budget + if max_usd is None: + env_val = os.environ.get("AGENT_BUDGET_USD") + if env_val is not None: + try: + max_usd = float(env_val) + except ValueError: + click.echo( + f"shekel: invalid AGENT_BUDGET_USD={env_val!r} — must be a number", + err=True, + ) + sys.exit(1) + + # Build budget kwargs: file values first, then explicit CLI flags override + budget_kwargs: dict[str, object] = {"name": "shekel-run", **file_kwargs} + if max_usd is not None: + budget_kwargs["max_usd"] = max_usd + if warn_at is not None: + budget_kwargs["warn_at"] = warn_at + if max_llm_calls is not None: + budget_kwargs["max_llm_calls"] = max_llm_calls + if max_tool_calls is not None: + budget_kwargs["max_tool_calls"] = max_tool_calls + if fallback_model is not None: + budget_kwargs["fallback"] = {"model": fallback_model, "at_pct": fallback_at} + if warn_only: + 
budget_kwargs["warn_only"] = True + + has_limit = ( + budget_kwargs.get("max_usd") is not None + or budget_kwargs.get("max_llm_calls") is not None + or budget_kwargs.get("max_tool_calls") is not None + ) + + original_argv = sys.argv[:] + sys.argv = [script, *args] + + script_exit_code = 0 + exceeded = False + b = make_budget(**budget_kwargs) # type: ignore[arg-type] + try: + with b: + if dry_run and output == "text": + click.echo("[dry-run] cost tracking only — budget limits will not be enforced") + if output == "text": + providers = detect_patched_providers() + if providers: + click.echo(f"Patching: {', '.join(providers)}") + runpy.run_path(script, run_name="__main__") + except BudgetExceededError as exc: + exceeded = True + if output == "text": + if warn_only: + click.echo( + f"⚠ Budget limit reached (warn-only): {exc.model}" + f" · ${exc.spent:.4f} / ${exc.limit:.2f}", + err=True, + ) + else: + click.echo( + f"✗ Budget exceeded: {exc.model} · {b.calls_used} calls" + f" · ${exc.spent:.4f} / ${exc.limit:.2f}", + err=True, + ) + if not warn_only: + script_exit_code = 1 + except ToolBudgetExceededError as exc: + exceeded = True + if output == "text": + limit_str = str(exc.calls_limit) if exc.calls_limit is not None else "∞" + if warn_only: + click.echo( + f"⚠ Tool limit reached (warn-only): {exc.tool_name}" + f" · {exc.calls_used}/{limit_str} calls", + err=True, + ) + else: + click.echo( + f"✗ Tool budget exceeded: {exc.tool_name}" + f" · {exc.calls_used}/{limit_str} calls", + err=True, + ) + if not warn_only: + script_exit_code = 1 + except FileNotFoundError: + click.echo(f"shekel: script not found: {script}", err=True) + script_exit_code = 2 + except SystemExit as exc: + code = exc.code + if isinstance(code, int): + script_exit_code = code + elif code is None: + script_exit_code = 0 + else: + script_exit_code = 1 + finally: + sys.argv = original_argv + # Determine status for both text and JSON output + if exceeded or (has_limit and b.max_usd is not None and 
b.spent > b.max_usd): + status = "exceeded" + elif b._warn_fired: + status = "warn" + else: + status = "ok" + + if output == "json": + data = b.summary_data() + by_model: dict[str, object] = data["by_model"] # type: ignore[assignment] + json_out: dict[str, object] = { + "spent": data["total_spent"], + "limit": data["limit"], + "calls": data["calls_used"], + "tool_calls": data["tool_calls_used"], + "status": status, + } + if by_model: + top_model = max( + by_model.items(), + key=lambda kv: kv[1]["calls"], # type: ignore[index] + )[0] + json_out["model"] = top_model + click.echo(_json.dumps(json_out)) + else: + click.echo(format_spend_summary(b)) + if has_limit and b.calls_used == 0 and b.tool_calls_used == 0: + click.echo( + "Warning: 0 LLM calls intercepted — budget may not be enforced.", + err=True, + ) + + sys.exit(script_exit_code) diff --git a/shekel/_run_config.py b/shekel/_run_config.py new file mode 100644 index 0000000..d1f451e --- /dev/null +++ b/shekel/_run_config.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +import sys +from typing import Any + + +def load_budget_file(path: str) -> dict[str, object]: + """Parse a TOML budget config file and return budget kwargs. + + Supports the following keys under ``[budget]``: + + .. code-block:: toml + + [budget] + max_usd = 5.0 + warn_at = 0.8 + max_llm_calls = 20 + max_tool_calls = 50 + + Requires Python 3.11+ (uses stdlib ``tomllib``) or the optional + ``tomli`` package for Python 3.9/3.10. + + Raises: + FileNotFoundError: If *path* does not exist. + SystemExit: If ``tomllib``/``tomli`` is unavailable on Python < 3.11. 
+ """ + if sys.version_info >= (3, 11): + import tomllib + else: # pragma: no cover + try: + import tomli as tomllib # type: ignore[import-not-found] + except ImportError: + raise SystemExit("shekel: --budget-file requires Python 3.11+ or: pip install tomli") + + with open(path, "rb") as f: + data = tomllib.load(f) + + section: dict[str, Any] = data.get("budget", {}) + kwargs: dict[str, object] = {} + + if "max_usd" in section: + kwargs["max_usd"] = float(section["max_usd"]) + if "warn_at" in section: + kwargs["warn_at"] = float(section["warn_at"]) + if "max_llm_calls" in section: + kwargs["max_llm_calls"] = int(section["max_llm_calls"]) + if "max_tool_calls" in section: + kwargs["max_tool_calls"] = int(section["max_tool_calls"]) + + return kwargs diff --git a/shekel/_run_utils.py b/shekel/_run_utils.py new file mode 100644 index 0000000..5bb5a9a --- /dev/null +++ b/shekel/_run_utils.py @@ -0,0 +1,56 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from shekel._budget import Budget + +# Maps _patch._originals key prefixes to human-readable provider names. +_KEY_PREFIX_TO_PROVIDER: dict[str, str] = { + "openai": "openai", + "anthropic": "anthropic", + "litellm": "litellm", + "gemini": "gemini", + "huggingface": "huggingface", + "langchain": "langchain", + "mcp": "mcp", + "crewai": "crewai", + "openai_agents": "openai-agents", +} + + +def detect_patched_providers() -> list[str]: + """Return sorted list of provider names whose patches are currently active. + + Must be called after budget.__enter__() (i.e. inside ``with budget():``). 
+ """ + import shekel._patch as _patch_module + + seen: set[str] = set() + for key in _patch_module._originals: + for prefix, provider in _KEY_PREFIX_TO_PROVIDER.items(): + if key.startswith(prefix): + seen.add(provider) + return sorted(seen) + + +def format_spend_summary(b: Budget) -> str: + """Return a compact one-line spend summary for CLI output.""" + data = b.summary_data() + spent: float = float(data["total_spent"]) # type: ignore[arg-type] + calls: int = int(data["calls_used"]) # type: ignore[call-overload] + limit: float | None = data["limit"] # type: ignore[assignment] + + by_model: dict[str, object] = data["by_model"] # type: ignore[assignment] + model_part = "" + if by_model: + top_model = max(by_model.items(), key=lambda kv: kv[1]["calls"])[0] # type: ignore[index] + model_part = f" · {top_model}" + + if limit is not None: + pct = (spent / limit * 100) if limit > 0 else 0.0 + limit_part = f" / ${limit:.2f} ({pct:.0f}%)" + else: + limit_part = "" + + return f"${spent:.4f} spent{limit_part} · {calls} calls{model_part}" diff --git a/tests/performance/test_run_overhead.py b/tests/performance/test_run_overhead.py new file mode 100644 index 0000000..d76e82b --- /dev/null +++ b/tests/performance/test_run_overhead.py @@ -0,0 +1,51 @@ +"""Performance test: shekel run overhead should be sub-100 ms for a no-op script. + +Uses pytest-benchmark when available (CI), falls back to a wall-clock assertion +so the test still passes in environments without the benchmark plugin. 
+""" +from __future__ import annotations + +import time +from pathlib import Path + +import pytest +from click.testing import CliRunner + +from shekel._cli import cli + + +@pytest.fixture +def noop_script(tmp_path: Path) -> Path: + p = tmp_path / "noop.py" + p.write_text("pass") + return p + + +def _run_once(noop_script: Path) -> float: + runner = CliRunner() + t0 = time.perf_counter() + result = runner.invoke(cli, ["run", str(noop_script)]) + elapsed = time.perf_counter() - t0 + assert result.exit_code == 0 + return elapsed + + +def test_run_overhead_under_100ms(noop_script: Path) -> None: + """shekel run on a no-op script must complete in under 100 ms (wall clock).""" + # Warm-up: first call may pay import costs + _run_once(noop_script) + # Measure: take the minimum of 5 runs to reduce noise + times = [_run_once(noop_script) for _ in range(5)] + best = min(times) + assert best < 0.1, f"shekel run overhead too high: best={best*1000:.1f} ms (limit 100 ms)" + + +def test_run_overhead_benchmark(benchmark: pytest.fixture, noop_script: Path) -> None: # type: ignore[type-arg] + """Benchmark version — only runs when pytest-benchmark is active.""" + runner = CliRunner() + + def _invoke() -> None: + result = runner.invoke(cli, ["run", str(noop_script)]) + assert result.exit_code == 0 + + benchmark(_invoke) diff --git a/tests/test_budget_warn_only.py b/tests/test_budget_warn_only.py new file mode 100644 index 0000000..0d78ead --- /dev/null +++ b/tests/test_budget_warn_only.py @@ -0,0 +1,98 @@ +from __future__ import annotations + +import pytest + +from shekel._budget import Budget +from shekel.exceptions import BudgetExceededError, ToolBudgetExceededError + +# --------------------------------------------------------------------------- +# Story 11: Budget(warn_only=True) — enforce silently, never raise +# --------------------------------------------------------------------------- + + +def test_warn_only_does_not_raise_on_usd_exceeded() -> None: + b = Budget(max_usd=0.001, 
warn_only=True) + with b: + b._record_spend(1.0, "gpt-4o", {"input": 100, "output": 50}) + # No BudgetExceededError raised + + +def test_warn_only_false_still_raises_on_usd_exceeded() -> None: + b = Budget(max_usd=0.001, warn_only=False) + with pytest.raises(BudgetExceededError): + with b: + b._record_spend(1.0, "gpt-4o", {"input": 100, "output": 50}) + + +def test_warn_only_does_not_raise_on_call_limit_exceeded() -> None: + b = Budget(max_usd=10.0, max_llm_calls=1, warn_only=True) + with b: + # First call — within limit + b._record_spend(0.001, "gpt-4o", {"input": 10, "output": 5}) + # Second call — exceeds max_llm_calls=1, but warn_only so no raise + b._record_spend(0.001, "gpt-4o", {"input": 10, "output": 5}) + + +def test_warn_only_false_raises_on_call_limit_exceeded() -> None: + b = Budget(max_usd=10.0, max_llm_calls=1, warn_only=False) + with pytest.raises(BudgetExceededError): + with b: + b._record_spend(0.001, "gpt-4o", {"input": 10, "output": 5}) + b._record_spend(0.001, "gpt-4o", {"input": 10, "output": 5}) + + +def test_warn_only_does_not_raise_on_tool_limit_exceeded() -> None: + b = Budget(max_tool_calls=1, warn_only=True) + with b: + b._check_tool_limit("web_search", "manual") + b._record_tool_call("web_search", 0.0, "manual") + # Second call — exceeds max_tool_calls=1, but warn_only so no raise + b._check_tool_limit("web_search", "manual") + + +def test_warn_only_false_raises_on_tool_limit_exceeded() -> None: + b = Budget(max_tool_calls=1, warn_only=False) + with pytest.raises(ToolBudgetExceededError): + with b: + b._check_tool_limit("web_search", "manual") + b._record_tool_call("web_search", 0.0, "manual") + b._check_tool_limit("web_search", "manual") + + +def test_warn_only_still_fires_warn_callback_when_exceeded() -> None: + fired: list[tuple[float, float]] = [] + b = Budget( + max_usd=1.0, + warn_at=0.5, + on_warn=lambda s, l: fired.append((s, l)), + warn_only=True, + ) + with b: + b._record_spend(2.0, "gpt-4o", {"input": 100, "output": 50}) + 
assert len(fired) == 1 + assert fired[0][1] == 1.0 # limit + + +def test_warn_only_spent_is_tracked_correctly() -> None: + b = Budget(max_usd=0.001, warn_only=True) + with b: + b._record_spend(1.5, "gpt-4o", {"input": 100, "output": 50}) + assert b.spent == pytest.approx(1.5) + + +def test_warn_only_tool_usd_limit_does_not_raise() -> None: + """Tool USD limit check also respects warn_only.""" + b = Budget(max_usd=0.01, tool_prices={"web_search": 0.05}, warn_only=True) + with b: + # This would raise ToolBudgetExceededError without warn_only + b._check_tool_limit("web_search", "manual") + + +def test_warn_only_default_is_false() -> None: + b = Budget(max_usd=0.001) + assert b.warn_only is False + + +def test_warn_only_stored_on_budget() -> None: + b = Budget(max_usd=1.0, warn_only=True) + assert b.warn_only is True diff --git a/tests/test_cli.py b/tests/test_cli.py index 5dd263f..4e752d2 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -105,3 +105,12 @@ def test_models_filter_google(runner: CliRunner) -> None: def test_models_invalid_provider(runner: CliRunner) -> None: result = runner.invoke(cli, ["models", "--provider", "invalid"]) assert result.exit_code != 0 + + +def test_models_no_results(runner: CliRunner) -> None: + from unittest.mock import patch + + with patch("shekel._cli._PRICES", {}): + result = runner.invoke(cli, ["models"]) + assert result.exit_code == 0 + assert "No models found" in result.output diff --git a/tests/test_cli_run.py b/tests/test_cli_run.py new file mode 100644 index 0000000..6a6eb07 --- /dev/null +++ b/tests/test_cli_run.py @@ -0,0 +1,374 @@ +from __future__ import annotations + +import sys +from pathlib import Path +from unittest.mock import patch + +import pytest +from click.testing import CliRunner + +from shekel._cli import cli +from shekel.exceptions import BudgetExceededError, ToolBudgetExceededError + + +@pytest.fixture +def runner() -> CliRunner: + return CliRunner() + + +@pytest.fixture +def script(tmp_path: Path): + 
"""Helper to write a temp Python script and return its path.""" + + def _make(content: str, name: str = "agent.py") -> Path: + p = tmp_path / name + p.write_text(content) + return p + + return _make + + +# --------------------------------------------------------------------------- +# Story 1: bare shekel run (no flags) +# --------------------------------------------------------------------------- + + +def test_run_executes_script_and_exits_zero(runner: CliRunner, script) -> None: + s = script("x = 1 + 1") + result = runner.invoke(cli, ["run", str(s)]) + assert result.exit_code == 0 + + +def test_run_prints_spend_summary_on_exit(runner: CliRunner, script) -> None: + s = script("pass") + result = runner.invoke(cli, ["run", str(s)]) + assert result.exit_code == 0 + assert "$" in result.output + assert "spent" in result.output + + +def test_run_passes_script_args_via_argv(runner: CliRunner, script, tmp_path: Path) -> None: + out = tmp_path / "argv.txt" + s = script(f"import sys; open(r'{out}', 'w').write(' '.join(sys.argv))") + result = runner.invoke(cli, ["run", str(s), "arg1", "arg2"]) + assert result.exit_code == 0 + content = out.read_text() + assert "arg1" in content + assert "arg2" in content + # sys.argv[0] should be the script path, not the shekel binary + assert content.split()[0] == str(s) + + +def test_run_script_not_found_exits_nonzero(runner: CliRunner) -> None: + result = runner.invoke(cli, ["run", "/nonexistent/agent.py"]) + assert result.exit_code != 0 + assert "not found" in result.output.lower() + + +def test_run_script_nonzero_exit_propagated(runner: CliRunner, script) -> None: + s = script("import sys; sys.exit(42)") + result = runner.invoke(cli, ["run", str(s)]) + assert result.exit_code == 42 + + +def test_run_script_exit_zero_explicit(runner: CliRunner, script) -> None: + s = script("import sys; sys.exit(0)") + result = runner.invoke(cli, ["run", str(s)]) + assert result.exit_code == 0 + + +def test_run_script_exit_none_is_zero(runner: CliRunner, 
script) -> None: + s = script("import sys; sys.exit()") + result = runner.invoke(cli, ["run", str(s)]) + assert result.exit_code == 0 + + +def test_run_spend_summary_always_printed_on_nonzero_exit(runner: CliRunner, script) -> None: + s = script("import sys; sys.exit(1)") + result = runner.invoke(cli, ["run", str(s)]) + assert result.exit_code == 1 + assert "$" in result.output + + +# --------------------------------------------------------------------------- +# Story 2: budget flags +# --------------------------------------------------------------------------- + + +def test_run_no_budget_flag_is_tracking_only(runner: CliRunner, script) -> None: + """Without --budget, the run succeeds even if LLM calls would exceed a limit.""" + s = script("pass") + result = runner.invoke(cli, ["run", str(s)]) + assert result.exit_code == 0 + + +def test_run_budget_exceeded_exits_one(runner: CliRunner, script) -> None: + s = script("pass") + exc = BudgetExceededError(spent=1.50, limit=1.00, model="gpt-4o") + with patch("runpy.run_path", side_effect=exc): + result = runner.invoke(cli, ["run", str(s), "--budget", "1"]) + assert result.exit_code == 1 + + +def test_run_budget_exceeded_no_stacktrace(runner: CliRunner, script) -> None: + s = script("pass") + exc = BudgetExceededError(spent=1.50, limit=1.00, model="gpt-4o") + with patch("runpy.run_path", side_effect=exc): + result = runner.invoke(cli, ["run", str(s), "--budget", "1"]) + full_output = result.output + assert "Traceback" not in full_output + + +def test_run_budget_exceeded_shows_spent_and_limit(runner: CliRunner, script) -> None: + s = script("pass") + exc = BudgetExceededError(spent=1.50, limit=1.00, model="gpt-4o") + with patch("runpy.run_path", side_effect=exc): + result = runner.invoke(cli, ["run", str(s), "--budget", "1"]) + full_output = result.output + assert "1.50" in full_output or "1.5" in full_output + assert "1.00" in full_output or "1.0" in full_output + + +def test_run_budget_exceeded_shows_model(runner: 
CliRunner, script) -> None: + s = script("pass") + exc = BudgetExceededError(spent=1.50, limit=1.00, model="gpt-4o") + with patch("runpy.run_path", side_effect=exc): + result = runner.invoke(cli, ["run", str(s), "--budget", "1"]) + full_output = result.output + assert "gpt-4o" in full_output + + +def test_run_tool_budget_exceeded_clean_output(runner: CliRunner, script) -> None: + s = script("pass") + exc = ToolBudgetExceededError( + tool_name="web_search", + calls_used=10, + calls_limit=10, + usd_spent=0.10, + usd_limit=None, + ) + with patch("runpy.run_path", side_effect=exc): + result = runner.invoke(cli, ["run", str(s), "--max-tool-calls", "10"]) + assert result.exit_code == 1 + full_output = result.output + assert "Traceback" not in full_output + assert "web_search" in full_output + + +def test_run_warn_at_flag_accepted(runner: CliRunner, script) -> None: + s = script("pass") + result = runner.invoke(cli, ["run", str(s), "--budget", "5", "--warn-at", "0.8"]) + assert result.exit_code == 0 + + +def test_run_max_llm_calls_flag_accepted(runner: CliRunner, script) -> None: + s = script("pass") + result = runner.invoke(cli, ["run", str(s), "--max-llm-calls", "10"]) + assert result.exit_code == 0 + + +def test_run_max_tool_calls_flag_accepted(runner: CliRunner, script) -> None: + s = script("pass") + result = runner.invoke(cli, ["run", str(s), "--max-tool-calls", "50"]) + assert result.exit_code == 0 + + +def test_run_fallback_model_flag_accepted(runner: CliRunner, script) -> None: + s = script("pass") + result = runner.invoke(cli, ["run", str(s), "--budget", "5", "--fallback-model", "gpt-4o-mini"]) + assert result.exit_code == 0 + + +def test_run_fallback_at_default_is_point_eight(runner: CliRunner, script) -> None: + """--fallback-at defaults to 0.8 when not supplied.""" + s = script("pass") + result = runner.invoke( + cli, + ["run", str(s), "--budget", "5", "--fallback-model", "gpt-4o-mini", "--fallback-at", "0.5"], + ) + assert result.exit_code == 0 + + +# 
--------------------------------------------------------------------------- +# Story 3: AGENT_BUDGET_USD env var +# --------------------------------------------------------------------------- + + +def test_run_env_var_sets_budget(runner: CliRunner, script) -> None: + s = script("pass") + with runner.isolated_filesystem(): + result = runner.invoke(cli, ["run", str(s)], env={"AGENT_BUDGET_USD": "10.0"}) + assert result.exit_code == 0 + + +def test_run_flag_overrides_env_var(runner: CliRunner, script) -> None: + """--budget flag takes precedence over AGENT_BUDGET_USD env var.""" + s = script("pass") + exc = BudgetExceededError(spent=2.0, limit=1.0, model="gpt-4o") + with patch("runpy.run_path", side_effect=exc): + result = runner.invoke( + cli, + ["run", str(s), "--budget", "1"], + env={"AGENT_BUDGET_USD": "999"}, + ) + assert result.exit_code == 1 + + +def test_run_env_var_invalid_value_exits_with_error(runner: CliRunner, script) -> None: + s = script("pass") + result = runner.invoke(cli, ["run", str(s)], env={"AGENT_BUDGET_USD": "notanumber"}) + assert result.exit_code != 0 + full_output = result.output + assert "AGENT_BUDGET_USD" in full_output + + +def test_run_no_flag_no_env_is_tracking_only(runner: CliRunner, script, monkeypatch) -> None: + monkeypatch.delenv("AGENT_BUDGET_USD", raising=False) + s = script("pass") + result = runner.invoke(cli, ["run", str(s)]) + assert result.exit_code == 0 + + +# --------------------------------------------------------------------------- +# Story 4: provider detection message +# --------------------------------------------------------------------------- + + +def test_run_provider_detection_shown_when_providers_patched(runner: CliRunner, script) -> None: + s = script("pass") + with patch( + "shekel._run_utils.detect_patched_providers", + return_value=["anthropic", "openai"], + ): + result = runner.invoke(cli, ["run", str(s)]) + assert "Patching:" in result.output + assert "openai" in result.output + assert "anthropic" in 
result.output + + +def test_run_no_provider_detection_when_nothing_patched(runner: CliRunner, script) -> None: + s = script("pass") + with patch("shekel._run_utils.detect_patched_providers", return_value=[]): + result = runner.invoke(cli, ["run", str(s)]) + assert "Patching:" not in result.output + + +# --------------------------------------------------------------------------- +# Story 5: zero calls intercepted warning +# --------------------------------------------------------------------------- + + +def test_run_zero_calls_with_budget_prints_warning(runner: CliRunner, script) -> None: + s = script("pass") + result = runner.invoke(cli, ["run", str(s), "--budget", "5"]) + full_output = result.output + assert "0" in full_output and "intercept" in full_output.lower() + + +def test_run_calls_made_no_zero_intercept_warning(runner: CliRunner, script, tmp_path) -> None: + """When calls are made, no zero-intercept warning should appear.""" + s = script("pass") + # Simulate a budget that recorded calls by patching calls_used + from shekel._budget import Budget + + original_init = Budget.__init__ + + def patched_init(self, **kwargs): + original_init(self, **kwargs) + self._calls_made = 1 # simulate a call was made + + with patch.object(Budget, "__init__", patched_init): + result = runner.invoke(cli, ["run", str(s), "--budget", "5"]) + full_output = result.output + assert "0 LLM calls intercepted" not in full_output + + +def test_run_no_budget_no_zero_intercept_warning(runner: CliRunner, script, monkeypatch) -> None: + monkeypatch.delenv("AGENT_BUDGET_USD", raising=False) + s = script("pass") + result = runner.invoke(cli, ["run", str(s)]) + full_output = result.output + assert "0 LLM calls intercepted" not in full_output + + +# --------------------------------------------------------------------------- +# Story 6: clean budget-exceeded output (attribution) +# --------------------------------------------------------------------------- + + +def 
test_run_budget_exceeded_message_attributes_to_agent(runner: CliRunner, script) -> None: + """The error message should show agent spend, not shekel error.""" + s = script("pass") + exc = BudgetExceededError( + spent=3.14, + limit=2.00, + model="claude-3-5-sonnet", + tokens={"input": 1000, "output": 500}, + ) + with patch("runpy.run_path", side_effect=exc): + result = runner.invoke(cli, ["run", str(s), "--budget", "2"]) + full_output = result.output + assert "Budget exceeded" in full_output + assert "claude-3-5-sonnet" in full_output + assert "Traceback" not in full_output + + +def test_run_spend_summary_printed_even_on_budget_exceeded(runner: CliRunner, script) -> None: + s = script("pass") + exc = BudgetExceededError(spent=1.50, limit=1.00, model="gpt-4o") + with patch("runpy.run_path", side_effect=exc): + result = runner.invoke(cli, ["run", str(s), "--budget", "1"]) + assert "$" in result.output + + +# --------------------------------------------------------------------------- +# Story 7: naming / nesting +# --------------------------------------------------------------------------- + + +def test_run_nested_budget_in_script_works(runner: CliRunner, script) -> None: + """A script using with budget(name='inner') should not raise due to naming conflicts.""" + s = script( + "from shekel import budget\n" "with budget(max_usd=1.0, name='inner'):\n" " pass\n" + ) + result = runner.invoke(cli, ["run", str(s), "--budget", "5"]) + assert result.exit_code == 0 + + +def test_run_sys_argv_restored_after_run(runner: CliRunner, script) -> None: + original_argv = sys.argv[:] + s = script("pass") + runner.invoke(cli, ["run", str(s)]) + # CliRunner isolates sys.argv, so we just confirm no crash + assert sys.argv == original_argv + + +def test_run_script_exit_string_message_is_one(runner: CliRunner, script) -> None: + """sys.exit("error msg") — non-int, non-None code — should map to exit code 1.""" + s = script('import sys; sys.exit("fatal error")') + result = runner.invoke(cli, 
["run", str(s)]) + assert result.exit_code == 1 + + +# --------------------------------------------------------------------------- +# _run_utils unit tests +# --------------------------------------------------------------------------- + + +def test_format_spend_summary_with_model(runner: CliRunner, script, tmp_path: Path) -> None: + """format_spend_summary includes model name when calls were made.""" + from shekel._budget import Budget + from shekel._run_utils import format_spend_summary + + b = Budget(name="test") + # Simulate a recorded call by directly manipulating internal state + from shekel._budget import CallRecord + + b._calls.append(CallRecord(model="gpt-4o", cost=0.05, input_tokens=100, output_tokens=50)) + b._calls_made = 1 + b._spent = 0.05 + b._spent_direct = 0.05 + + summary = format_spend_summary(b) + assert "gpt-4o" in summary + assert "$0.0500" in summary diff --git a/tests/test_cli_run_config.py b/tests/test_cli_run_config.py new file mode 100644 index 0000000..01905fb --- /dev/null +++ b/tests/test_cli_run_config.py @@ -0,0 +1,147 @@ +from __future__ import annotations + +from pathlib import Path +from unittest.mock import patch + +import pytest +from click.testing import CliRunner + +from shekel._cli import cli +from shekel.exceptions import BudgetExceededError + + +@pytest.fixture +def runner() -> CliRunner: + return CliRunner() + + +@pytest.fixture +def script(tmp_path: Path): + def _make(content: str, name: str = "agent.py") -> Path: + p = tmp_path / name + p.write_text(content) + return p + + return _make + + +@pytest.fixture +def budget_toml(tmp_path: Path): + def _make(content: str) -> Path: + p = tmp_path / "shekel.toml" + p.write_text(content) + return p + + return _make + + +# --------------------------------------------------------------------------- +# Story 10: --budget-file shekel.toml +# --------------------------------------------------------------------------- + + +def test_budget_file_loaded_max_usd(runner: CliRunner, script, 
budget_toml) -> None: + s = script("pass") + cfg = budget_toml("[budget]\nmax_usd = 5.0\n") + result = runner.invoke(cli, ["run", str(s), "--budget-file", str(cfg)]) + assert result.exit_code == 0 + + +def test_budget_file_loaded_warn_at(runner: CliRunner, script, budget_toml) -> None: + s = script("pass") + cfg = budget_toml("[budget]\nmax_usd = 5.0\nwarn_at = 0.8\n") + result = runner.invoke(cli, ["run", str(s), "--budget-file", str(cfg)]) + assert result.exit_code == 0 + + +def test_budget_file_loaded_max_llm_calls(runner: CliRunner, script, budget_toml) -> None: + s = script("pass") + cfg = budget_toml("[budget]\nmax_llm_calls = 20\n") + result = runner.invoke(cli, ["run", str(s), "--budget-file", str(cfg)]) + assert result.exit_code == 0 + + +def test_budget_file_loaded_max_tool_calls(runner: CliRunner, script, budget_toml) -> None: + s = script("pass") + cfg = budget_toml("[budget]\nmax_tool_calls = 50\n") + result = runner.invoke(cli, ["run", str(s), "--budget-file", str(cfg)]) + assert result.exit_code == 0 + + +def test_budget_file_not_found_exits_with_error(runner: CliRunner, script) -> None: + s = script("pass") + result = runner.invoke(cli, ["run", str(s), "--budget-file", "/nonexistent/shekel.toml"]) + assert result.exit_code != 0 + assert "not found" in result.output.lower() or "no such" in result.output.lower() + + +def test_budget_file_invalid_toml_exits_with_error(runner: CliRunner, script, budget_toml) -> None: + s = script("pass") + cfg = budget_toml("this is not [ valid toml !!!") + result = runner.invoke(cli, ["run", str(s), "--budget-file", str(cfg)]) + assert result.exit_code != 0 + assert "invalid" in result.output.lower() or "toml" in result.output.lower() + + +def test_budget_flag_overrides_file_max_usd(runner: CliRunner, script, budget_toml) -> None: + """--budget CLI flag takes precedence over max_usd in config file.""" + s = script("pass") + cfg = budget_toml("[budget]\nmax_usd = 10.0\n") + exc = BudgetExceededError(spent=2.0, 
limit=1.0, model="gpt-4o") + with patch("runpy.run_path", side_effect=exc): + result = runner.invoke(cli, ["run", str(s), "--budget", "1", "--budget-file", str(cfg)]) + # --budget 1 wins over file's max_usd=10; script exceeds $1 cap + assert result.exit_code == 1 + + +def test_budget_file_missing_budget_section_is_ok(runner: CliRunner, script, budget_toml) -> None: + """A TOML file with no [budget] section is valid — just no budget constraints.""" + s = script("pass") + cfg = budget_toml("[other_section]\nfoo = 'bar'\n") + result = runner.invoke(cli, ["run", str(s), "--budget-file", str(cfg)]) + assert result.exit_code == 0 + + +def test_budget_file_enforces_limit(runner: CliRunner, script, budget_toml) -> None: + """Budget from file is actually enforced.""" + s = script("pass") + cfg = budget_toml("[budget]\nmax_usd = 1.0\n") + exc = BudgetExceededError(spent=2.0, limit=1.0, model="gpt-4o") + with patch("runpy.run_path", side_effect=exc): + result = runner.invoke(cli, ["run", str(s), "--budget-file", str(cfg)]) + assert result.exit_code == 1 + + +# --------------------------------------------------------------------------- +# load_budget_file unit tests (_run_config) +# --------------------------------------------------------------------------- + + +def test_load_budget_file_returns_empty_for_no_section(tmp_path: Path) -> None: + from shekel._run_config import load_budget_file + + f = tmp_path / "shekel.toml" + f.write_text("[other]\nfoo = 1\n") + result = load_budget_file(str(f)) + assert result == {} + + +def test_load_budget_file_parses_all_keys(tmp_path: Path) -> None: + from shekel._run_config import load_budget_file + + f = tmp_path / "shekel.toml" + f.write_text( + "[budget]\nmax_usd = 5.0\nwarn_at = 0.8\nmax_llm_calls = 20\nmax_tool_calls = 50\n" + ) + result = load_budget_file(str(f)) + assert result["max_usd"] == pytest.approx(5.0) + assert result["warn_at"] == pytest.approx(0.8) + assert result["max_llm_calls"] == 20 + assert result["max_tool_calls"] == 
50 + + +def test_load_budget_file_file_not_found_raises(tmp_path: Path) -> None: + from shekel._run_config import load_budget_file + + with pytest.raises(FileNotFoundError): + load_budget_file(str(tmp_path / "missing.toml")) diff --git a/tests/test_cli_run_output.py b/tests/test_cli_run_output.py new file mode 100644 index 0000000..b0e85d7 --- /dev/null +++ b/tests/test_cli_run_output.py @@ -0,0 +1,198 @@ +from __future__ import annotations + +import json +from pathlib import Path +from unittest.mock import patch + +import pytest +from click.testing import CliRunner + +from shekel._cli import cli +from shekel.exceptions import BudgetExceededError, ToolBudgetExceededError + + +@pytest.fixture +def runner() -> CliRunner: + return CliRunner() + + +@pytest.fixture +def script(tmp_path: Path): + def _make(content: str, name: str = "agent.py") -> Path: + p = tmp_path / name + p.write_text(content) + return p + + return _make + + +# --------------------------------------------------------------------------- +# Story 8: --output json +# --------------------------------------------------------------------------- + + +def test_output_json_emits_valid_json(runner: CliRunner, script) -> None: + s = script("pass") + result = runner.invoke(cli, ["run", str(s), "--output", "json"]) + assert result.exit_code == 0 + data = json.loads(result.output) + assert isinstance(data, dict) + + +def test_output_json_contains_required_keys(runner: CliRunner, script) -> None: + s = script("pass") + result = runner.invoke(cli, ["run", str(s), "--output", "json"]) + data = json.loads(result.output) + for key in ("spent", "limit", "calls", "tool_calls", "status"): + assert key in data, f"missing key: {key}" + + +def test_output_json_ok_status_on_normal_exit(runner: CliRunner, script) -> None: + s = script("pass") + result = runner.invoke(cli, ["run", str(s), "--budget", "5", "--output", "json"]) + data = json.loads(result.output) + assert data["status"] == "ok" + assert data["limit"] == 
pytest.approx(5.0) + + +def test_output_json_no_limit_when_no_budget_flag(runner: CliRunner, script, monkeypatch) -> None: + monkeypatch.delenv("AGENT_BUDGET_USD", raising=False) + s = script("pass") + result = runner.invoke(cli, ["run", str(s), "--output", "json"]) + data = json.loads(result.output) + assert data["limit"] is None + + +def test_output_json_exceeded_status_on_budget_exceeded(runner: CliRunner, script) -> None: + s = script("pass") + exc = BudgetExceededError(spent=1.50, limit=1.00, model="gpt-4o") + with patch("runpy.run_path", side_effect=exc): + result = runner.invoke(cli, ["run", str(s), "--budget", "1", "--output", "json"]) + assert result.exit_code == 1 + data = json.loads(result.output) + assert data["status"] == "exceeded" + + +def test_output_json_hides_human_spend_summary(runner: CliRunner, script) -> None: + """In JSON mode, the human 'spent · calls' line should not appear.""" + s = script("pass") + result = runner.invoke(cli, ["run", str(s), "--output", "json"]) + assert "spent" not in result.output.replace('"spent"', "") # only JSON key, not label + + +def test_output_json_with_model_field(runner: CliRunner, script) -> None: + """When LLM calls are recorded, model field appears in JSON.""" + s = script("pass") + from shekel._budget import Budget, CallRecord + + original_init = Budget.__init__ + + def patched_init(self, **kwargs): + original_init(self, **kwargs) + self._calls.append( + CallRecord(model="claude-3-5-sonnet", cost=0.02, input_tokens=100, output_tokens=50) + ) + self._calls_made = 1 + self._spent = 0.02 + + with patch.object(Budget, "__init__", patched_init): + result = runner.invoke(cli, ["run", str(s), "--output", "json"]) + data = json.loads(result.output) + assert data.get("model") == "claude-3-5-sonnet" + + +# --------------------------------------------------------------------------- +# Story 11: --warn-only +# --------------------------------------------------------------------------- + + +def 
test_warn_only_exits_zero_on_budget_exceeded(runner: CliRunner, script) -> None: + s = script("pass") + exc = BudgetExceededError(spent=1.50, limit=1.00, model="gpt-4o") + with patch("runpy.run_path", side_effect=exc): + result = runner.invoke(cli, ["run", str(s), "--budget", "1", "--warn-only"]) + assert result.exit_code == 0 + + +def test_warn_only_prints_warning_on_budget_exceeded(runner: CliRunner, script) -> None: + s = script("pass") + exc = BudgetExceededError(spent=1.50, limit=1.00, model="gpt-4o") + with patch("runpy.run_path", side_effect=exc): + result = runner.invoke(cli, ["run", str(s), "--budget", "1", "--warn-only"]) + assert "warn" in result.output.lower() or "limit" in result.output.lower() + + +def test_warn_only_exits_zero_on_tool_budget_exceeded(runner: CliRunner, script) -> None: + s = script("pass") + exc = ToolBudgetExceededError( + tool_name="web_search", calls_used=5, calls_limit=5, usd_spent=0.05, usd_limit=None + ) + with patch("runpy.run_path", side_effect=exc): + result = runner.invoke(cli, ["run", str(s), "--max-tool-calls", "5", "--warn-only"]) + assert result.exit_code == 0 + + +def test_warn_only_json_status_exceeded(runner: CliRunner, script) -> None: + """In warn-only + json mode, status should reflect that budget was exceeded.""" + s = script("pass") + exc = BudgetExceededError(spent=1.50, limit=1.00, model="gpt-4o") + with patch("runpy.run_path", side_effect=exc): + result = runner.invoke( + cli, ["run", str(s), "--budget", "1", "--warn-only", "--output", "json"] + ) + assert result.exit_code == 0 + data = json.loads(result.output) + assert data["status"] == "exceeded" + + +# --------------------------------------------------------------------------- +# Story 9: --dry-run +# --------------------------------------------------------------------------- + + +def test_dry_run_exits_zero(runner: CliRunner, script) -> None: + s = script("pass") + result = runner.invoke(cli, ["run", str(s), "--budget", "5", "--dry-run"]) + assert 
result.exit_code == 0 + + +def test_dry_run_prints_dry_run_indicator(runner: CliRunner, script) -> None: + s = script("pass") + result = runner.invoke(cli, ["run", str(s), "--budget", "5", "--dry-run"]) + assert "dry-run" in result.output.lower() or "dry run" in result.output.lower() + + +def test_dry_run_exits_zero_even_when_budget_would_be_exceeded(runner: CliRunner, script) -> None: + s = script("pass") + exc = BudgetExceededError(spent=1.50, limit=1.00, model="gpt-4o") + with patch("runpy.run_path", side_effect=exc): + result = runner.invoke(cli, ["run", str(s), "--budget", "1", "--dry-run"]) + assert result.exit_code == 0 + + +def test_dry_run_implies_warn_only(runner: CliRunner, script) -> None: + """--dry-run + budget exceeded should not exit 1.""" + s = script("pass") + exc = BudgetExceededError(spent=99.0, limit=1.00, model="gpt-4o") + with patch("runpy.run_path", side_effect=exc): + result = runner.invoke(cli, ["run", str(s), "--budget", "1", "--dry-run"]) + assert result.exit_code == 0 + + +def test_output_json_warn_status_when_warn_threshold_fired(runner: CliRunner, script) -> None: + """JSON status is 'warn' when warn_at threshold fired but budget not exceeded.""" + import json as _json + + s = script("pass") + from shekel._budget import Budget + + original_init = Budget.__init__ + + def patched_init(self, **kwargs): + original_init(self, **kwargs) + self._warn_fired = True # simulate warn threshold fired + + with patch.object(Budget, "__init__", patched_init): + result = runner.invoke(cli, ["run", str(s), "--budget", "5", "--output", "json"]) + data = _json.loads(result.output) + assert data["status"] == "warn" From a355c69b75ecd6f59ddeb3c8bf2eb95acaf56ec0 Mon Sep 17 00:00:00 2001 From: arieradle Date: Sun, 15 Mar 2026 16:37:35 +0200 Subject: [PATCH 2/4] style: black formatting fix on test_run_overhead.py Co-Authored-By: Claude Sonnet 4.6 --- tests/performance/test_run_overhead.py | 1 + 1 file changed, 1 insertion(+) diff --git 
a/tests/performance/test_run_overhead.py b/tests/performance/test_run_overhead.py index d76e82b..2ff4236 100644 --- a/tests/performance/test_run_overhead.py +++ b/tests/performance/test_run_overhead.py @@ -3,6 +3,7 @@ Uses pytest-benchmark when available (CI), falls back to a wall-clock assertion so the test still passes in environments without the benchmark plugin. """ + from __future__ import annotations import time From ad48f97cde558e98affc31197ce792e35a1e3efe Mon Sep 17 00:00:00 2001 From: arieradle Date: Sun, 15 Mar 2026 16:39:34 +0200 Subject: [PATCH 3/4] =?UTF-8?q?fix:=20rename=20ambiguous=20lambda=20param?= =?UTF-8?q?=20l=20=E2=86=92=20lim=20(ruff=20E741)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Sonnet 4.6 --- tests/test_budget_warn_only.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_budget_warn_only.py b/tests/test_budget_warn_only.py index 0d78ead..bda530a 100644 --- a/tests/test_budget_warn_only.py +++ b/tests/test_budget_warn_only.py @@ -64,7 +64,7 @@ def test_warn_only_still_fires_warn_callback_when_exceeded() -> None: b = Budget( max_usd=1.0, warn_at=0.5, - on_warn=lambda s, l: fired.append((s, l)), + on_warn=lambda s, lim: fired.append((s, lim)), warn_only=True, ) with b: From b2c12f3d92e63e5e884bb75321bdbb3823f485ad Mon Sep 17 00:00:00 2001 From: arieradle Date: Sun, 15 Mar 2026 16:44:15 +0200 Subject: [PATCH 4/4] fix: use mypy override for tomli instead of per-line type: ignore Avoids environment-dependent unused-ignore errors: CI has tomli installed (no import-not-found), local dev does not. The [[tool.mypy.overrides]] for tomli handles both cases without needing a fragile inline comment. 
Co-Authored-By: Claude Sonnet 4.6 --- pyproject.toml | 4 ++++ shekel/_run_config.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 629f4bb..9b43e77 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -142,6 +142,10 @@ warn_return_any = true warn_unused_configs = true exclude = ["/_pytest/"] +[[tool.mypy.overrides]] +module = "tomli" +ignore_missing_imports = true + [[tool.mypy.overrides]] module = "tokencost" ignore_missing_imports = true diff --git a/shekel/_run_config.py b/shekel/_run_config.py index d1f451e..9341cc6 100644 --- a/shekel/_run_config.py +++ b/shekel/_run_config.py @@ -28,7 +28,7 @@ def load_budget_file(path: str) -> dict[str, object]: import tomllib else: # pragma: no cover try: - import tomli as tomllib # type: ignore[import-not-found] + import tomli as tomllib # noqa: F401 except ImportError: raise SystemExit("shekel: --budget-file requires Python 3.11+ or: pip install tomli")