diff --git a/CLAUDE.md b/CLAUDE.md index a2f719c..8989137 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,76 +1,104 @@ # NullBoiler -DAG-based workflow orchestrator for NullClaw AI bot agents. Part of the Null ecosystem (NullTracker, NullClaw). +Graph-based workflow orchestrator with unified state model for NullClaw AI bot agents. Part of the Null ecosystem (NullTracker, NullClaw). ## Tech Stack - **Language**: Zig 0.15.2 - **Database**: SQLite (vendored in `deps/sqlite/`), WAL mode - **Protocol**: HTTP/1.1 REST API with JSON payloads -- **Dispatch**: HTTP (webhook/api_chat/openai_chat), MQTT, Redis Streams +- **Dispatch**: HTTP (webhook/api_chat/openai_chat/a2a), MQTT, Redis Streams - **Vendored C libs**: SQLite (`deps/sqlite/`), hiredis (`deps/hiredis/`), libmosquitto (`deps/mosquitto/`) ## Module Map | File | Role | |------|------| -| `main.zig` | CLI args (`--port`, `--db`, `--config`, `--version`), HTTP accept loop, engine thread, tracker thread | -| `api.zig` | REST API routing and 19 endpoint handlers (incl. 
signal, chat, tracker status) | -| `store.zig` | SQLite layer, 30+ CRUD methods, schema migrations | -| `engine.zig` | DAG scheduler: tick loop, 14 step type handlers, graph cycles, worker handoff | -| `dispatch.zig` | Worker selection (tags, capacity), protocol-aware dispatch (`webhook`, `api_chat`, `openai_chat`, `mqtt`, `redis_stream`) | +| `main.zig` | CLI args (`--port`, `--db`, `--config`, `--version`, `--export-manifest`, `--from-json`), HTTP accept loop, engine thread, tracker thread | +| `api.zig` | REST API routing and 30+ endpoint handlers (runs, workers, workflows, checkpoints, state, SSE stream, tracker) | +| `store.zig` | SQLite layer, CRUD methods for all tables, schema migrations (4 migration files) | +| `engine.zig` | Graph-based state scheduler: tick loop, 7 node type handlers, checkpoints, reducers, goto, breakpoints, deferred nodes, reconciliation | +| `state.zig` | Unified state model: 7 reducer types (last_value, append, merge, add, min, max, add_messages), overwrite bypass, ephemeral keys, state path resolution | +| `sse.zig` | Server-Sent Events hub: per-run event queues, 5 stream modes (values, updates, tasks, debug, custom) | +| `dispatch.zig` | Worker selection (tags, capacity, A2A preference), protocol-aware dispatch | | `async_dispatch.zig` | Thread-safe response queue for async MQTT/Redis dispatch (keyed by correlation_id) | | `redis_client.zig` | Hiredis wrapper: connect, XADD, listener thread for response streams | | `mqtt_client.zig` | Libmosquitto wrapper: connect, publish, subscribe, listener thread for response topics | -| `templates.zig` | Prompt template rendering: `{{input.X}}`, `{{steps.ID.output}}`, `{{item}}`, `{{task.X}}`, `{{debate_responses}}`, `{{chat_history}}`, `{{role}}` | +| `templates.zig` | Prompt template rendering: state-based `{{state.X}}`, legacy `{{input.X}}`, `{{item}}`, `{{task.X}}`, `{{attempt}}`, conditional blocks | | `callbacks.zig` | Fire-and-forget webhook callbacks on step/run events | | `config.zig` 
| JSON config loader (`Config`, `WorkerConfig`, `EngineConfig`, `TrackerConfig`) | -| `types.zig` | `RunStatus`, `StepStatus`, `StepType` (14 types), `WorkerStatus`, `TrackerTaskState`, row types | +| `types.zig` | `RunStatus`, `StepStatus`, `StepType` (7 types), `WorkerStatus`, `ReducerType`, row types | | `tracker.zig` | Pull-mode tracker thread: poll NullTickets, claim tasks, heartbeat leases, stall detection | | `tracker_client.zig` | HTTP client for NullTickets API (claim, heartbeat, transition, fail, artifacts) | | `workspace.zig` | Workspace lifecycle: create, hook execution, cleanup, path sanitization | | `subprocess.zig` | NullClaw subprocess: spawn, health check, prompt sending, kill | -| `workflow_loader.zig` | Load JSON workflow definitions from `workflows/` directory | +| `workflow_loader.zig` | Load JSON workflow definitions from `workflows/` directory, hot-reload watcher | +| `workflow_validation.zig` | Graph-based workflow validation: reachability, cycles, state key refs, route/send targets | | `ids.zig` | UUID v4 generation, `nowMs()` | -| `migrations/001_init.sql` | 6 tables: workers, runs, steps, step_deps, events, artifacts | -| `migrations/002_advanced_steps.sql` | 3 tables: cycle_state, chat_messages, saga_state + ALTER TABLE | +| `metrics.zig` | Prometheus-style metrics counters | +| `strategy.zig` | Pluggable strategy map for workflow execution | +| `worker_protocol.zig` | Protocol-specific request body builders | +| `worker_response.zig` | Protocol-specific response parsers | +| `export_manifest.zig` | Export tool manifest for CLI integration | +| `from_json.zig` | Import workflow from JSON CLI command | ## Build / Test / Run ```sh zig build # build -zig build test # unit tests +zig build test # unit tests (320 tests) zig build && bash tests/test_e2e.sh # e2e tests (requires Python 3 for mock workers) ./zig-out/bin/nullboiler --port 8080 --db nullboiler.db --config config.json ``` +## Step Types (7) + +`task`, `route`, `interrupt`, `agent`, 
`send`, `transform`, `subgraph` + +## Reducers (7) + +`last_value`, `append`, `merge`, `add`, `min`, `max`, `add_messages` + ## API Endpoints | Method | Path | Description | |--------|------|-------------| | GET | `/health` | Health check | +| GET | `/metrics` | Prometheus metrics | | POST | `/workers` | Register worker | | GET | `/workers` | List workers | | DELETE | `/workers/{id}` | Remove worker | -| POST | `/runs` | Create workflow run | -| GET | `/runs` | List runs | +| POST | `/runs` | Create workflow run (legacy step-array or graph format) | +| GET | `/runs` | List runs (supports ?status= filter) | | GET | `/runs/{id}` | Get run details | | POST | `/runs/{id}/cancel` | Cancel run | | POST | `/runs/{id}/retry` | Retry failed run | +| POST | `/runs/{id}/resume` | Resume interrupted run (with optional state updates) | +| POST | `/runs/{id}/state` | Inject state into running run (pending injection) | +| POST | `/runs/{id}/replay` | Replay run from a checkpoint | +| POST | `/runs/fork` | Fork run from a checkpoint into a new run | | GET | `/runs/{id}/steps` | List steps for run | | GET | `/runs/{id}/steps/{step_id}` | Get step details | -| POST | `/runs/{id}/steps/{step_id}/approve` | Approve approval step | -| POST | `/runs/{id}/steps/{step_id}/reject` | Reject approval step | | GET | `/runs/{id}/events` | List run events | -| POST | `/runs/{id}/steps/{step_id}/signal` | Signal a waiting step | -| GET | `/runs/{id}/steps/{step_id}/chat` | Get group_chat transcript | -| GET | `/tracker/status` | Pull-mode tracker status (running tasks, concurrency, counters) | +| GET | `/runs/{id}/checkpoints` | List checkpoints for run | +| GET | `/runs/{id}/checkpoints/{cpId}` | Get checkpoint details | +| GET | `/runs/{id}/stream` | SSE stream (supports ?mode=values\|updates\|tasks\|debug) | +| POST | `/workflows` | Create workflow definition | +| GET | `/workflows` | List workflow definitions | +| GET | `/workflows/{id}` | Get workflow definition | +| PUT | `/workflows/{id}` 
| Update workflow definition | +| DELETE | `/workflows/{id}` | Delete workflow definition | +| POST | `/workflows/{id}/validate` | Validate workflow definition | +| GET | `/workflows/{id}/mermaid` | Export workflow as Mermaid diagram | +| POST | `/workflows/{id}/run` | Start a run from a stored workflow | +| GET | `/rate-limits` | Get current rate limit info per worker | +| POST | `/admin/drain` | Enable drain mode | +| GET | `/tracker/status` | Pull-mode tracker status | | GET | `/tracker/tasks` | List running pull-mode tasks | | GET | `/tracker/tasks/{task_id}` | Get single pull-mode task details | - -## Step Types - -`task`, `fan_out`, `map`, `condition`, `approval`, `reduce`, `loop`, `sub_workflow`, `wait`, `router`, `transform`, `saga`, `debate`, `group_chat` +| GET | `/tracker/stats` | Tracker statistics | +| POST | `/tracker/refresh` | Force tracker poll | +| POST | `/internal/agent-events/{run_id}/{step_id}` | Agent event callback (from NullClaw) | ## Coding Conventions @@ -83,16 +111,47 @@ zig build && bash tests/test_e2e.sh # e2e tests (requires Python 3 for mock wo ## Architecture -- Single-threaded HTTP accept loop on main thread -- Background engine thread polls DB for active runs (+ polls async response queue for MQTT/Redis steps) -- `std.atomic.Value(bool)` for coordinated shutdown -- Config workers seeded into DB on startup (source = "config") -- Schema in `migrations/001_init.sql` + `002_advanced_steps.sql`, applied on `Store.init` -- Graph cycles: condition/router can route back to completed steps, engine creates new step instances per iteration -- Worker handoff: dispatch result can include `handoff_to` for chained delegation (max 5) -- Async dispatch: MQTT/Redis workers use two-phase dispatch (publish → engine polls response queue) -- Background listener threads (MQTT/Redis) started conditionally when async workers are configured -- Pull-mode tracker thread (conditional): polls NullTickets for tasks, claims work, manages subprocess lifecycles +- 
**Unified state model**: Every node reads from state, returns partial updates, engine applies reducers +- **Graph-based execution**: Workflow = `{nodes: {}, edges: [], state_schema: {}}` with `__start__` and `__end__` synthetic nodes +- **Checkpoints**: State snapshot after every node, enabling fork/replay/resume +- **Conditional edges**: Route nodes produce values, edges like `["router:yes", "next"]` are taken when route result matches +- **Deferred nodes**: Nodes with `"defer": true` execute right before `__end__` +- **Command primitive**: Workers can return `{"goto": "node_name"}` to override normal graph traversal +- **Breakpoints**: `interrupt_before` / `interrupt_after` arrays pause execution +- **Subgraph**: Inline child workflow execution with input/output mapping (max recursion depth 10) +- **Multi-turn agents**: Agent nodes can loop with `continuation_prompt` up to `max_turns` +- **Configurable runs**: Per-run config stored as `state.__config` +- **Node-level cache**: FNV hash of (node_name, rendered_prompt) with configurable TTL +- **Token accounting**: Cumulative input/output token tracking per step and per run +- **Workflow hot-reload**: `WorkflowWatcher` polls `workflows/` directory for JSON changes, upserts into DB +- **Reconciliation**: Check NullTickets task status between steps, cancel if task is terminal + +### Thread Model + +``` +Main thread: HTTP accept loop (push API) +Engine thread: Graph tick loop (state-based scheduler) +Tracker thread: Poll NullTickets -> claim -> workspace -> subprocess/dispatch +MQTT listener: (conditional, for async MQTT workers) +Redis listener: (conditional, for async Redis workers) +``` + +### SSE Streaming + +5 modes for real-time consumption via `GET /runs/{id}/stream?mode=X`: +- `values` -- full state after each step +- `updates` -- node name + partial state updates +- `tasks` -- task start/finish with metadata +- `debug` -- everything with step number + timestamp +- `custom` -- user-defined events from worker 
output (`ui_messages`, `stream_messages`) + +## Database + +SQLite with WAL mode. Schema across 4 migrations: +- `001_init.sql`: workers, runs, steps, step_deps, events, artifacts +- `002_advanced_steps.sql`: cycle_state, chat_messages, saga_state (legacy, unused by current engine) +- `003_tracker.sql`: tracker_runs +- `004_orchestration.sql`: workflows, checkpoints, agent_events, pending_state_injections, node_cache, pending_writes + ALTER TABLE extensions for state_json, config_json, parent_run_id, token accounting ## Pull-Mode (NullTickets Integration) @@ -131,27 +190,3 @@ Optional pull-mode where NullBoiler acts as an agent polling NullTickets for wor ``` If `tracker` is absent or null, the tracker thread does not start and push-mode operates unchanged. - -### Workflow Definitions - -JSON files in `workflows/` directory. Two execution modes: -- `subprocess` — spawn NullClaw child process per task (isolated workspace) -- `dispatch` — use existing registered workers (no workspace) - -Three-axis concurrency: global (`max_concurrent_tasks`) + per-pipeline + per-role limits. - -### Thread Model - -``` -Main thread: HTTP accept loop (push API — unchanged) -Engine thread: DAG tick loop (unchanged) -Tracker thread: Poll NullTickets → claim → workspace → subprocess/dispatch -MQTT listener: (unchanged, conditional) -Redis listener: (unchanged, conditional) -``` - -## Database - -SQLite with WAL mode. Schema: 9 tables across 2 migrations. 
-- `001_init.sql`: workers, runs, steps, step_deps, events, artifacts -- `002_advanced_steps.sql`: cycle_state, chat_messages, saga_state + iteration_index/child_run_id columns on steps diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..bc38dea --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 nullclaw contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 10d466e..7fda7e1 100644 --- a/README.md +++ b/README.md @@ -67,6 +67,22 @@ This keeps the architecture modular, simpler to reason about, and easier to evol See additional integration docs in [`docs/`](./docs). 
+## Workflow Graph Features + +The orchestration graph runtime supports: + +- `task`, `agent`, `route`, `interrupt`, `send`, `transform`, and `subgraph` nodes +- run replay, checkpoint forking, breakpoint interrupts, and post-start state injection +- `send` fan-out with canonical `items_key` and configurable `output_key` +- task/agent output shaping via `output_key` and `output_mapping` +- template access to `state.*`, `input.*`, `item.*`, `config.*`, and `store.*.*` +- `transform.store_updates` for writing durable workflow memory back to NullTickets + +Store-backed templates and `store_updates` require a NullTickets base URL. The +runtime resolves it from workflow fields such as `tracker_url` or from run config +(`config.tracker_url` / `config.tracker_api_token`), which are injected into +state as `__config`. + ## Config Location - Default config path: `~/.nullboiler/config.json` diff --git a/config.example.json b/config.example.json index d069183..fca25c5 100644 --- a/config.example.json +++ b/config.example.json @@ -3,6 +3,7 @@ "port": 8080, "db": "nullboiler.db", "api_token": null, + "self_url": null, "workers": [ { "id": "nullclaw-1", @@ -28,6 +29,14 @@ "model": "anthropic/claude-sonnet-4-6", "tags": ["writer", "editor"], "max_concurrent": 2 + }, + { + "id": "nullclaw-a2a", + "url": "http://localhost:3000", + "token": "set_same_value_as_nullclaw_gateway_paired_tokens", + "protocol": "a2a", + "tags": ["coder", "agent"], + "max_concurrent": 3 + } ], "engine": { diff --git a/docs/superpowers/specs/2026-03-13-orchestration-gaps-design.md b/docs/superpowers/specs/2026-03-13-orchestration-gaps-design.md new file mode 100644 index 0000000..414ca81 --- /dev/null +++ b/docs/superpowers/specs/2026-03-13-orchestration-gaps-design.md @@ -0,0 +1,333 @@ +# Orchestration Gaps Design — Phase 2 + +**Date:** 2026-03-13 +**Status:** Draft +**Scope:** NullBoiler, NullTickets, NullHub +**Branch:** feat/orchestration (extends Phase 1) + +--- + +## Overview + +Phase 2 closes remaining 
gaps vs LangGraph and Symphony. No backward compatibility needed. + +--- + +## 1. Command Primitive + +Nodes can return `goto` alongside `state_updates` to control routing: + +```json +{ + "state_updates": {"review_grade": "approve"}, + "goto": "merge_step" +} +``` + +Engine behavior: if response contains `goto`, skip normal edge evaluation and jump directly to the named node. The node must exist in the workflow. `goto` can be a string (single node) or array (fan-out to multiple nodes). + +Worker response JSON: +```json +{"response": "Approved", "goto": "merge_step"} +``` + +Engine parses `goto` from worker response alongside the text response. For `task` and `agent` nodes only. `route`, `transform`, `interrupt` nodes don't use `goto`. + +--- + +## 2. Subgraphs + +New node type `subgraph`: + +```json +{ + "review_flow": { + "type": "subgraph", + "workflow_id": "code-review-workflow", + "input_mapping": { + "code": "state.fix_result", + "description": "state.task_description" + }, + "output_key": "review_result" + } +} +``` + +Engine behavior: +1. Load workflow definition from `workflows` table by `workflow_id` +2. Build subgraph input from parent state via `input_mapping` (key = subgraph input key, value = parent state path) +3. Create a child run with `createRunWithState()`, linking to parent via a new `parent_run_id` column +4. Execute child run to completion (inline, not spawning a separate engine tick loop — just call `processRun` recursively) +5. On completion, extract child's final state and write to parent's `output_key` +6. On failure, propagate failure to parent run + +### Schema changes + +```sql +ALTER TABLE runs ADD COLUMN parent_run_id TEXT REFERENCES runs(id); +``` + +### StepType update + +Add `subgraph` to StepType enum in types.zig. + +--- + +## 3. Breakpoints on Any Node + +Workflow-level config: + +```json +{ + "interrupt_before": ["review", "merge"], + "interrupt_after": ["generate"], + ... 
+} +``` + +Engine behavior: before executing a node, check if it's in `interrupt_before`. If so, save checkpoint and set run to `interrupted`. After executing a node, check `interrupt_after`. Same behavior. + +Resume works exactly like interrupt node resume — `POST /runs/{id}/resume` with optional `state_updates`. + +This is purely engine logic — no schema changes, no new API endpoints. + +--- + +## 4. Store API in NullTickets + +New table: + +```sql +CREATE TABLE store ( + namespace TEXT NOT NULL, + key TEXT NOT NULL, + value_json TEXT NOT NULL, + created_at_ms INTEGER NOT NULL, + updated_at_ms INTEGER NOT NULL, + PRIMARY KEY (namespace, key) +); +CREATE INDEX idx_store_namespace ON store(namespace); +``` + +### API endpoints + +``` +PUT /store/{namespace}/{key} — put (upsert) +GET /store/{namespace}/{key} — get single +GET /store/{namespace} — list all in namespace +DELETE /store/{namespace}/{key} — delete +DELETE /store/{namespace} — delete namespace +``` + +Request body for PUT: +```json +{"value": {"any": "json"}} +``` + +Response for GET: +```json +{ + "namespace": "user_123", + "key": "preferences", + "value": {"theme": "dark"}, + "created_at_ms": 1710300000000, + "updated_at_ms": 1710300005000 +} +``` + +### Usage from NullBoiler workflows + +New template syntax: `{{store.namespace.key}}` — engine fetches from nulltickets Store API during prompt rendering. + +Runtime resolution: + +- NullTickets base URL comes from workflow-level `tracker_url` / `nulltickets_url`, or from run config (`config.tracker_url`, surfaced as `state.__config.tracker_url`). +- Optional auth token comes from `tracker_api_token` / `nulltickets_api_token` on the workflow or run config. +- Missing store keys render as empty strings in templates. 
+ +New node type isn't needed — `task` nodes can read via template, and `transform` nodes can write via a `store_updates` field (single object or array of objects): + +```json +{ + "save_context": { + "type": "transform", + "updates": {}, + "store_updates": { + "namespace": "project_context", + "key": "latest_review", + "value": "state.review_result" + } + } +} +``` + +`store_updates.value` can point at a state path such as `state.review_result`, or it can be inline JSON that will be written as-is. + +Engine calls nulltickets `PUT /store/{namespace}/{key}` after `updates` are applied, so writes can reference the node's freshly updated state. + +--- + +## 5. Multi-Turn Continuation + +Extend `agent` node with multi-turn support: + +```json +{ + "fix_bug": { + "type": "agent", + "prompt": "Fix this: {{state.task_description}}", + "continuation_prompt": "Task is still active. Continue from current state.", + "max_turns": 10, + "tags": ["coder"], + "output_key": "fix_result" + } +} +``` + +Engine behavior: +1. Turn 1: A2A `tasks/send` with rendered `prompt`, `contextId = "run_{id}_step_{name}"` +2. Parse response — check if agent indicated completion (response contains final answer, no pending tool calls) +3. If not complete and turn < `max_turns`: send `continuation_prompt` via A2A with same `contextId` (session persistence) +4. Repeat until complete or `max_turns` exhausted +5. Final response text → state_updates via `output_key` + +Between turns, engine can: +- Check if nulltickets task state changed (reconciliation) +- Apply pending state injections +- Broadcast SSE `agent_turn` events + +No schema changes needed — this is engine logic using existing A2A infrastructure. + +--- + +## 6. Configurable Runs + +Workflow JSON gets optional `defaults` section: + +```json +{ + "defaults": { + "model": "claude-sonnet-4-6", + "temperature": 0.7, + "max_agent_turns": 10 + }, + ... 
+} +``` + +Run creation accepts `config` overrides: + +``` +POST /workflows/{id}/run +{ + "input": {"task": "fix bug"}, + "config": {"model": "claude-opus-4-6", "temperature": 0.3} +} +``` + +Merged config (run overrides > workflow defaults) stored in `run.config_json`. + +Template access: `{{config.model}}`, `{{config.temperature}}`. + +### Schema changes + +```sql +ALTER TABLE runs ADD COLUMN config_json TEXT; +``` + +--- + +## 7. Per-State Concurrency in NullTickets + +Extend nulltickets claim endpoint to support per-state limits. + +Claim request gets optional `concurrency` parameter: + +``` +POST /leases/claim +{ + "agent_id": "boiler-01", + "agent_role": "coder", + "concurrency": { + "per_state": {"in_progress": 5, "rework": 2} + } +} +``` + +Claim logic: before returning a task, count currently-leased tasks in the same state. If at limit, skip to next eligible task. + +This is a nulltickets store.zig change in the claim query. + +--- + +## 8. Reconciliation + +Engine tick adds a reconciliation step for runs linked to nulltickets tasks: + +After each step completes, if `run.task_id` is set (pull-mode run): +1. Fetch current task state from nulltickets: `GET /tasks/{task_id}` +2. If task state changed to a terminal state → cancel the run +3. If task state changed to a different active state → update run metadata, continue + +This prevents wasted agent execution on tasks that humans already resolved. + +Engine logic only — no schema changes. + +--- + +## 9. Workspace Reuse Per Issue + +In NullBoiler's tracker/workspace system, workspaces should be reused for the same nulltickets task: + +- Workspace directory name based on `task_id` (not `run_id`) +- On new run for same task: reuse existing workspace (skip `after_create` hook, still run `before_run`) +- On task completion: run `after_run` hook, keep workspace +- On task terminal state + configurable cleanup: run `before_remove`, delete workspace + +This is a tracker.zig + workspace.zig change. + +--- + +## 10. 
Message-Native State (add_messages reducer) + +New reducer type `add_messages`: + +```json +{ + "state_schema": { + "messages": {"type": "array", "reducer": "add_messages"} + } +} +``` + +Behavior: +- Each message has an `id` field +- On update: if message with same `id` exists, replace it. Otherwise append. +- Special: if update contains `{"remove": true, "id": "msg_123"}`, remove that message. +- If message has no `id`, auto-generate one and append. + +This enables chat-history-aware workflows where messages can be updated or removed by ID. + +Implementation: new case in `state.zig` `applyReducer()`. + +### ReducerType update + +Add `add_messages` to ReducerType enum in types.zig. + +--- + +## Summary of Changes + +| Repo | Changes | +|------|---------| +| NullBoiler types.zig | Add `subgraph` to StepType, `add_messages` to ReducerType | +| NullBoiler engine.zig | Command goto, subgraph execution, breakpoints, multi-turn, reconciliation, store_updates | +| NullBoiler state.zig | add_messages reducer | +| NullBoiler store.zig | `parent_run_id` + `config_json` columns | +| NullBoiler api.zig | config in run creation, template store access | +| NullBoiler templates.zig | `{{store.X.Y}}`, `{{config.X}}` access | +| NullBoiler tracker.zig | Workspace reuse, reconciliation | +| nulltickets store.zig | Store KV CRUD, per-state concurrency in claim | +| nulltickets api.zig | Store endpoints, claim concurrency param | +| nulltickets migrations | Store table | +| nullhub UI | Store viewer page (optional) | diff --git a/src/api.zig b/src/api.zig index 5f1dd59..0ce34a9 100644 --- a/src/api.zig +++ b/src/api.zig @@ -8,6 +8,9 @@ const metrics_mod = @import("metrics.zig"); const strategy_mod = @import("strategy.zig"); const tracker_mod = @import("tracker.zig"); const config_mod = @import("config.zig"); +const sse_mod = @import("sse.zig"); +const state_mod = @import("state.zig"); +const engine_mod = @import("engine.zig"); // ── Types 
──────────────────────────────────────────────────────────── @@ -24,6 +27,8 @@ pub const Context = struct { strategies: ?*const strategy_mod.StrategyMap = null, tracker_state: ?*tracker_mod.TrackerState = null, tracker_cfg: ?*const config_mod.TrackerConfig = null, + sse_hub: ?*sse_mod.SseHub = null, + rate_limits: ?*std.StringHashMap(engine_mod.RateLimitInfo) = null, }; pub const HttpResponse = struct { @@ -41,16 +46,16 @@ pub fn handleRequest(ctx: *Context, method: []const u8, target: []const u8, body } const path = parsePath(target); - const seg0 = getPathSegment(path, 0); - const seg1 = getPathSegment(path, 1); - const seg2 = getPathSegment(path, 2); - const seg3 = getPathSegment(path, 3); - const seg4 = getPathSegment(path, 4); - const seg5 = getPathSegment(path, 5); + const seg0 = decodePathSegment(ctx.allocator, getPathSegment(path, 0)); + const seg1 = decodePathSegment(ctx.allocator, getPathSegment(path, 1)); + const seg2 = decodePathSegment(ctx.allocator, getPathSegment(path, 2)); + const seg3 = decodePathSegment(ctx.allocator, getPathSegment(path, 3)); + const seg4 = decodePathSegment(ctx.allocator, getPathSegment(path, 4)); const is_get = eql(method, "GET"); const is_post = eql(method, "POST"); const is_delete = eql(method, "DELETE"); + const is_put = eql(method, "PUT"); if (!isAuthorized(ctx, seg0, seg1)) { return jsonResponse(401, "{\"error\":{\"code\":\"unauthorized\",\"message\":\"missing or invalid bearer token\"}}"); @@ -101,26 +106,6 @@ pub fn handleRequest(ctx: *Context, method: []const u8, target: []const u8, body return handleGetStep(ctx, seg1.?, seg3.?); } - // POST /runs/{id}/steps/{step_id}/approve - if (is_post and eql(seg0, "runs") and seg1 != null and eql(seg2, "steps") and seg3 != null and eql(seg4, "approve") and seg5 == null) { - return handleApproveStep(ctx, seg1.?, seg3.?); - } - - // POST /runs/{id}/steps/{step_id}/reject - if (is_post and eql(seg0, "runs") and seg1 != null and eql(seg2, "steps") and seg3 != null and eql(seg4, 
"reject") and seg5 == null) { - return handleRejectStep(ctx, seg1.?, seg3.?); - } - - // POST /runs/{id}/steps/{step_id}/signal - if (is_post and eql(seg0, "runs") and seg1 != null and eql(seg2, "steps") and seg3 != null and eql(seg4, "signal") and seg5 == null) { - return handleSignalStep(ctx, seg1.?, seg3.?, body); - } - - // GET /runs/{id}/steps/{step_id}/chat - if (is_get and eql(seg0, "runs") and seg1 != null and eql(seg2, "steps") and seg3 != null and eql(seg4, "chat") and seg5 == null) { - return handleGetChatTranscript(ctx, seg1.?, seg3.?); - } - // GET /runs/{id}/events if (is_get and eql(seg0, "runs") and seg1 != null and eql(seg2, "events") and seg3 == null) { return handleListEvents(ctx, seg1.?); @@ -171,6 +156,103 @@ pub fn handleRequest(ctx: *Context, method: []const u8, target: []const u8, body return handleTrackerRefresh(ctx); } + // GET /rate-limits + if (is_get and eql(seg0, "rate-limits") and seg1 == null) { + return handleGetRateLimits(ctx); + } + + // ── Workflow CRUD ─────────────────────────────────────────────── + + // POST /workflows + if (is_post and eql(seg0, "workflows") and seg1 == null) { + return handleCreateWorkflow(ctx, body); + } + + // GET /workflows + if (is_get and eql(seg0, "workflows") and seg1 == null) { + return handleListWorkflows(ctx); + } + + // GET /workflows/{id} + if (is_get and eql(seg0, "workflows") and seg1 != null and seg2 == null) { + return handleGetWorkflow(ctx, seg1.?); + } + + // PUT /workflows/{id} + if (is_put and eql(seg0, "workflows") and seg1 != null and seg2 == null) { + return handleUpdateWorkflow(ctx, seg1.?, body); + } + + // DELETE /workflows/{id} + if (is_delete and eql(seg0, "workflows") and seg1 != null and seg2 == null) { + return handleDeleteWorkflow(ctx, seg1.?); + } + + // POST /workflows/{id}/validate + if (is_post and eql(seg0, "workflows") and seg1 != null and eql(seg2, "validate") and seg3 == null) { + return handleValidateWorkflow(ctx, seg1.?); + } + + // GET /workflows/{id}/mermaid + if 
(is_get and eql(seg0, "workflows") and seg1 != null and eql(seg2, "mermaid") and seg3 == null) { + return handleGetMermaid(ctx, seg1.?); + } + + // POST /workflows/{id}/run + if (is_post and eql(seg0, "workflows") and seg1 != null and eql(seg2, "run") and seg3 == null) { + return handleRunWorkflow(ctx, seg1.?, body); + } + + // ── Checkpoint endpoints ──────────────────────────────────────── + + // GET /runs/{id}/checkpoints + if (is_get and eql(seg0, "runs") and seg1 != null and eql(seg2, "checkpoints") and seg3 == null) { + return handleListCheckpoints(ctx, seg1.?); + } + + // GET /runs/{id}/checkpoints/{cpId} + if (is_get and eql(seg0, "runs") and seg1 != null and eql(seg2, "checkpoints") and seg3 != null and seg4 == null) { + return handleGetCheckpoint(ctx, seg1.?, seg3.?); + } + + // ── State control endpoints ───────────────────────────────────── + + // POST /runs/{id}/resume + if (is_post and eql(seg0, "runs") and seg1 != null and eql(seg2, "resume") and seg3 == null) { + return handleResumeRun(ctx, seg1.?, body); + } + + // POST /runs/fork + if (is_post and eql(seg0, "runs") and eql(seg1, "fork") and seg2 == null) { + return handleForkRun(ctx, body); + } + + // POST /runs/{id}/state + if (is_post and eql(seg0, "runs") and seg1 != null and eql(seg2, "state") and seg3 == null) { + return handleInjectState(ctx, seg1.?, body); + } + + // ── SSE stream endpoint ───────────────────────────────────────── + + // GET /runs/{id}/stream + if (is_get and eql(seg0, "runs") and seg1 != null and eql(seg2, "stream") and seg3 == null) { + return handleStream(ctx, seg1.?, target); + } + + // ── Replay endpoint ──────────────────────────────────────────── + + // POST /runs/{id}/replay + if (is_post and eql(seg0, "runs") and seg1 != null and eql(seg2, "replay") and seg3 == null) { + return handleReplayRun(ctx, seg1.?, body); + } + + // ── Agent events callback ─────────────────────────────────────── + + // POST /internal/agent-events/{run_id}/{step_id} + if (is_post and 
eql(seg0, "internal") and eql(seg1, "agent-events") and seg2 != null and seg3 != null and seg4 == null) { + return handleAgentEventCallback(ctx, seg2.?, seg3.?, body); + } + return jsonResponse(404, "{\"error\":{\"code\":\"not_found\",\"message\":\"endpoint not found\"}}"); } @@ -212,6 +294,36 @@ fn handleEnableDrain(ctx: *Context) HttpResponse { return jsonResponse(200, "{\"status\":\"draining\"}"); } +// ── Rate Limit Handler ────────────────────────────────────────────── + +fn handleGetRateLimits(ctx: *Context) HttpResponse { + const rl_map = ctx.rate_limits orelse { + return jsonResponse(200, "[]"); + }; + + var buf: std.ArrayListUnmanaged(u8) = .empty; + buf.append(ctx.allocator, '[') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + + var it = rl_map.iterator(); + var first = true; + while (it.next()) |entry| { + if (!first) { + buf.append(ctx.allocator, ',') catch continue; + } + first = false; + + const rl = entry.value_ptr.*; + const wid_json = jsonQuoted(ctx.allocator, rl.worker_id) catch continue; + const item = std.fmt.allocPrint(ctx.allocator, + \\{{"worker_id":{s},"remaining":{d},"limit":{d},"reset_ms":{d},"updated_at_ms":{d}}} + , .{ wid_json, rl.remaining, rl.limit, rl.reset_ms, rl.updated_at_ms }) catch continue; + buf.appendSlice(ctx.allocator, item) catch continue; + } + + buf.append(ctx.allocator, ']') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + return jsonResponse(200, buf.items); +} + // ── Worker Handlers ────────────────────────────────────────────────── fn handleListWorkers(ctx: *Context) HttpResponse { @@ -294,7 +406,7 @@ fn handleRegisterWorker(ctx: *Context, body: []const u8) HttpResponse { const model = getJsonString(obj, "model"); const protocol = worker_protocol.parse(protocol_raw) orelse { - return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"invalid protocol (expected 
webhook|api_chat|openai_chat)\"}}"); + return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"invalid protocol (expected webhook|api_chat|openai_chat|mqtt|redis_stream|a2a)\"}}"); }; if (!worker_protocol.validateUrlForProtocol(url, protocol)) { return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"webhook protocol requires explicit URL path (for example /webhook)\"}}"); @@ -615,20 +727,50 @@ fn handleGetRun(ctx: *Context, id: []const u8) HttpResponse { const ik_json = jsonQuoted(ctx.allocator, ik) catch ""; break :blk std.fmt.allocPrint(ctx.allocator, ",\"idempotency_key\":{s}", .{ik_json}) catch ""; } else ""; + const workflow_id_field = if (run.workflow_id) |wid| blk: { + const wid_json = jsonQuoted(ctx.allocator, wid) catch ""; + break :blk std.fmt.allocPrint(ctx.allocator, ",\"workflow_id\":{s}", .{wid_json}) catch ""; + } else ""; + + // Include state_json if present + const state_field = if (run.state_json) |sj| + std.fmt.allocPrint(ctx.allocator, ",\"state_json\":{s}", .{sj}) catch "" + else + ""; + + // Count checkpoints + const checkpoints = ctx.store.listCheckpoints(ctx.allocator, id) catch &.{}; + const checkpoint_count: i64 = @intCast(checkpoints.len); + const checkpoint_field = std.fmt.allocPrint(ctx.allocator, ",\"checkpoint_count\":{d}", .{checkpoint_count}) catch ""; + + // Token accounting (Gap 2) + var token_input: i64 = 0; + var token_output: i64 = 0; + var token_total: i64 = 0; + if (ctx.store.getRunTokens(id)) |t| { + token_input = t.input; + token_output = t.output; + token_total = t.total; + } else |_| {} + const token_field = std.fmt.allocPrint(ctx.allocator, ",\"total_input_tokens\":{d},\"total_output_tokens\":{d},\"total_tokens\":{d}", .{ token_input, token_output, token_total }) catch ""; const run_id_json = jsonQuoted(ctx.allocator, run.id) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); const run_status_json = jsonQuoted(ctx.allocator, 
run.status) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); const resp = std.fmt.allocPrint(ctx.allocator, - \\{{"id":{s},"status":{s}{s},"created_at_ms":{d},"updated_at_ms":{d}{s}{s}{s},"steps":{s}}} + \\{{"id":{s},"status":{s}{s},"created_at_ms":{d},"updated_at_ms":{d}{s}{s}{s}{s}{s}{s}{s},"steps":{s}}} , .{ run_id_json, run_status_json, idempotency_field, run.created_at_ms, run.updated_at_ms, + workflow_id_field, error_field, started_field, ended_field, + state_field, + checkpoint_field, + token_field, steps_json, }) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); return jsonResponse(200, resp); @@ -636,11 +778,12 @@ fn handleGetRun(ctx: *Context, id: []const u8) HttpResponse { fn handleListRuns(ctx: *Context, target: []const u8) HttpResponse { const status_filter = getQueryParam(target, "status"); + const workflow_id_filter = getQueryParam(target, "workflow_id"); const limit = parseQueryInt(target, "limit", 100, 1, 1000); const offset = parseQueryInt(target, "offset", 0, 0, 1_000_000_000); // Fetch one extra row to compute has_more. 
- const runs = ctx.store.listRuns(ctx.allocator, status_filter, limit + 1, offset) catch { + const runs = ctx.store.listRuns(ctx.allocator, status_filter, workflow_id_filter, limit + 1, offset) catch { return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to list runs\"}}"); }; @@ -660,12 +803,17 @@ fn handleListRuns(ctx: *Context, target: []const u8) HttpResponse { const ik_json = jsonQuoted(ctx.allocator, ik) catch ""; break :blk std.fmt.allocPrint(ctx.allocator, ",\"idempotency_key\":{s}", .{ik_json}) catch ""; } else ""; + const workflow_id_field = if (r.workflow_id) |wid| blk: { + const wid_json = jsonQuoted(ctx.allocator, wid) catch ""; + break :blk std.fmt.allocPrint(ctx.allocator, ",\"workflow_id\":{s}", .{wid_json}) catch ""; + } else ""; const entry = std.fmt.allocPrint(ctx.allocator, - \\{{"id":{s},"status":{s}{s},"created_at_ms":{d},"updated_at_ms":{d}}} + \\{{"id":{s},"status":{s}{s}{s},"created_at_ms":{d},"updated_at_ms":{d}}} , .{ run_id_json, run_status_json, idempotency_field, + workflow_id_field, r.created_at_ms, r.updated_at_ms, }) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); @@ -765,7 +913,10 @@ fn handleCancelRun(ctx: *Context, run_id: []const u8) HttpResponse { // 5. Insert event ctx.store.insertEvent(run_id, null, "run.cancelled", "{}") catch {}; - // 6. Return 200 + // 6. Mark SSE queue closed but keep buffered events available for late subscribers. + if (ctx.sse_hub) |hub| hub.closeQueue(run_id); + + // 7. Return 200 const resp = std.fmt.allocPrint(ctx.allocator, \\{{"id":"{s}","status":"cancelled"}} , .{run_id}) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); @@ -814,213 +965,837 @@ fn handleRetryRun(ctx: *Context, run_id: []const u8) HttpResponse { return jsonResponse(200, resp); } -fn handleApproveStep(ctx: *Context, run_id: []const u8, step_id: []const u8) HttpResponse { - // 1. 
Get step from store - const step = switch (lookupStepInRun(ctx, run_id, step_id)) { - .ok => |s| s, - .err => |resp| return resp, +fn handleListEvents(ctx: *Context, run_id: []const u8) HttpResponse { + // 1. Get events from store + const events = ctx.store.getEventsByRun(ctx.allocator, run_id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get events\"}}"); }; - // 2. Must be "waiting_approval" - if (!std.mem.eql(u8, step.status, "waiting_approval")) { - const resp = std.fmt.allocPrint(ctx.allocator, - \\{{"error":{{"code":"conflict","message":"step is not waiting_approval (current: {s})"}}}} - , .{step.status}) catch return jsonResponse(409, "{\"error\":{\"code\":\"conflict\",\"message\":\"step is not waiting_approval\"}}"); - return jsonResponse(409, resp); + // 2. Build JSON array + var buf: std.ArrayListUnmanaged(u8) = .empty; + buf.append(ctx.allocator, '[') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + + for (events, 0..) 
|ev, i| { + if (i > 0) { + buf.append(ctx.allocator, ',') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + } + + const step_field = if (ev.step_id) |sid| blk: { + const sid_json = jsonQuoted(ctx.allocator, sid) catch ""; + break :blk std.fmt.allocPrint(ctx.allocator, ",\"step_id\":{s}", .{sid_json}) catch ""; + } else ""; + const run_id_json = jsonQuoted(ctx.allocator, ev.run_id) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + const kind_json = jsonQuoted(ctx.allocator, ev.kind) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + + const entry = std.fmt.allocPrint(ctx.allocator, + \\{{"id":{d},"run_id":{s}{s},"kind":{s},"data":{s},"ts_ms":{d}}} + , .{ + ev.id, + run_id_json, + step_field, + kind_json, + ev.data_json, + ev.ts_ms, + }) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + buf.appendSlice(ctx.allocator, entry) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); } - // 3. 
Update status to "completed" - ctx.store.updateStepStatus(step_id, "completed", null, null, null, step.attempt) catch { - return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to update step\"}}"); + buf.append(ctx.allocator, ']') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + const json_body = buf.toOwnedSlice(ctx.allocator) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + return jsonResponse(200, json_body); +} + +// ── Workflow CRUD Handlers ─────────────────────────────────────────── + +fn handleCreateWorkflow(ctx: *Context, body: []const u8) HttpResponse { + const parsed = std.json.parseFromSlice(std.json.Value, ctx.allocator, body, .{}) catch { + return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"invalid JSON body\"}}"); }; + defer parsed.deinit(); - // 4. Insert event - ctx.store.insertEvent(run_id, step_id, "step.approved", "{}") catch {}; + if (parsed.value != .object) { + return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"body must be a JSON object\"}}"); + } + const obj = parsed.value.object; + + const name = getJsonString(obj, "name") orelse "untitled"; + + // Use provided id or generate one + const wf_id = if (getJsonString(obj, "id")) |provided_id| + ctx.allocator.dupe(u8, provided_id) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}") + else blk: { + const id_buf = ids.generateId(); + break :blk ctx.allocator.dupe(u8, &id_buf) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + }; + + // If definition_json is a sub-key, extract it; otherwise use the whole body + const definition_json = if (obj.get("definition_json")) |def_val| blk: { + break :blk serializeJsonValue(ctx.allocator, def_val) catch return jsonResponse(500, 
"{\"error\":{\"code\":\"internal\",\"message\":\"failed to serialize definition\"}}"); + } else body; + + // Extract version from body (default 1) + const version: i64 = if (obj.get("version")) |v| blk: { + if (v == .integer) break :blk v.integer; + break :blk 1; + } else 1; - // 5. Return 200 + ctx.store.createWorkflowWithVersion(wf_id, name, definition_json, version) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to create workflow\"}}"); + }; + + const id_json = jsonQuoted(ctx.allocator, wf_id) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + const name_json = jsonQuoted(ctx.allocator, name) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); const resp = std.fmt.allocPrint(ctx.allocator, - \\{{"step_id":"{s}","status":"completed"}} - , .{step_id}) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); - return jsonResponse(200, resp); + \\{{"id":{s},"name":{s},"version":{d}}} + , .{ id_json, name_json, version }) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + return jsonResponse(201, resp); } -fn handleRejectStep(ctx: *Context, run_id: []const u8, step_id: []const u8) HttpResponse { - // 1. Get step from store - const step = switch (lookupStepInRun(ctx, run_id, step_id)) { - .ok => |s| s, - .err => |resp| return resp, +fn handleListWorkflows(ctx: *Context) HttpResponse { + const workflows = ctx.store.listWorkflows(ctx.allocator) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to list workflows\"}}"); }; - // 2. 
Must be "waiting_approval" - if (!std.mem.eql(u8, step.status, "waiting_approval")) { - const resp = std.fmt.allocPrint(ctx.allocator, - \\{{"error":{{"code":"conflict","message":"step is not waiting_approval (current: {s})"}}}} - , .{step.status}) catch return jsonResponse(409, "{\"error\":{\"code\":\"conflict\",\"message\":\"step is not waiting_approval\"}}"); - return jsonResponse(409, resp); + var buf: std.ArrayListUnmanaged(u8) = .empty; + buf.append(ctx.allocator, '[') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + + for (workflows, 0..) |wf, i| { + if (i > 0) { + buf.append(ctx.allocator, ',') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + } + const id_json = jsonQuoted(ctx.allocator, wf.id) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + const name_json = jsonQuoted(ctx.allocator, wf.name) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + const entry = std.fmt.allocPrint(ctx.allocator, + \\{{"id":{s},"name":{s},"version":{d},"definition":{s},"created_at_ms":{d},"updated_at_ms":{d}}} + , .{ + id_json, + name_json, + wf.version, + wf.definition_json, + wf.created_at_ms, + wf.updated_at_ms, + }) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + buf.appendSlice(ctx.allocator, entry) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); } - // 3. 
Update status to "failed", set error_text - ctx.store.updateStepStatus(step_id, "failed", null, null, "rejected by user", step.attempt) catch { - return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to update step\"}}"); - }; + buf.append(ctx.allocator, ']') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + const json_body = buf.toOwnedSlice(ctx.allocator) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + return jsonResponse(200, json_body); +} - // 4. Insert event - ctx.store.insertEvent(run_id, step_id, "step.rejected", "{}") catch {}; +fn handleGetWorkflow(ctx: *Context, id: []const u8) HttpResponse { + const wf = ctx.store.getWorkflow(ctx.allocator, id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get workflow\"}}"); + } orelse { + return jsonResponse(404, "{\"error\":{\"code\":\"not_found\",\"message\":\"workflow not found\"}}"); + }; - // 5. 
Return 200 + const id_json = jsonQuoted(ctx.allocator, wf.id) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + const name_json = jsonQuoted(ctx.allocator, wf.name) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); const resp = std.fmt.allocPrint(ctx.allocator, - \\{{"step_id":"{s}","status":"failed"}} - , .{step_id}) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + \\{{"id":{s},"name":{s},"version":{d},"definition":{s},"created_at_ms":{d},"updated_at_ms":{d}}} + , .{ + id_json, + name_json, + wf.version, + wf.definition_json, + wf.created_at_ms, + wf.updated_at_ms, + }) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); return jsonResponse(200, resp); } -fn handleSignalStep(ctx: *Context, run_id: []const u8, step_id: []const u8, body: []const u8) HttpResponse { - // 1. Get step from store - const step = switch (lookupStepInRun(ctx, run_id, step_id)) { - .ok => |s| s, - .err => |resp| return resp, +fn handleUpdateWorkflow(ctx: *Context, id: []const u8, body: []const u8) HttpResponse { + // Verify workflow exists + _ = ctx.store.getWorkflow(ctx.allocator, id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get workflow\"}}"); + } orelse { + return jsonResponse(404, "{\"error\":{\"code\":\"not_found\",\"message\":\"workflow not found\"}}"); }; - // 2. 
Must be "waiting_approval" (signal mode uses this status) - if (!std.mem.eql(u8, step.status, "waiting_approval")) { - const resp = std.fmt.allocPrint(ctx.allocator, - \\{{"error":{{"code":"conflict","message":"step is not waiting for signal (current: {s})"}}}} - , .{step.status}) catch return jsonResponse(409, "{\"error\":{\"code\":\"conflict\",\"message\":\"step is not waiting for signal\"}}"); - return jsonResponse(409, resp); - } + const parsed = std.json.parseFromSlice(std.json.Value, ctx.allocator, body, .{}) catch { + return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"invalid JSON body\"}}"); + }; + defer parsed.deinit(); - // 3. Parse optional signal data from body - var signal_data: []const u8 = "{}"; - if (body.len > 0) { - const parsed = std.json.parseFromSlice(std.json.Value, ctx.allocator, body, .{}) catch { - // Body is not valid JSON; use empty - signal_data = "{}"; - // Continue anyway - const output = std.fmt.allocPrint(ctx.allocator, - \\{{"output":"signaled","data":{{}}}} - , .{}) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); - - ctx.store.updateStepStatus(step_id, "completed", null, output, null, step.attempt) catch { - return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to update step\"}}"); - }; - ctx.store.insertEvent(run_id, step_id, "step.signaled", output) catch {}; - const resp = std.fmt.allocPrint(ctx.allocator, - \\{{"step_id":"{s}","status":"completed"}} - , .{step_id}) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); - return jsonResponse(200, resp); - }; - _ = parsed; - signal_data = body; + if (parsed.value != .object) { + return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"body must be a JSON object\"}}"); } + const obj = parsed.value.object; + + const name = getJsonString(obj, "name") orelse "untitled"; + const definition_json = if 
(obj.get("definition_json")) |def_val| blk: { + break :blk serializeJsonValue(ctx.allocator, def_val) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to serialize definition\"}}"); + } else body; - // 4. Build output with signal data - const output = std.fmt.allocPrint(ctx.allocator, - \\{{"output":"signaled","data":{s}}} - , .{signal_data}) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + // Extract version if provided + const version: ?i64 = if (obj.get("version")) |v| blk: { + if (v == .integer) break :blk v.integer; + break :blk null; + } else null; - // 5. Update step to "completed" - ctx.store.updateStepStatus(step_id, "completed", null, output, null, step.attempt) catch { - return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to update step\"}}"); + ctx.store.updateWorkflowWithVersion(id, name, definition_json, version) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to update workflow\"}}"); }; - // 6. Insert event - ctx.store.insertEvent(run_id, step_id, "step.signaled", output) catch {}; + return jsonResponse(200, "{\"ok\":true}"); +} - // 7. 
Return 200 - const resp = std.fmt.allocPrint(ctx.allocator, - \\{{"step_id":"{s}","status":"completed"}} - , .{step_id}) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); - return jsonResponse(200, resp); +fn handleDeleteWorkflow(ctx: *Context, id: []const u8) HttpResponse { + // Verify workflow exists + _ = ctx.store.getWorkflow(ctx.allocator, id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get workflow\"}}"); + } orelse { + return jsonResponse(404, "{\"error\":{\"code\":\"not_found\",\"message\":\"workflow not found\"}}"); + }; + + ctx.store.deleteWorkflow(id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to delete workflow\"}}"); + }; + + return jsonResponse(200, "{\"ok\":true}"); } -fn handleListEvents(ctx: *Context, run_id: []const u8) HttpResponse { - // 1. Get events from store - const events = ctx.store.getEventsByRun(ctx.allocator, run_id) catch { - return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get events\"}}"); +fn handleValidateWorkflow(ctx: *Context, id: []const u8) HttpResponse { + const wf = ctx.store.getWorkflow(ctx.allocator, id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get workflow\"}}"); + } orelse { + return jsonResponse(404, "{\"error\":{\"code\":\"not_found\",\"message\":\"workflow not found\"}}"); }; - // 2. 
Build JSON array + const errors = workflow_validation.validate(ctx.allocator, wf.definition_json) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"validation failed\"}}"); + }; + + // Build validation result var buf: std.ArrayListUnmanaged(u8) = .empty; - buf.append(ctx.allocator, '[') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + buf.appendSlice(ctx.allocator, "{\"valid\":") catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + buf.appendSlice(ctx.allocator, if (errors.len == 0) "true" else "false") catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + buf.appendSlice(ctx.allocator, ",\"errors\":[") catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); - for (events, 0..) |ev, i| { + for (errors, 0..) |ve, i| { if (i > 0) { buf.append(ctx.allocator, ',') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); } - - const step_field = if (ev.step_id) |sid| blk: { - const sid_json = jsonQuoted(ctx.allocator, sid) catch ""; - break :blk std.fmt.allocPrint(ctx.allocator, ",\"step_id\":{s}", .{sid_json}) catch ""; + const err_type_json = jsonQuoted(ctx.allocator, ve.err_type) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + const node_field = if (ve.node) |n| blk: { + const n_json = jsonQuoted(ctx.allocator, n) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + break :blk std.fmt.allocPrint(ctx.allocator, ",\"node\":{s}", .{n_json}) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); } else ""; - const run_id_json = jsonQuoted(ctx.allocator, ev.run_id) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of 
memory\"}}"); - const kind_json = jsonQuoted(ctx.allocator, ev.kind) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); - + const key_field = if (ve.key) |k| blk: { + const k_json = jsonQuoted(ctx.allocator, k) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + break :blk std.fmt.allocPrint(ctx.allocator, ",\"key\":{s}", .{k_json}) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + } else ""; + const msg_json = jsonQuoted(ctx.allocator, ve.message) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); const entry = std.fmt.allocPrint(ctx.allocator, - \\{{"id":{d},"run_id":{s}{s},"kind":{s},"data":{s},"ts_ms":{d}}} + \\{{"type":{s}{s}{s},"message":{s}}} , .{ - ev.id, - run_id_json, - step_field, - kind_json, - ev.data_json, - ev.ts_ms, + err_type_json, + node_field, + key_field, + msg_json, }) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); buf.appendSlice(ctx.allocator, entry) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); } + buf.appendSlice(ctx.allocator, "]") catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + + // Include Mermaid diagram in validation response + const mermaid_str = engine_mod.generateMermaid(ctx.allocator, wf.definition_json) catch null; + if (mermaid_str) |ms| { + const mermaid_json = jsonQuoted(ctx.allocator, ms) catch null; + if (mermaid_json) |mj| { + buf.appendSlice(ctx.allocator, ",\"mermaid\":") catch {}; + buf.appendSlice(ctx.allocator, mj) catch {}; + } + } + + buf.appendSlice(ctx.allocator, "}") catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + const json_body = buf.toOwnedSlice(ctx.allocator) catch return jsonResponse(500, 
"{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + return jsonResponse(200, json_body); +} + +fn handleGetMermaid(ctx: *Context, id: []const u8) HttpResponse { + const wf = ctx.store.getWorkflow(ctx.allocator, id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get workflow\"}}"); + } orelse { + return jsonResponse(404, "{\"error\":{\"code\":\"not_found\",\"message\":\"workflow not found\"}}"); + }; + + const mermaid = engine_mod.generateMermaid(ctx.allocator, wf.definition_json) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to generate mermaid diagram\"}}"); + }; + + return plainResponse(200, mermaid); +} + +fn handleRunWorkflow(ctx: *Context, workflow_id: []const u8, body: []const u8) HttpResponse { + // Load workflow + const wf = ctx.store.getWorkflow(ctx.allocator, workflow_id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get workflow\"}}"); + } orelse { + return jsonResponse(404, "{\"error\":{\"code\":\"not_found\",\"message\":\"workflow not found\"}}"); + }; + + // Validate + const errors = workflow_validation.validate(ctx.allocator, wf.definition_json) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"validation failed\"}}"); + }; + if (errors.len > 0) { + return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"workflow has validation errors\"}}"); + } + + // Parse definition to extract state_schema for initState + const def_parsed = std.json.parseFromSlice(std.json.Value, ctx.allocator, wf.definition_json, .{}) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to parse workflow definition\"}}"); + }; + defer def_parsed.deinit(); + + const schema_json = if (def_parsed.value == .object) blk: { + if (def_parsed.value.object.get("state_schema")) |ss| { + break :blk serializeJsonValue(ctx.allocator, ss) catch 
return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to serialize schema\"}}"); + } + break :blk "{}"; + } else "{}"; + + // Parse input from request body (or default to {}) + const input_json = if (body.len > 0) blk: { + const bp = std.json.parseFromSlice(std.json.Value, ctx.allocator, body, .{}) catch break :blk "{}"; + defer bp.deinit(); + if (bp.value == .object) { + if (bp.value.object.get("input")) |input_val| { + break :blk serializeJsonValue(ctx.allocator, input_val) catch break :blk "{}"; + } + } + break :blk "{}"; + } else "{}"; + + // Init state + const initial_state = state_mod.initState(ctx.allocator, input_json, schema_json) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to initialize state\"}}"); + }; + + // Generate run ID + const run_id_buf = ids.generateId(); + const run_id = ctx.allocator.dupe(u8, &run_id_buf) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + + // Create run directly with "running" status to avoid race window where + // engine could miss a run created as "pending" then updated to "running". 
+ ctx.store.createRunWithStateAndStatus(run_id, workflow_id, wf.definition_json, input_json, initial_state, "running") catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to create run\"}}"); + }; + + // Create initial checkpoint (version 0, no completed nodes) + const cp_id_buf = ids.generateId(); + const cp_id = ctx.allocator.dupe(u8, &cp_id_buf) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + ctx.store.createCheckpoint(cp_id, run_id, "__init__", null, initial_state, "[]", 0, null) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to create checkpoint\"}}"); + }; + + const run_id_json = jsonQuoted(ctx.allocator, run_id) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + const resp = std.fmt.allocPrint(ctx.allocator, + \\{{"id":{s},"status":"running"}} + , .{run_id_json}) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + return jsonResponse(201, resp); +} + +// ── Checkpoint Handlers ───────────────────────────────────────────── + +fn handleListCheckpoints(ctx: *Context, run_id: []const u8) HttpResponse { + // Verify run exists + _ = ctx.store.getRun(ctx.allocator, run_id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get run\"}}"); + } orelse { + return jsonResponse(404, "{\"error\":{\"code\":\"not_found\",\"message\":\"run not found\"}}"); + }; + + const checkpoints = ctx.store.listCheckpoints(ctx.allocator, run_id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to list checkpoints\"}}"); + }; + + var buf: std.ArrayListUnmanaged(u8) = .empty; + buf.append(ctx.allocator, '[') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + + for (checkpoints, 0..) 
|cp, i| { + if (i > 0) { + buf.append(ctx.allocator, ',') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + } + const entry = buildCheckpointJson(ctx.allocator, cp) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + buf.appendSlice(ctx.allocator, entry) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + } + buf.append(ctx.allocator, ']') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); const json_body = buf.toOwnedSlice(ctx.allocator) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); return jsonResponse(200, json_body); } -// ── Chat Transcript Handler ────────────────────────────────────────── +fn handleGetCheckpoint(ctx: *Context, run_id: []const u8, cp_id: []const u8) HttpResponse { + const cp = ctx.store.getCheckpoint(ctx.allocator, cp_id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get checkpoint\"}}"); + } orelse { + return jsonResponse(404, "{\"error\":{\"code\":\"not_found\",\"message\":\"checkpoint not found\"}}"); + }; -fn handleGetChatTranscript(ctx: *Context, run_id: []const u8, step_id: []const u8) HttpResponse { - _ = switch (lookupStepInRun(ctx, run_id, step_id)) { - .ok => |s| s, - .err => |resp| return resp, + // Verify checkpoint belongs to run + if (!std.mem.eql(u8, cp.run_id, run_id)) { + return jsonResponse(404, "{\"error\":{\"code\":\"not_found\",\"message\":\"checkpoint not found\"}}"); + } + + const json_body = buildCheckpointJson(ctx.allocator, cp) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + return jsonResponse(200, json_body); +} + +fn buildCheckpointJson(allocator: std.mem.Allocator, cp: types.CheckpointRow) ![]const u8 { + const id_json = try jsonQuoted(allocator, cp.id); + 
const run_id_json = try jsonQuoted(allocator, cp.run_id); + const step_id_json = try jsonQuoted(allocator, cp.step_id); + const parent_field = if (cp.parent_id) |pid| blk: { + const pid_json = try jsonQuoted(allocator, pid); + break :blk try std.fmt.allocPrint(allocator, ",\"parent_id\":{s}", .{pid_json}); + } else ""; + const metadata_field = if (cp.metadata_json) |md| + try std.fmt.allocPrint(allocator, ",\"metadata\":{s}", .{md}) + else + ""; + + return try std.fmt.allocPrint(allocator, + \\{{"id":{s},"run_id":{s},"step_id":{s}{s},"state":{s},"completed_nodes":{s},"version":{d}{s},"created_at_ms":{d}}} + , .{ + id_json, + run_id_json, + step_id_json, + parent_field, + cp.state_json, + cp.completed_nodes_json, + cp.version, + metadata_field, + cp.created_at_ms, + }); +} + +// ── State Control Handlers ────────────────────────────────────────── + +fn handleResumeRun(ctx: *Context, run_id: []const u8, body: []const u8) HttpResponse { + // Load run — must be status=interrupted + const run = ctx.store.getRun(ctx.allocator, run_id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get run\"}}"); + } orelse { + return jsonResponse(404, "{\"error\":{\"code\":\"not_found\",\"message\":\"run not found\"}}"); + }; + + if (!std.mem.eql(u8, run.status, "interrupted")) { + const resp = std.fmt.allocPrint(ctx.allocator, + \\{{"error":{{"code":"conflict","message":"run is not interrupted (current: {s})"}}}} + , .{run.status}) catch return jsonResponse(409, "{\"error\":{\"code\":\"conflict\",\"message\":\"run is not interrupted\"}}"); + return jsonResponse(409, resp); + } + + // Load latest checkpoint + const latest_cp = ctx.store.getLatestCheckpoint(ctx.allocator, run_id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get latest checkpoint\"}}"); + } orelse { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"no checkpoint found for run\"}}"); + }; + + // Get 
current state + var current_state = latest_cp.state_json; + + // Apply state_updates from body if provided + if (body.len > 0) { + const bp = std.json.parseFromSlice(std.json.Value, ctx.allocator, body, .{}); + if (bp) |body_parsed| { + defer body_parsed.deinit(); + + if (body_parsed.value == .object) { + if (body_parsed.value.object.get("state_updates")) |updates_val| { + const updates_json = serializeJsonValue(ctx.allocator, updates_val) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to serialize updates\"}}"); + }; + + // Get schema from workflow definition + const schema_json = getSchemaFromRun(ctx, run); + + current_state = state_mod.applyUpdates(ctx.allocator, latest_cp.state_json, updates_json, schema_json) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to apply state updates\"}}"); + }; + } + } + } else |_| { + // Body is not valid JSON — proceed without updates + } + } + + // Save new state + ctx.store.updateRunState(run_id, current_state) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to update run state\"}}"); + }; + + // Set status to running + ctx.store.updateRunStatus(run_id, "running", null) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to update run status\"}}"); + }; + + const run_id_json = jsonQuoted(ctx.allocator, run_id) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + const resp = std.fmt.allocPrint(ctx.allocator, + \\{{"id":{s},"status":"running"}} + , .{run_id_json}) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + return jsonResponse(200, resp); +} + +fn handleForkRun(ctx: *Context, body: []const u8) HttpResponse { + const parsed = std.json.parseFromSlice(std.json.Value, ctx.allocator, body, .{}) catch { + return jsonResponse(400, 
"{\"error\":{\"code\":\"bad_request\",\"message\":\"invalid JSON body\"}}"); + }; + defer parsed.deinit(); + + if (parsed.value != .object) { + return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"body must be a JSON object\"}}"); + } + const obj = parsed.value.object; + + // Get checkpoint_id from body + const checkpoint_id = getJsonString(obj, "checkpoint_id") orelse { + return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"missing required field: checkpoint_id\"}}"); + }; + + // Load checkpoint + const cp = ctx.store.getCheckpoint(ctx.allocator, checkpoint_id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get checkpoint\"}}"); + } orelse { + return jsonResponse(404, "{\"error\":{\"code\":\"not_found\",\"message\":\"checkpoint not found\"}}"); + }; + + // Load the original run to get workflow_json + const orig_run = ctx.store.getRun(ctx.allocator, cp.run_id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get original run\"}}"); + } orelse { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"original run not found\"}}"); + }; + + // Apply state_overrides if provided + var fork_state = cp.state_json; + if (obj.get("state_overrides")) |overrides_val| { + const overrides_json = serializeJsonValue(ctx.allocator, overrides_val) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to serialize overrides\"}}"); + }; + const schema_json = getSchemaFromRun(ctx, orig_run); + fork_state = state_mod.applyUpdates(ctx.allocator, cp.state_json, overrides_json, schema_json) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to apply state overrides\"}}"); + }; + } + + // Generate new run ID + const new_run_id_buf = ids.generateId(); + const new_run_id = ctx.allocator.dupe(u8, &new_run_id_buf) catch return jsonResponse(500, 
+        "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}");
+
+    // Create forked run
+    ctx.store.createForkedRun(new_run_id, orig_run.workflow_json, fork_state, cp.run_id, checkpoint_id) catch {
+        return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to create forked run\"}}");
+    };
+
+    // Create initial checkpoint for forked run
+    const cp_id_buf = ids.generateId();
+    const cp_id = ctx.allocator.dupe(u8, &cp_id_buf) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}");
+    ctx.store.createCheckpoint(cp_id, new_run_id, "__fork__", checkpoint_id, fork_state, cp.completed_nodes_json, 0, null) catch {
+        return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to create checkpoint\"}}");
     };
-    const messages = ctx.store.getChatMessages(ctx.allocator, step_id) catch {
-        return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get chat messages\"}}");
+    // Set to running
+    ctx.store.updateRunStatus(new_run_id, "running", null) catch {
+        return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to update run status\"}}");
+    };
+
+    const run_id_json = jsonQuoted(ctx.allocator, new_run_id) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}");
+    // JSON-quote the checkpoint id as well — interpolating it raw would emit
+    // invalid JSON ("forked_from_checkpoint":cp123). Matches handleReplayRun.
+    const cp_id_json = jsonQuoted(ctx.allocator, checkpoint_id) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}");
+    const resp = std.fmt.allocPrint(ctx.allocator,
+        \\{{"id":{s},"status":"running","forked_from_checkpoint":{s}}}
+    , .{ run_id_json, cp_id_json }) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}");
+    return jsonResponse(201, resp);
+}
+
+// ── Replay Handler ──────────────────────────────────────────────────
+
+fn handleReplayRun(ctx: *Context, run_id: []const u8, body: []const u8) HttpResponse {
+    // Parse replay checkpoint ID. Accept both the canonical
+    // `from_checkpoint_id` field and the older `checkpoint_id` alias so
+    // existing clients keep working.
+ const parsed = std.json.parseFromSlice(std.json.Value, ctx.allocator, body, .{}) catch { + return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"invalid JSON body\"}}"); + }; + defer parsed.deinit(); + + if (parsed.value != .object) { + return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"body must be a JSON object\"}}"); + } + const obj = parsed.value.object; + + const checkpoint_id = getJsonString(obj, "from_checkpoint_id") orelse getJsonString(obj, "checkpoint_id") orelse { + return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"missing required field: from_checkpoint_id or checkpoint_id\"}}"); + }; + + // Load checkpoint + const cp = ctx.store.getCheckpoint(ctx.allocator, checkpoint_id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get checkpoint\"}}"); + } orelse { + return jsonResponse(404, "{\"error\":{\"code\":\"not_found\",\"message\":\"checkpoint not found\"}}"); + }; + + // Verify checkpoint belongs to this run + if (!std.mem.eql(u8, cp.run_id, run_id)) { + return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"checkpoint does not belong to this run\"}}"); + } + + // Load run to verify it exists + _ = ctx.store.getRun(ctx.allocator, run_id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get run\"}}"); + } orelse { + return jsonResponse(404, "{\"error\":{\"code\":\"not_found\",\"message\":\"run not found\"}}"); + }; + + // Delete steps and checkpoints created after the replay checkpoint + // so the engine re-executes from a clean slate. 
+ ctx.store.deleteStepsAfterTimestamp(run_id, cp.created_at_ms) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to clear old steps\"}}"); + }; + ctx.store.deleteCheckpointsAfterVersion(run_id, cp.version) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to clear old checkpoints\"}}"); + }; + + // Reset run state to checkpoint's state + ctx.store.updateRunState(run_id, cp.state_json) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to update run state\"}}"); + }; + + // Set run status to running — engine will pick it up on next tick + // with the checkpoint's completed_nodes + ctx.store.updateRunStatus(run_id, "running", null) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to update run status\"}}"); + }; + + ctx.store.insertEvent(run_id, null, "run.replayed", "{}") catch {}; + + const run_id_json = jsonQuoted(ctx.allocator, run_id) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + const cp_id_json = jsonQuoted(ctx.allocator, checkpoint_id) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + const resp = std.fmt.allocPrint(ctx.allocator, + \\{{"id":{s},"status":"running","replayed_from_checkpoint":{s}}} + , .{ run_id_json, cp_id_json }) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + return jsonResponse(200, resp); +} + +fn handleInjectState(ctx: *Context, run_id: []const u8, body: []const u8) HttpResponse { + // Verify run exists + const run = ctx.store.getRun(ctx.allocator, run_id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get run\"}}"); + } orelse { + return jsonResponse(404, "{\"error\":{\"code\":\"not_found\",\"message\":\"run not found\"}}"); + }; + + const parsed = 
std.json.parseFromSlice(std.json.Value, ctx.allocator, body, .{}) catch { + return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"invalid JSON body\"}}"); + }; + defer parsed.deinit(); + + if (parsed.value != .object) { + return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"body must be a JSON object\"}}"); + } + const obj = parsed.value.object; + + // Get updates + const updates_val = obj.get("updates") orelse { + return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"missing required field: updates\"}}"); + }; + const updates_json = serializeJsonValue(ctx.allocator, updates_val) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to serialize updates\"}}"); + }; + + // Check apply_after_step + const apply_after_step = getJsonString(obj, "apply_after_step"); + + if (apply_after_step == null) { + // Apply immediately to run.state_json + const current_state = run.state_json orelse "{}"; + const schema_json = getSchemaFromRun(ctx, run); + const new_state = state_mod.applyUpdates(ctx.allocator, current_state, updates_json, schema_json) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to apply state updates\"}}"); + }; + ctx.store.updateRunState(run_id, new_state) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to update run state\"}}"); + }; + return jsonResponse(200, "{\"applied\":true}"); + } else { + // Insert into pending_state_injections + ctx.store.createPendingInjection(run_id, updates_json, apply_after_step) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to create pending injection\"}}"); + }; + return jsonResponse(200, "{\"applied\":false,\"pending\":true}"); + } +} + +// ── SSE Stream Handler ────────────────────────────────────────────── + +fn handleStream(ctx: *Context, run_id: []const u8, target: []const u8) HttpResponse { 
+ // For now, return the current state and events as a regular JSON response. + // Full SSE streaming with held-open connections will be implemented + // when the threading model is wired in main.zig (Task 12). + // + // Supports ?mode=values,tasks,debug,updates,custom query param to filter + // which streaming modes the client wants. Default: all modes. + const run = ctx.store.getRun(ctx.allocator, run_id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get run\"}}"); + } orelse { + return jsonResponse(404, "{\"error\":{\"code\":\"not_found\",\"message\":\"run not found\"}}"); + }; + + // Parse requested modes from ?mode= query param + const mode_param = getQueryParam(target, "mode"); + const after_seq = if (getQueryParam(target, "after_seq")) |raw| + std.fmt.parseInt(u64, raw, 10) catch 0 + else + 0; + var requested_modes: [5]bool = .{ true, true, true, true, true }; // all modes by default + if (mode_param) |modes_str| { + // Reset all to false, then enable requested + requested_modes = .{ false, false, false, false, false }; + var mode_it = std.mem.splitScalar(u8, modes_str, ','); + while (mode_it.next()) |mode_name| { + if (sse_mod.StreamMode.fromString(mode_name)) |m| { + requested_modes[@intFromEnum(m)] = true; + } + } + } + + const events_json = if (after_seq == 0) blk: { + const events = ctx.store.getEventsByRun(ctx.allocator, run_id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get events\"}}"); + }; + + // Build events JSON array + var events_buf: std.ArrayListUnmanaged(u8) = .empty; + events_buf.append(ctx.allocator, '[') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + for (events, 0..) 
|ev, i| { + if (i > 0) { + events_buf.append(ctx.allocator, ',') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + } + const kind_json = jsonQuoted(ctx.allocator, ev.kind) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + const entry = std.fmt.allocPrint(ctx.allocator, + \\{{"kind":{s},"data":{s},"ts_ms":{d}}} + , .{ kind_json, ev.data_json, ev.ts_ms }) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + events_buf.appendSlice(ctx.allocator, entry) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + } + events_buf.append(ctx.allocator, ']') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + break :blk events_buf.toOwnedSlice(ctx.allocator) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + } else "[]"; + + // If SSE hub available, snapshot queued SSE events filtered by requested modes + var sse_events_json: []const u8 = "[]"; + var latest_stream_seq: u64 = 0; + var oldest_stream_seq: u64 = 0; + var stream_gap = false; + if (ctx.sse_hub) |hub| { + const queue = hub.getOrCreateQueue(run_id); + const snapshot = queue.snapshotSince(ctx.allocator, after_seq); + latest_stream_seq = snapshot.latest_seq; + oldest_stream_seq = snapshot.oldest_seq; + stream_gap = snapshot.gap_detected; + if (snapshot.events.len > 0) { + var sse_buf: std.ArrayListUnmanaged(u8) = .empty; + sse_buf.append(ctx.allocator, '[') catch {}; + var first = true; + for (snapshot.events) |sse_ev| { + // Filter by requested modes + if (!requested_modes[@intFromEnum(sse_ev.mode)]) continue; + if (!first) { + sse_buf.append(ctx.allocator, ',') catch {}; + } + first = false; + const mode_str = sse_ev.mode.toString(); + const sse_entry = std.fmt.allocPrint(ctx.allocator, + 
\\{{"seq":{d},"event":{s},"mode":"{s}","data":{s}}} + , .{ + sse_ev.seq, + jsonQuoted(ctx.allocator, sse_ev.event_type) catch "\"\"", + mode_str, + sse_ev.data, + }) catch continue; + sse_buf.appendSlice(ctx.allocator, sse_entry) catch {}; + } + sse_buf.append(ctx.allocator, ']') catch {}; + sse_events_json = sse_buf.toOwnedSlice(ctx.allocator) catch "[]"; + } + } + + const status_json = jsonQuoted(ctx.allocator, run.status) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + const state_field = if (run.state_json) |sj| + std.fmt.allocPrint(ctx.allocator, ",\"state\":{s}", .{sj}) catch "" + else + ""; + + const resp = std.fmt.allocPrint(ctx.allocator, + \\{{"status":{s}{s},"events":{s},"stream_events":{s},"next_stream_seq":{d},"stream_oldest_seq":{d},"stream_gap":{s}}} + , .{ + status_json, + state_field, + events_json, + sse_events_json, + latest_stream_seq, + oldest_stream_seq, + if (stream_gap) "true" else "false", + }) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + return jsonResponse(200, resp); +} + +// ── Agent Events Callback Handler ─────────────────────────────────── + +fn handleAgentEventCallback(ctx: *Context, run_id: []const u8, step_id: []const u8, body: []const u8) HttpResponse { + const parsed = std.json.parseFromSlice(std.json.Value, ctx.allocator, body, .{}) catch { + return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"invalid JSON body\"}}"); + }; + defer parsed.deinit(); + + if (parsed.value != .object) { + return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"body must be a JSON object\"}}"); + } + const obj = parsed.value.object; + + const iteration: i64 = if (obj.get("iteration")) |it| blk: { + if (it == .integer) break :blk it.integer; + break :blk 0; + } else 0; + + const tool = getJsonString(obj, "tool"); + const args_json = if (obj.get("args")) |args_val| + 
+        serializeJsonValue(ctx.allocator, args_val) catch null
+    else
+        null;
+    const result_text = getJsonString(obj, "result");
+    const status = getJsonString(obj, "status") orelse "running";
+
+    ctx.store.createAgentEvent(run_id, step_id, iteration, tool, args_json, result_text, status) catch {
+        return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to create agent event\"}}");
     };
-    // Build JSON array of chat messages
-    var buf: std.ArrayListUnmanaged(u8) = .empty;
-    buf.append(ctx.allocator, '[') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}");
-
-    for (messages, 0..) |msg, i| {
-        if (i > 0) {
-            buf.append(ctx.allocator, ',') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}");
+    // If sse_hub is available, broadcast as agent_event
+    if (ctx.sse_hub) |hub| {
+        // JSON-quote the client-supplied status so a value containing quotes
+        // or backslashes cannot produce a malformed event payload.
+        const status_json = jsonQuoted(ctx.allocator, status) catch "\"running\"";
+        const event_data = std.fmt.allocPrint(ctx.allocator,
+            \\{{"run_id":"{s}","step_id":"{s}","iteration":{d},"status":{s}}}
+        , .{ run_id, step_id, iteration, status_json }) catch "";
+        if (event_data.len > 0) {
+            hub.broadcast(run_id, .{ .event_type = "agent_event", .data = event_data });
         }
+    }
-        const worker_field = if (msg.worker_id) |wid| blk: {
-            const wid_json = jsonQuoted(ctx.allocator, wid) catch "";
-            break :blk std.fmt.allocPrint(ctx.allocator, ",\"worker_id\":{s}", .{wid_json}) catch "";
-        } else "";
-        const msg_run_id_json = jsonQuoted(ctx.allocator, msg.run_id) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}");
-        const msg_step_id_json = jsonQuoted(ctx.allocator, msg.step_id) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}");
-        const role_json = jsonQuoted(ctx.allocator, msg.role) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}");
-        const message_json = jsonQuoted(ctx.allocator, msg.message) catch return jsonResponse(500,
"{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + return jsonResponse(200, "{\"ok\":true}"); +} - const entry = std.fmt.allocPrint(ctx.allocator, - \\{{"id":{d},"run_id":{s},"step_id":{s},"round":{d},"role":{s}{s},"message":{s},"ts_ms":{d}}} - , .{ - msg.id, - msg_run_id_json, - msg_step_id_json, - msg.round, - role_json, - worker_field, - message_json, - msg.ts_ms, - }) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); - buf.appendSlice(ctx.allocator, entry) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); - } +// ── State Helper ──────────────────────────────────────────────────── - buf.append(ctx.allocator, ']') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); - const json_body = buf.toOwnedSlice(ctx.allocator) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); - return jsonResponse(200, json_body); +fn getSchemaFromRun(ctx: *Context, run: types.RunRow) []const u8 { + const def_parsed = std.json.parseFromSlice(std.json.Value, ctx.allocator, run.workflow_json, .{}) catch return "{}"; + defer def_parsed.deinit(); + if (def_parsed.value != .object) return "{}"; + if (def_parsed.value.object.get("state_schema")) |ss| { + return serializeJsonValue(ctx.allocator, ss) catch "{}"; + } + return "{}"; } // ── Tracker Handlers ───────────────────────────────────────────────── @@ -1188,16 +1963,6 @@ fn validationErrorResponse(err: workflow_validation.ValidateError) HttpResponse error.DependsOnItemNotString => jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"depends_on items must be strings\"}}"), error.DependsOnDuplicate => jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"depends_on contains duplicate step id\"}}"), error.DependsOnUnknownStepId => jsonResponse(400, 
"{\"error\":{\"code\":\"bad_request\",\"message\":\"depends_on references unknown step id\"}}"), - error.LoopBodyRequired => jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"loop step requires 'body' field\"}}"), - error.SubWorkflowRequired => jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"sub_workflow step requires 'workflow' field\"}}"), - error.WaitConditionRequired => jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"wait step requires 'duration_ms', 'until_ms', or 'signal'\"}}"), - error.WaitDurationInvalid => jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"wait.duration_ms must be a non-negative integer\"}}"), - error.WaitUntilInvalid => jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"wait.until_ms must be a non-negative integer\"}}"), - error.WaitSignalInvalid => jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"wait.signal must be a non-empty string\"}}"), - error.RouterRoutesRequired => jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"router step requires 'routes' field\"}}"), - error.SagaBodyRequired => jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"saga step requires 'body' field\"}}"), - error.DebateCountRequired => jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"debate step requires 'count' field\"}}"), - error.GroupChatParticipantsRequired => jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"group_chat step requires 'participants' field\"}}"), error.RetryMustBeObject => jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"retry must be an object\"}}"), error.MaxAttemptsMustBePositiveInteger => jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"retry.max_attempts must be a positive integer\"}}"), error.TimeoutMsMustBePositiveInteger => jsonResponse(400, 
"{\"error\":{\"code\":\"bad_request\",\"message\":\"timeout_ms must be a positive integer\"}}"), @@ -1377,6 +2142,14 @@ fn getPathSegment(segments: [max_segments]?[]const u8, index: usize) ?[]const u8 return segments[index]; } +fn decodePathSegment(allocator: std.mem.Allocator, segment: ?[]const u8) ?[]const u8 { + const raw = segment orelse return null; + if (std.mem.indexOfScalar(u8, raw, '%') == null) return raw; + + const encoded = allocator.dupe(u8, raw) catch return raw; + return std.Uri.percentDecodeInPlace(encoded); +} + fn eql(a: ?[]const u8, b: []const u8) bool { if (a) |val| return std.mem.eql(u8, val, b); return false; @@ -1581,48 +2354,6 @@ test "API: create run rejects non-positive timeout_ms" { try std.testing.expectEqual(@as(u16, 400), resp.status_code); } -test "API: create run rejects invalid wait duration string" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - var ctx = Context{ - .store = &store, - .allocator = arena.allocator(), - }; - - const body = - \\{"steps":[{"id":"w1","type":"wait","duration_ms":"abc"}]} - ; - - const resp = handleRequest(&ctx, "POST", "/runs", body); - try std.testing.expectEqual(@as(u16, 400), resp.status_code); -} - -test "API: create run rejects invalid wait signal type" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - var ctx = Context{ - .store = &store, - .allocator = arena.allocator(), - }; - - const body = - \\{"steps":[{"id":"w1","type":"wait","signal":1}]} - ; - - const resp = handleRequest(&ctx, "POST", "/runs", body); - try std.testing.expectEqual(@as(u16, 400), resp.status_code); -} - test "API: create run rejects duplicate depends_on items" { const allocator = std.testing.allocator; var store = 
try Store.init(allocator, ":memory:"); @@ -1664,34 +2395,6 @@ test "API: get step enforces run ownership" { try std.testing.expectEqual(@as(u16, 404), resp.status_code); } -test "API: chat transcript escapes message content" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - try store.insertRun("run-chat", null, "running", "{\"steps\":[]}", "{}", "[]"); - try store.insertStep("step-chat-1", "run-chat", "chat", "group_chat", "completed", "{}", 1, null, null, null); - try store.insertChatMessage("run-chat", "step-chat-1", 1, "agent", null, "He said \"go\"\\nline"); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - var ctx = Context{ - .store = &store, - .allocator = arena.allocator(), - }; - - const resp = handleRequest(&ctx, "GET", "/runs/run-chat/steps/step-chat-1/chat", ""); - try std.testing.expectEqual(@as(u16, 200), resp.status_code); - - const parsed = try std.json.parseFromSlice(std.json.Value, allocator, resp.body, .{}); - defer parsed.deinit(); - - try std.testing.expectEqual(@as(usize, 1), parsed.value.array.items.len); - const msg = parsed.value.array.items[0].object.get("message").?; - try std.testing.expectEqualStrings("He said \"go\"\\nline", msg.string); -} - test "API: register worker rejects non-array tags" { const allocator = std.testing.allocator; var store = try Store.init(allocator, ":memory:"); @@ -1772,30 +2475,6 @@ test "API: register worker rejects non-positive max_concurrent" { try std.testing.expectEqual(@as(u16, 400), resp.status_code); } -test "API: approve route does not match extra path segment" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - try store.insertRun("r1", null, "running", "{\"steps\":[]}", "{}", "[]"); - try store.insertStep("s1", "r1", "approve-1", "approval", "waiting_approval", "{}", 1, null, null, null); - - var arena = 
std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - var ctx = Context{ - .store = &store, - .allocator = arena.allocator(), - }; - - const resp = handleRequest(&ctx, "POST", "/runs/r1/steps/s1/approve/extra", ""); - try std.testing.expectEqual(@as(u16, 404), resp.status_code); - try std.testing.expect(std.mem.indexOf(u8, resp.body, "endpoint not found") != null); - - const step = (try store.getStep(arena.allocator(), "s1")).?; - try std.testing.expectEqualStrings("waiting_approval", step.status); -} - test "API: register openai_chat worker requires model" { const allocator = std.testing.allocator; var store = try Store.init(allocator, ":memory:"); @@ -1942,3 +2621,246 @@ test "API: metrics endpoint returns text format" { try std.testing.expect(std.mem.startsWith(u8, resp.content_type, "text/plain")); try std.testing.expect(std.mem.indexOf(u8, resp.body, "nullboiler_http_requests_total") != null); } + +test "API: list runs supports workflow_id filter" { + const allocator = std.testing.allocator; + var store = try Store.init(allocator, ":memory:"); + defer store.deinit(); + + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + try store.createWorkflowWithVersion("wf_1", "WF 1", "{\"nodes\":{},\"edges\":[]}", 1); + try store.createWorkflowWithVersion("wf_2", "WF 2", "{\"nodes\":{},\"edges\":[]}", 1); + try store.createRunWithStateAndStatus("r1", "wf_1", "{\"nodes\":{},\"edges\":[]}", "{}", "{}", "running"); + try store.createRunWithStateAndStatus("r2", "wf_2", "{\"nodes\":{},\"edges\":[]}", "{}", "{}", "running"); + + var ctx = Context{ + .store = &store, + .allocator = arena.allocator(), + }; + + const resp = handleRequest(&ctx, "GET", "/runs?workflow_id=wf_1", ""); + try std.testing.expectEqual(@as(u16, 200), resp.status_code); + try std.testing.expect(std.mem.indexOf(u8, resp.body, "\"workflow_id\":\"wf_1\"") != null); + try std.testing.expect(std.mem.indexOf(u8, resp.body, "\"workflow_id\":\"wf_2\"") == null); +} + 
+test "API: replay run from checkpoint" { + const allocator = std.testing.allocator; + var store = try Store.init(allocator, ":memory:"); + defer store.deinit(); + + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + // Create a run with a checkpoint + try store.createRunWithState("r1", null, "{\"nodes\":{}}", "{}", "{\"x\":1}"); + try store.updateRunStatus("r1", "completed", null); + try store.createCheckpoint("cp1", "r1", "step_a", null, "{\"x\":1}", "[\"step_a\"]", 1, null); + + var ctx = Context{ + .store = &store, + .allocator = arena.allocator(), + }; + + const body = + \\{"from_checkpoint_id":"cp1"} + ; + + const resp = handleRequest(&ctx, "POST", "/runs/r1/replay", body); + try std.testing.expectEqual(@as(u16, 200), resp.status_code); + try std.testing.expect(std.mem.indexOf(u8, resp.body, "running") != null); + try std.testing.expect(std.mem.indexOf(u8, resp.body, "replayed_from_checkpoint") != null); + + // Verify run state was reset to checkpoint state + const run = (try store.getRun(arena.allocator(), "r1")).?; + try std.testing.expectEqualStrings("running", run.status); + if (run.state_json) |sj| { + try std.testing.expectEqualStrings("{\"x\":1}", sj); + } +} + +test "API: replay run accepts checkpoint_id alias" { + const allocator = std.testing.allocator; + var store = try Store.init(allocator, ":memory:"); + defer store.deinit(); + + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + try store.createRunWithState("r1", null, "{\"nodes\":{}}", "{}", "{\"x\":1}"); + try store.updateRunStatus("r1", "completed", null); + try store.createCheckpoint("cp1", "r1", "step_a", null, "{\"x\":1}", "[\"step_a\"]", 1, null); + + var ctx = Context{ + .store = &store, + .allocator = arena.allocator(), + }; + + const body = + \\{"checkpoint_id":"cp1"} + ; + + const resp = handleRequest(&ctx, "POST", "/runs/r1/replay", body); + try std.testing.expectEqual(@as(u16, 200), resp.status_code); + try 
std.testing.expect(std.mem.indexOf(u8, resp.body, "replayed_from_checkpoint") != null); +} + +test "API: replay run rejects wrong checkpoint" { + const allocator = std.testing.allocator; + var store = try Store.init(allocator, ":memory:"); + defer store.deinit(); + + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + // Create two runs, checkpoint belongs to r2 + try store.createRunWithState("r1", null, "{}", "{}", "{}"); + try store.createRunWithState("r2", null, "{}", "{}", "{}"); + try store.createCheckpoint("cp_r2", "r2", "step_a", null, "{}", "[]", 1, null); + + var ctx = Context{ + .store = &store, + .allocator = arena.allocator(), + }; + + const body = + \\{"from_checkpoint_id":"cp_r2"} + ; + + const resp = handleRequest(&ctx, "POST", "/runs/r1/replay", body); + try std.testing.expectEqual(@as(u16, 400), resp.status_code); + try std.testing.expect(std.mem.indexOf(u8, resp.body, "does not belong") != null); +} + +test "API: replay run rejects missing checkpoint" { + const allocator = std.testing.allocator; + var store = try Store.init(allocator, ":memory:"); + defer store.deinit(); + + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + try store.createRunWithState("r1", null, "{}", "{}", "{}"); + + var ctx = Context{ + .store = &store, + .allocator = arena.allocator(), + }; + + const body = + \\{"from_checkpoint_id":"nonexistent"} + ; + + const resp = handleRequest(&ctx, "POST", "/runs/r1/replay", body); + try std.testing.expectEqual(@as(u16, 404), resp.status_code); +} + +test "API: replay run rejects missing field" { + const allocator = std.testing.allocator; + var store = try Store.init(allocator, ":memory:"); + defer store.deinit(); + + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + try store.createRunWithState("r1", null, "{}", "{}", "{}"); + + var ctx = Context{ + .store = &store, + .allocator = arena.allocator(), + }; + + const resp = handleRequest(&ctx, 
"POST", "/runs/r1/replay", "{}"); + try std.testing.expectEqual(@as(u16, 400), resp.status_code); +} + +test "API: stream with mode query param" { + const allocator = std.testing.allocator; + var store = try Store.init(allocator, ":memory:"); + defer store.deinit(); + + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + try store.createRunWithState("r1", null, "{}", "{}", "{\"x\":1}"); + try store.updateRunStatus("r1", "running", null); + + var ctx = Context{ + .store = &store, + .allocator = arena.allocator(), + }; + + // Default (no mode param) — should succeed + const resp1 = handleRequest(&ctx, "GET", "/runs/r1/stream", ""); + try std.testing.expectEqual(@as(u16, 200), resp1.status_code); + try std.testing.expect(std.mem.indexOf(u8, resp1.body, "stream_events") != null); + + // With specific modes + const resp2 = handleRequest(&ctx, "GET", "/runs/r1/stream?mode=values,debug", ""); + try std.testing.expectEqual(@as(u16, 200), resp2.status_code); + try std.testing.expect(std.mem.indexOf(u8, resp2.body, "stream_events") != null); +} + +test "API: stream supports independent cursors for multiple consumers" { + const allocator = std.testing.allocator; + var store = try Store.init(allocator, ":memory:"); + defer store.deinit(); + + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + var hub = sse_mod.SseHub.init(allocator); + defer hub.deinit(); + + try store.createRunWithState("r1", null, "{}", "{}", "{\"x\":1}"); + try store.updateRunStatus("r1", "running", null); + + const queue = hub.getOrCreateQueue("r1"); + queue.push(.{ .event_type = "values", .data = "{\"step\":\"n1\"}", .mode = .values }); + + var ctx = Context{ + .store = &store, + .allocator = arena.allocator(), + .sse_hub = &hub, + }; + + const consumer_a = handleRequest(&ctx, "GET", "/runs/r1/stream", ""); + try std.testing.expectEqual(@as(u16, 200), consumer_a.status_code); + try std.testing.expect(std.mem.indexOf(u8, consumer_a.body, 
"\"seq\":1") != null); + + const consumer_b = handleRequest(&ctx, "GET", "/runs/r1/stream", ""); + try std.testing.expectEqual(@as(u16, 200), consumer_b.status_code); + try std.testing.expect(std.mem.indexOf(u8, consumer_b.body, "\"seq\":1") != null); + + queue.push(.{ .event_type = "updates", .data = "{\"step\":\"n2\"}", .mode = .updates }); + const consumer_a_next = handleRequest(&ctx, "GET", "/runs/r1/stream?after_seq=1", ""); + try std.testing.expectEqual(@as(u16, 200), consumer_a_next.status_code); + try std.testing.expect(std.mem.indexOf(u8, consumer_a_next.body, "\"seq\":2") != null); + try std.testing.expect(std.mem.indexOf(u8, consumer_a_next.body, "\"events\":[]") != null); + try std.testing.expect(std.mem.indexOf(u8, consumer_a_next.body, "\"next_stream_seq\":2") != null); +} + +test "API: workflow routes decode percent-encoded ids" { + const allocator = std.testing.allocator; + var store = try Store.init(allocator, ":memory:"); + defer store.deinit(); + + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + try store.createWorkflowWithVersion("wf/alpha beta", "Encoded Workflow", "{\"nodes\":{},\"edges\":[]}", 1); + + var ctx = Context{ + .store = &store, + .allocator = arena.allocator(), + }; + + const get_resp = handleRequest(&ctx, "GET", "/workflows/wf%2Falpha%20beta", ""); + try std.testing.expectEqual(@as(u16, 200), get_resp.status_code); + try std.testing.expect(std.mem.indexOf(u8, get_resp.body, "\"id\":\"wf/alpha beta\"") != null); + + const validate_resp = handleRequest(&ctx, "POST", "/workflows/wf%2Falpha%20beta/validate", ""); + try std.testing.expectEqual(@as(u16, 200), validate_resp.status_code); +} diff --git a/src/config.zig b/src/config.zig index 430f5b2..0d3406e 100644 --- a/src/config.zig +++ b/src/config.zig @@ -77,6 +77,7 @@ pub const Config = struct { port: u16 = 8080, db: []const u8 = "nullboiler.db", api_token: ?[]const u8 = null, + self_url: ?[]const u8 = null, strategies_dir: []const u8 = "strategies", 
workers: []const WorkerConfig = &.{}, engine: EngineConfig = .{}, diff --git a/src/dispatch.zig b/src/dispatch.zig index c1ccdbb..65fdadd 100644 --- a/src/dispatch.zig +++ b/src/dispatch.zig @@ -14,7 +14,7 @@ pub const WorkerInfo = struct { id: []const u8, url: []const u8, token: []const u8, - protocol: []const u8 = "webhook", // "webhook", "api_chat", "openai_chat" + protocol: []const u8 = "webhook", // "webhook", "api_chat", "openai_chat", "a2a" model: ?[]const u8 = null, tags_json: []const u8, // JSON array like ["coder","researcher"] max_concurrent: i64, @@ -84,6 +84,23 @@ fn workerMatchesTags( return false; } +// ── Agent Step Options ──────────────────────────────────────────────── + +/// Extra fields included in the webhook body when step type is "agent". +pub const AgentOpts = struct { + /// "autonomous" or "managed" + mode: ?[]const u8 = null, + /// Full callback URL for agent events; if null, omitted from body. + /// Typically constructed as: self_url + "/internal/agent-events/{run_id}/{step_id}" + callback_url: ?[]const u8 = null, + /// Maximum agent iterations; if null, omitted from body. + max_iterations: ?i64 = null, + /// JSON array of tool names, e.g. "[\"search\",\"code\"]"; if null, omitted from body. + tools_json: ?[]const u8 = null, + /// Current state JSON to pass to the agent; if null, omitted from body. + state_json: ?[]const u8 = null, +}; + // ── HTTP Dispatch ───────────────────────────────────────────────────── pub fn dispatchStep( @@ -95,6 +112,24 @@ pub fn dispatchStep( run_id: []const u8, step_id: []const u8, rendered_prompt: []const u8, +) !DispatchResult { + return dispatchStepWithOpts(allocator, worker_url, worker_token, worker_protocol_raw, worker_model, run_id, step_id, rendered_prompt, null); +} + +/// Like dispatchStep but also accepts optional agent-specific fields. 
+/// When agent_opts is non-null and the protocol is webhook, the additional +/// fields (mode, callback_url, max_iterations, tools, state) are merged +/// into the request body. +pub fn dispatchStepWithOpts( + allocator: std.mem.Allocator, + worker_url: []const u8, + worker_token: []const u8, + worker_protocol_raw: []const u8, + worker_model: ?[]const u8, + run_id: []const u8, + step_id: []const u8, + rendered_prompt: []const u8, + agent_opts: ?AgentOpts, ) !DispatchResult { const protocol = worker_protocol.parse(worker_protocol_raw) orelse { const err_msg = try std.fmt.allocPrint(allocator, "unsupported worker protocol: {s}", .{worker_protocol_raw}); @@ -131,6 +166,7 @@ pub fn dispatchStep( run_id, step_id, rendered_prompt, + agent_opts, ) catch |err| switch (err) { error.MissingWorkerModel => { return DispatchResult{ @@ -195,6 +231,12 @@ pub fn dispatchStep( } const response_data = response_body.written(); + + // A2A uses JSON-RPC 2.0 responses; parse them with the A2A-specific parser + if (protocol == .a2a) { + return try parseA2aResponse(allocator, response_data); + } + return try worker_response.parse(allocator, response_data); } @@ -205,7 +247,7 @@ pub fn probeWorker( ) bool { const protocol = worker_protocol.parse(worker_protocol_raw) orelse return false; - // Async protocols (mqtt/redis_stream) can't be probed via HTTP + // Async protocols (mqtt/redis_stream) can't be probed via HTTP; a2a is probed via its own endpoint if (protocol == .mqtt or protocol == .redis_stream) return true; const url = worker_protocol.buildRequestUrl(allocator, worker_url, protocol) catch return false; @@ -234,12 +276,18 @@ fn buildRequestBody( run_id: []const u8, step_id: []const u8, rendered_prompt: []const u8, + agent_opts: ?AgentOpts, ) ![]const u8 { const session_key = try std.fmt.allocPrint(allocator, "run_{s}_step_{s}", .{ run_id, step_id }); defer allocator.free(session_key); switch (protocol) { .webhook => { + // For agent steps with opts, build an extended body that 
includes + // agent-specific fields alongside the standard webhook fields. + if (agent_opts) |opts| { + return buildWebhookAgentBody(allocator, session_key, rendered_prompt, opts); + } return std.json.Stringify.valueAlloc(allocator, .{ .message = rendered_prompt, .text = rendered_prompt, @@ -267,6 +315,9 @@ fn buildRequestBody( .messages = messages[0..], }, .{}); }, + .a2a => { + return buildA2aRequestBody(allocator, rendered_prompt, session_key); + }, .mqtt, .redis_stream => { // MQTT and Redis Stream use async dispatch; body built by their respective clients return std.json.Stringify.valueAlloc(allocator, .{ @@ -277,6 +328,247 @@ fn buildRequestBody( } } +/// Build the webhook JSON body for an agent step, merging standard fields with +/// agent-specific optional fields (mode, callback_url, max_iterations, tools, state). +/// Only non-null fields from agent_opts are included in the output. +fn buildWebhookAgentBody( + allocator: std.mem.Allocator, + session_key: []const u8, + rendered_prompt: []const u8, + opts: AgentOpts, +) ![]const u8 { + var buf: std.ArrayListUnmanaged(u8) = .empty; + errdefer buf.deinit(allocator); + + // Standard webhook fields + try buf.appendSlice(allocator, "{\"message\":"); + try appendJsonString(&buf, allocator, rendered_prompt); + try buf.appendSlice(allocator, ",\"text\":"); + try appendJsonString(&buf, allocator, rendered_prompt); + try buf.appendSlice(allocator, ",\"session_key\":"); + try appendJsonString(&buf, allocator, session_key); + try buf.appendSlice(allocator, ",\"session_id\":"); + try appendJsonString(&buf, allocator, session_key); + + // Optional agent fields + if (opts.mode) |mode| { + try buf.appendSlice(allocator, ",\"mode\":"); + try appendJsonString(&buf, allocator, mode); + } + if (opts.callback_url) |cb_url| { + try buf.appendSlice(allocator, ",\"callback_url\":"); + try appendJsonString(&buf, allocator, cb_url); + } + if (opts.max_iterations) |max_iter| { + const field = try std.fmt.allocPrint(allocator, 
",\"max_iterations\":{d}", .{max_iter}); + defer allocator.free(field); + try buf.appendSlice(allocator, field); + } + if (opts.tools_json) |tools| { + // tools_json is already a JSON array string — embed it verbatim + try buf.appendSlice(allocator, ",\"tools\":"); + try buf.appendSlice(allocator, tools); + } + if (opts.state_json) |state| { + // state_json is already a JSON object/value — embed it verbatim + try buf.appendSlice(allocator, ",\"state\":"); + try buf.appendSlice(allocator, state); + } + + try buf.append(allocator, '}'); + + return buf.toOwnedSlice(allocator); +} + +/// Append a JSON-encoded string (with surrounding quotes and escapes) to buf. +fn appendJsonString(buf: *std.ArrayListUnmanaged(u8), allocator: std.mem.Allocator, s: []const u8) !void { + try buf.append(allocator, '"'); + for (s) |byte| { + switch (byte) { + '"' => try buf.appendSlice(allocator, "\\\""), + '\\' => try buf.appendSlice(allocator, "\\\\"), + '\n' => try buf.appendSlice(allocator, "\\n"), + '\r' => try buf.appendSlice(allocator, "\\r"), + '\t' => try buf.appendSlice(allocator, "\\t"), + 0x00...0x08, 0x0b, 0x0c, 0x0e...0x1f => { + const escaped = try std.fmt.allocPrint(allocator, "\\u{x:0>4}", .{byte}); + defer allocator.free(escaped); + try buf.appendSlice(allocator, escaped); + }, + else => try buf.append(allocator, byte), + } + } + try buf.append(allocator, '"'); +} + +// ── A2A Protocol Support ────────────────────────────────────────────── + +/// Build an A2A (Agent-to-Agent) JSON-RPC 2.0 request body using tasks/send. +/// The context_id provides session persistence — same context_id means same conversation. 
+fn buildA2aRequestBody( + allocator: std.mem.Allocator, + prompt: []const u8, + context_id: []const u8, +) ![]const u8 { + // Build the parts array + const parts = [_]struct { + type: []const u8, + text: []const u8, + }{ + .{ .type = "text", .text = prompt }, + }; + + // Build the message + const message = .{ + .role = "user", + .parts = parts[0..], + }; + + // Build the params + const params = .{ + .message = message, + .contextId = context_id, + }; + + // Build the full JSON-RPC request + return std.json.Stringify.valueAlloc(allocator, .{ + .jsonrpc = "2.0", + .id = context_id, + .method = "tasks/send", + .params = params, + }, .{}); +} + +/// Parse an A2A JSON-RPC 2.0 response and extract the text from the first artifact. +/// Expected structure: result.artifacts[0].parts[0].text (or .kind=="text") +/// Also checks for JSON-RPC error responses. +fn parseA2aResponse(allocator: std.mem.Allocator, response_body: []const u8) !DispatchResult { + const parsed = std.json.parseFromSlice(std.json.Value, allocator, response_body, .{}) catch { + return DispatchResult{ + .output = "", + .success = false, + .error_text = "A2A: invalid JSON response", + }; + }; + defer parsed.deinit(); + + if (parsed.value != .object) { + return DispatchResult{ + .output = "", + .success = false, + .error_text = "A2A: response is not a JSON object", + }; + } + const obj = parsed.value.object; + + // Check for JSON-RPC error + if (obj.get("error")) |err_val| { + if (err_val == .object) { + if (err_val.object.get("message")) |msg_val| { + if (msg_val == .string) { + return DispatchResult{ + .output = "", + .success = false, + .error_text = try allocator.dupe(u8, msg_val.string), + }; + } + } + } + return DispatchResult{ + .output = "", + .success = false, + .error_text = "A2A: JSON-RPC error", + }; + } + + // Extract result + const result_val = obj.get("result") orelse { + return DispatchResult{ + .output = "", + .success = false, + .error_text = "A2A: missing result field", + }; + }; + if 
(result_val != .object) { + return DispatchResult{ + .output = "", + .success = false, + .error_text = "A2A: result is not an object", + }; + } + const result_obj = result_val.object; + + // Check task status + if (result_obj.get("status")) |status_val| { + if (status_val == .object) { + if (status_val.object.get("state")) |state_val| { + if (state_val == .string) { + if (std.mem.eql(u8, state_val.string, "failed")) { + // Extract error message from status if available + if (status_val.object.get("message")) |msg| { + if (msg == .object) { + if (msg.object.get("parts")) |msg_parts| { + if (msg_parts == .array and msg_parts.array.items.len > 0) { + const first_part = msg_parts.array.items[0]; + if (first_part == .object) { + if (first_part.object.get("text")) |t| { + if (t == .string) { + return DispatchResult{ + .output = "", + .success = false, + .error_text = try allocator.dupe(u8, t.string), + }; + } + } + } + } + } + } + } + return DispatchResult{ + .output = "", + .success = false, + .error_text = "A2A: task failed", + }; + } + } + } + } + } + + // Extract text from artifacts[0].parts[0].text + if (result_obj.get("artifacts")) |artifacts_val| { + if (artifacts_val == .array and artifacts_val.array.items.len > 0) { + const first_artifact = artifacts_val.array.items[0]; + if (first_artifact == .object) { + if (first_artifact.object.get("parts")) |parts_val| { + if (parts_val == .array and parts_val.array.items.len > 0) { + const first_part = parts_val.array.items[0]; + if (first_part == .object) { + // Check for "text" field (A2A uses "text" key for text parts) + if (first_part.object.get("text")) |text_val| { + if (text_val == .string) { + return DispatchResult{ + .output = try allocator.dupe(u8, text_val.string), + .success = true, + .error_text = null, + }; + } + } + } + } + } + } + } + } + + return DispatchResult{ + .output = "", + .success = false, + .error_text = "A2A: no text found in artifacts", + }; +} + /// Build the wire-format JSON body for async 
(MQTT/Redis) dispatch. /// Includes correlation_id, reply_to topic/stream, timestamp, auth token, /// the rendered prompt, and a session_key matching the correlation_id. @@ -623,8 +915,54 @@ test "buildRequestBody: openai_chat requires model" { const allocator = std.testing.allocator; try std.testing.expectError( error.MissingWorkerModel, - buildRequestBody(allocator, .openai_chat, null, "run-1", "step-1", "hello"), + buildRequestBody(allocator, .openai_chat, null, "run-1", "step-1", "hello", null), + ); +} + +test "buildWebhookAgentBody: includes all agent fields when present" { + const allocator = std.testing.allocator; + const opts = AgentOpts{ + .mode = "autonomous", + .callback_url = "http://localhost:8080/internal/agent-events/run-1/step-1", + .max_iterations = 25, + .tools_json = "[\"search\",\"code\"]", + .state_json = "{\"foo\":\"bar\"}", + }; + const body = try buildRequestBody(allocator, .webhook, null, "run-1", "step-1", "do something", opts); + defer allocator.free(body); + + const parsed = try std.json.parseFromSlice(std.json.Value, allocator, body, .{}); + defer parsed.deinit(); + const obj = parsed.value.object; + + try std.testing.expectEqualStrings("do something", obj.get("message").?.string); + try std.testing.expectEqualStrings("autonomous", obj.get("mode").?.string); + try std.testing.expectEqualStrings( + "http://localhost:8080/internal/agent-events/run-1/step-1", + obj.get("callback_url").?.string, ); + try std.testing.expectEqual(@as(i64, 25), obj.get("max_iterations").?.integer); + // tools and state are embedded JSON — check they round-trip + const tools_arr = obj.get("tools").?.array; + try std.testing.expectEqual(@as(usize, 2), tools_arr.items.len); + try std.testing.expectEqualStrings("search", tools_arr.items[0].string); +} + +test "buildWebhookAgentBody: omits null agent fields" { + const allocator = std.testing.allocator; + const opts = AgentOpts{ .mode = "managed" }; + const body = try buildRequestBody(allocator, .webhook, null, 
"run-1", "step-1", "hello", opts); + defer allocator.free(body); + + const parsed = try std.json.parseFromSlice(std.json.Value, allocator, body, .{}); + defer parsed.deinit(); + const obj = parsed.value.object; + + try std.testing.expectEqualStrings("managed", obj.get("mode").?.string); + try std.testing.expect(obj.get("callback_url") == null); + try std.testing.expect(obj.get("max_iterations") == null); + try std.testing.expect(obj.get("tools") == null); + try std.testing.expect(obj.get("state") == null); } test "buildAsyncRequestBody: produces valid wire-format JSON with all fields" { @@ -703,3 +1041,92 @@ test "dispatchRedis: invalid URL returns error" { try std.testing.expect(!result.success); try std.testing.expectEqualStrings("invalid redis:// URL", result.error_text.?); } + +test "buildA2aRequestBody: produces valid JSON-RPC 2.0 request" { + const allocator = std.testing.allocator; + const body = try buildA2aRequestBody(allocator, "Fix the bug in main.py", "run_abc_step_fix"); + defer allocator.free(body); + + const parsed = try std.json.parseFromSlice(std.json.Value, allocator, body, .{}); + defer parsed.deinit(); + const obj = parsed.value.object; + + try std.testing.expectEqualStrings("2.0", obj.get("jsonrpc").?.string); + try std.testing.expectEqualStrings("run_abc_step_fix", obj.get("id").?.string); + try std.testing.expectEqualStrings("tasks/send", obj.get("method").?.string); + + const params = obj.get("params").?.object; + try std.testing.expectEqualStrings("run_abc_step_fix", params.get("contextId").?.string); + + const message = params.get("message").?.object; + try std.testing.expectEqualStrings("user", message.get("role").?.string); + + const parts = message.get("parts").?.array; + try std.testing.expectEqual(@as(usize, 1), parts.items.len); + try std.testing.expectEqualStrings("text", parts.items[0].object.get("type").?.string); + try std.testing.expectEqualStrings("Fix the bug in main.py", parts.items[0].object.get("text").?.string); +} + +test 
"buildRequestBody: a2a protocol produces JSON-RPC body" { + const allocator = std.testing.allocator; + const body = try buildRequestBody(allocator, .a2a, null, "run-1", "step-1", "hello agent", null); + defer allocator.free(body); + + const parsed = try std.json.parseFromSlice(std.json.Value, allocator, body, .{}); + defer parsed.deinit(); + const obj = parsed.value.object; + + try std.testing.expectEqualStrings("2.0", obj.get("jsonrpc").?.string); + try std.testing.expectEqualStrings("tasks/send", obj.get("method").?.string); + // context_id is "run_{run_id}_step_{step_id}" + try std.testing.expectEqualStrings("run_run-1_step_step-1", obj.get("id").?.string); +} + +test "parseA2aResponse: extracts text from successful response" { + const allocator = std.testing.allocator; + const response = + \\{"jsonrpc":"2.0","id":"req-1","result":{"id":"task-1","contextId":"ctx-1","status":{"state":"completed","timestamp":"2025-01-01T00:00:00Z"},"artifacts":[{"artifactId":"a1","parts":[{"kind":"text","text":"The bug has been fixed."}]}]}} + ; + const result = try parseA2aResponse(allocator, response); + defer allocator.free(result.output); + try std.testing.expect(result.success); + try std.testing.expectEqualStrings("The bug has been fixed.", result.output); +} + +test "parseA2aResponse: handles JSON-RPC error" { + const allocator = std.testing.allocator; + const response = + \\{"jsonrpc":"2.0","id":"req-1","error":{"code":-32600,"message":"Invalid Request"}} + ; + const result = try parseA2aResponse(allocator, response); + defer allocator.free(result.error_text.?); + try std.testing.expect(!result.success); + try std.testing.expectEqualStrings("Invalid Request", result.error_text.?); +} + +test "parseA2aResponse: handles failed task status" { + const allocator = std.testing.allocator; + const response = + \\{"jsonrpc":"2.0","id":"req-1","result":{"id":"task-1","status":{"state":"failed"}}} + ; + const result = try parseA2aResponse(allocator, response); + try 
std.testing.expect(!result.success); + try std.testing.expectEqualStrings("A2A: task failed", result.error_text.?); +} + +test "parseA2aResponse: handles missing artifacts" { + const allocator = std.testing.allocator; + const response = + \\{"jsonrpc":"2.0","id":"req-1","result":{"id":"task-1","status":{"state":"completed"}}} + ; + const result = try parseA2aResponse(allocator, response); + try std.testing.expect(!result.success); + try std.testing.expectEqualStrings("A2A: no text found in artifacts", result.error_text.?); +} + +test "parseA2aResponse: handles invalid JSON" { + const allocator = std.testing.allocator; + const result = try parseA2aResponse(allocator, "not json"); + try std.testing.expect(!result.success); + try std.testing.expectEqualStrings("A2A: invalid JSON response", result.error_text.?); +} diff --git a/src/engine.zig b/src/engine.zig index c3b3baa..e2727c4 100644 --- a/src/engine.zig +++ b/src/engine.zig @@ -1,15 +1,30 @@ -/// DAG Engine — Scheduler Loop +/// DAG Engine — Unified State Model Scheduler /// /// The engine runs on its own thread, polling the database for active runs -/// and processing their steps according to the DAG dependencies. +/// and processing them using a graph-based state model with 7 node types: +/// task, route, interrupt, agent, send, transform, subgraph /// /// Each tick: -/// 1. Get active runs -/// 2. For each run, promote pending steps to ready -/// 3. Process ready steps by type (task, fan_out, map, reduce, condition, approval) -/// 4. Check run completion +/// 1. Get active runs (status = running) +/// 2. For each run: +/// a. Load current state from run.state_json +/// b. Load workflow definition from run.workflow_json +/// c. Get completed nodes from latest checkpoint (or []) +/// d. Find ready nodes (all nodes whose inbound edges are satisfied) +/// e. Execute ready nodes in sequence +/// f. Apply state updates via reducers, save checkpoint +/// g. 
Check termination / deadlock +/// +/// Features: +/// - Command primitive (goto): worker responses can contain "goto" to override routing +/// - Breakpoints: interrupt_before / interrupt_after arrays in workflow definition +/// - Subgraph: inline execution of child workflows with input/output mapping +/// - Multi-turn: agent nodes can loop with continuation_prompt up to max_turns +/// - Configurable runs: config stored as state.__config, accessible via templates +/// - Reconciliation: check nulltickets task status between steps const std = @import("std"); const log = std.log.scoped(.engine); +const json = std.json; const Store = @import("store.zig").Store; const types = @import("types.zig"); @@ -19,6 +34,110 @@ const dispatch = @import("dispatch.zig"); const callbacks = @import("callbacks.zig"); const metrics_mod = @import("metrics.zig"); const async_dispatch = @import("async_dispatch.zig"); +const state_mod = @import("state.zig"); +const sse_mod = @import("sse.zig"); +const tracker_client = @import("tracker_client.zig"); +const workflow_loader = @import("workflow_loader.zig"); + +// ── Structured Events ──────────────────────────────────────────────── + +pub const OrchestratorEvent = struct { + event_type: EventType, + run_id: ?[]const u8, + step_id: ?[]const u8, + node_name: ?[]const u8, + timestamp_ms: i64, + metadata_json: ?[]const u8, + + pub const EventType = enum { + run_started, + run_completed, + run_failed, + run_interrupted, + run_cancelled, + step_started, + step_completed, + step_failed, + step_retrying, + agent_turn_started, + agent_turn_completed, + workflow_reloaded, + checkpoint_created, + state_injected, + }; + + pub fn eventKindString(et: EventType) []const u8 { + return switch (et) { + .run_started => "run.started", + .run_completed => "run.completed", + .run_failed => "run.failed", + .run_interrupted => "run.interrupted", + .run_cancelled => "run.cancelled", + .step_started => "step.started", + .step_completed => "step.completed", + 
.step_failed => "step.failed", + .step_retrying => "step.retrying", + .agent_turn_started => "agent_turn.started", + .agent_turn_completed => "agent_turn.completed", + .workflow_reloaded => "workflow.reloaded", + .checkpoint_created => "checkpoint.created", + .state_injected => "state.injected", + }; + } + + pub fn toJson(self: OrchestratorEvent, alloc: std.mem.Allocator) ?[]const u8 { + return std.fmt.allocPrint(alloc, + \\{{"event_type":"{s}","run_id":"{s}","step_id":"{s}","node_name":"{s}","timestamp_ms":{d}}} + , .{ + eventKindString(self.event_type), + self.run_id orelse "", + self.step_id orelse "", + self.node_name orelse "", + self.timestamp_ms, + }) catch null; + } +}; + +// ── Constants ──────────────────────────────────────────────────────── + +/// Maximum number of node executions per tick to prevent infinite loops. +const max_nodes_per_tick: u32 = 1000; + +/// Maximum inline subgraph recursion depth. +const max_subgraph_depth: u32 = 10; + +const StoreWriter = *const fn ( + alloc: std.mem.Allocator, + base_url: []const u8, + api_token: ?[]const u8, + namespace: []const u8, + key: []const u8, + value_json: []const u8, +) anyerror!void; + +const TrackerRuntime = struct { + base_url: []const u8, + api_token: ?[]const u8, + + fn storeAccess(self: TrackerRuntime, fetcher: templates.StoreFetcher) templates.StoreAccess { + return .{ + .base_url = self.base_url, + .api_token = self.api_token, + .fetcher = fetcher, + }; + } +}; + +const RuntimeBindings = struct { + input_json: ?[]const u8, + task_id: ?[]const u8, + tracker: ?TrackerRuntime, + + fn storeAccess(self: RuntimeBindings, fetcher: templates.StoreFetcher) ?templates.StoreAccess { + const tracker = self.tracker orelse return null; + return tracker.storeAccess(fetcher); + } +}; // ── Engine ─────────────────────────────────────────────────────────── @@ -32,6 +151,14 @@ pub const RuntimeConfig = struct { retry_max_elapsed_ms: i64 = 900_000, }; +pub const RateLimitInfo = struct { + worker_id: []const u8, + 
remaining: i64, + limit: i64, + reset_ms: i64, + updated_at_ms: i64, +}; + pub const Engine = struct { store: *Store, allocator: std.mem.Allocator, @@ -41,11 +168,18 @@ pub const Engine = struct { next_health_check_at_ms: i64, metrics: ?*metrics_mod.Metrics, response_queue: ?*async_dispatch.ResponseQueue, - - const TaskPromptSource = union(enum) { - rendered: []const u8, - template: []const u8, - }; + sse_hub: ?*sse_mod.SseHub = null, + workflow_watcher: ?*workflow_loader.WorkflowWatcher = null, + rate_limits: std.StringHashMap(RateLimitInfo), + store_fetcher: templates.StoreFetcher, + store_writer: StoreWriter, + trusted_tracker_url: ?[]const u8 = null, + trusted_tracker_api_token: ?[]const u8 = null, + config_valid: bool = false, + last_config_check_ms: i64 = 0, + + /// How often to re-run config validation (default 30s). + const config_check_interval_ms: i64 = 30_000; pub fn init(store: *Store, allocator: std.mem.Allocator, poll_interval_ms: u64) Engine { return .{ @@ -57,6 +191,15 @@ pub const Engine = struct { .next_health_check_at_ms = 0, .metrics = null, .response_queue = null, + .sse_hub = null, + .workflow_watcher = null, + .rate_limits = std.StringHashMap(RateLimitInfo).init(allocator), + .store_fetcher = templates.fetchStoreValueHttp, + .store_writer = putStoreValueViaHttp, + .trusted_tracker_url = null, + .trusted_tracker_api_token = null, + .config_valid = false, + .last_config_check_ms = 0, }; } @@ -65,6 +208,11 @@ pub const Engine = struct { self.metrics = metrics; } + pub fn setTrustedTrackerAccess(self: *Engine, base_url: ?[]const u8, api_token: ?[]const u8) void { + self.trusted_tracker_url = base_url; + self.trusted_tracker_api_token = api_token; + } + pub fn stop(self: *Engine) void { self.running.store(false, .release); } @@ -80,6 +228,87 @@ pub const Engine = struct { log.info("engine stopped", .{}); } + // ── Config Validation ──────────────────────────────────────────── + + /// Validate that the engine configuration is healthy before 
dispatching + /// new work. Returns true if workers exist and the store is reachable. + /// Results are cached for config_check_interval_ms to avoid running + /// 2 DB queries (listWorkers + getActiveRuns) on every tick. + fn validateConfig(self: *Engine) bool { + const now_ms = ids.nowMs(); + if (self.config_valid and (now_ms - self.last_config_check_ms) < config_check_interval_ms) { + return true; + } + + // Check: at least one worker registered and active + var arena = std.heap.ArenaAllocator.init(self.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + const workers = self.store.listWorkers(alloc) catch { + log.warn("config validation: store query failed (listWorkers)", .{}); + self.config_valid = false; + return false; + }; + + if (workers.len == 0) { + log.warn("config validation: no workers registered", .{}); + self.config_valid = false; + return false; + } + + // Check: store connection healthy (simple query) + _ = self.store.getActiveRuns(alloc) catch { + log.warn("config validation: store connection unhealthy", .{}); + self.config_valid = false; + return false; + }; + + self.config_valid = true; + self.last_config_check_ms = now_ms; + return true; + } + + // ── Structured Event Emission ──────────────────────────────────── + + /// Emit a structured OrchestratorEvent: persist to the events table and + /// broadcast via SseHub for real-time consumption. 
+ fn emitEvent( + self: *Engine, + alloc: std.mem.Allocator, + event_type: OrchestratorEvent.EventType, + run_id: ?[]const u8, + step_id: ?[]const u8, + node_name: ?[]const u8, + metadata_json: ?[]const u8, + ) void { + const ev = OrchestratorEvent{ + .event_type = event_type, + .run_id = run_id, + .step_id = step_id, + .node_name = node_name, + .timestamp_ms = ids.nowMs(), + .metadata_json = metadata_json, + }; + + const kind = OrchestratorEvent.eventKindString(event_type); + const data = ev.toJson(alloc) orelse "{}"; + + // Persist to events table + if (run_id) |rid| { + self.store.insertEvent(rid, step_id, kind, data) catch |err| { + log.warn("failed to persist event {s}: {}", .{ kind, err }); + }; + } + + // Broadcast via SSE + if (self.sse_hub) |hub| { + if (run_id) |rid| { + hub.broadcast(rid, .{ .event_type = kind, .data = data }); + } + } + } + // ── tick — single scheduler iteration ──────────────────────────── fn tick(self: *Engine) !void { @@ -87,6 +316,17 @@ pub const Engine = struct { defer arena.deinit(); const alloc = arena.allocator(); + // Validate config before processing — skip dispatch if unhealthy + if (!self.validateConfig()) { + log.warn("config validation failed, skipping dispatch this tick", .{}); + return; + } + + // Check for hot-reloaded workflow files + if (self.workflow_watcher) |watcher| { + watcher.checkForChanges(); + } + const now_ms = ids.nowMs(); if (now_ms >= self.next_health_check_at_ms) { self.runWorkerHealthChecks(alloc, now_ms) catch |err| { @@ -137,693 +377,824 @@ pub const Engine = struct { } } - // ── processRun ─────────────────────────────────────────────────── + // ── processRun — state-based graph execution ───────────────────── fn processRun(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow) !void { - // 1. Get all steps for this run - const steps = try self.store.getStepsByRun(alloc, run_row.id); - - // 2. Promote pending -> ready: for each pending step, check if - // all its deps are completed/skipped. 
- for (steps) |step| { - if (!std.mem.eql(u8, step.status, "pending")) continue; - - const dep_ids = try self.store.getStepDeps(alloc, step.id); - var all_deps_met = true; - - for (dep_ids) |dep_id| { - // Find the dep step status from our already-fetched steps - const dep_status = findStepStatus(steps, dep_id); - if (dep_status) |ds| { - if (!std.mem.eql(u8, ds, "completed") and !std.mem.eql(u8, ds, "skipped")) { - all_deps_met = false; - break; + return self.processRunWithDepth(alloc, run_row, 0); + } + + /// Wrapper for inline subgraph execution. Uses anyerror to break + /// the recursive inferred-error-set cycle. + fn processRunInline(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, recursion_depth: u32) void { + self.processRunWithDepth(alloc, run_row, recursion_depth) catch |err| { + log.err("inline subgraph run {s} failed: {}", .{ run_row.id, err }); + }; + } + + fn processRunWithDepth(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, recursion_depth: u32) !void { + // 1. Load current state + var current_state = run_row.state_json orelse "{}"; + + // 1b. Inject __config into state (configurable runs) + if (run_row.config_json) |config_str| { + if (config_str.len > 0) { + const config_update = std.fmt.allocPrint(alloc, "{{\"__config\":{s}}}", .{config_str}) catch null; + if (config_update) |cu| { + // Simple merge: parse state, add __config key + const merged = state_mod.applyUpdates(alloc, current_state, cu, "{}") catch null; + if (merged) |m| { + current_state = m; + } + } + } + } + + // 2. Load and parse workflow definition once for the entire tick. + // Helper functions still accept raw JSON strings for external callers, + // but we pre-extract commonly used values here to avoid redundant parsing. 
+ const workflow_json = run_row.workflow_json; + const wf_parsed = json.parseFromSlice(json.Value, alloc, workflow_json, .{}) catch { + log.err("failed to parse workflow_json for run {s}", .{run_row.id}); + try self.store.updateRunStatus(run_row.id, "failed", "invalid workflow JSON"); + return; + }; + const wf_root = wf_parsed.value; + + // Pre-extract schema (used many times in the loop) + const cached_schema_json = if (wf_root == .object) blk: { + if (wf_root.object.get("state_schema")) |ss| { + break :blk serializeJsonValue(alloc, ss) catch "{}"; + } + if (wf_root.object.get("schema")) |ss| { + break :blk serializeJsonValue(alloc, ss) catch "{}"; + } + break :blk "{}"; + } else "{}"; + + // 2b. Parse breakpoint lists from workflow definition + const interrupt_before = parseBreakpointListFromRoot(alloc, wf_root, "interrupt_before"); + const interrupt_after = parseBreakpointListFromRoot(alloc, wf_root, "interrupt_after"); + + // 2d. Collect deferred nodes (Gap 6) + const deferred_nodes = collectDeferredNodesFromRoot(alloc, wf_root); + + // 2c. Get task id for reconciliation. + const runtime = self.buildRuntimeBindings(alloc, workflow_json, current_state, run_row.input_json); + const task_id = runtime.task_id; + + // 3. 
Get completed nodes from latest checkpoint + var completed_nodes = std.StringHashMap(void).init(alloc); + var route_results = std.StringHashMap([]const u8).init(alloc); + + const latest_checkpoint = try self.store.getLatestCheckpoint(alloc, run_row.id); + if (latest_checkpoint) |cp| { + // Parse completed_nodes_json array + const cn_parsed = json.parseFromSlice(json.Value, alloc, cp.completed_nodes_json, .{}) catch null; + if (cn_parsed) |p| { + if (p.value == .array) { + for (p.value.array.items) |item| { + if (item == .string) { + try completed_nodes.put(item.string, {}); + } } - } else { - // Dep step not found — treat as unmet - all_deps_met = false; - break; } } - if (all_deps_met) { - try self.store.updateStepStatus(step.id, "ready", null, null, null, step.attempt); - log.info("promoted step {s} to ready", .{step.id}); + // Parse route results from checkpoint metadata + if (cp.metadata_json) |meta_str| { + const meta_parsed = json.parseFromSlice(json.Value, alloc, meta_str, .{}) catch null; + if (meta_parsed) |mp| { + if (mp.value == .object) { + if (mp.value.object.get("route_results")) |rr| { + if (rr == .object) { + var it = rr.object.iterator(); + while (it.next()) |entry| { + if (entry.value_ptr.* == .string) { + try route_results.put(entry.key_ptr.*, entry.value_ptr.string); + } + } + } + } + } + } } } - // 3. Re-fetch steps to get updated statuses - const updated_steps = try self.store.getStepsByRun(alloc, run_row.id); + var version: i64 = if (latest_checkpoint) |cp| cp.version else 0; + const initial_version = version; - // 4. Process ready steps based on their type - for (updated_steps) |step| { - if (!std.mem.eql(u8, step.status, "ready")) continue; + // Track the latest checkpoint ID for correct parent chaining. + // Updated after each checkpoint creation so subsequent checkpoints + // within the same tick correctly chain to their predecessor. 
+ var latest_checkpoint_id: ?[]const u8 = if (latest_checkpoint) |cp| cp.id else null; - if (std.mem.eql(u8, step.type, "task")) { - self.executeTaskStep(alloc, run_row, step) catch |err| { - log.err("error executing task step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "fan_out")) { - const claimed = self.store.claimReadyStep(step.id, null, ids.nowMs()) catch false; - if (!claimed) continue; - self.executeFanOutStep(alloc, run_row, step) catch |err| { - log.err("error executing fan_out step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "map")) { - const claimed = self.store.claimReadyStep(step.id, null, ids.nowMs()) catch false; - if (!claimed) continue; - self.executeMapStep(alloc, run_row, step) catch |err| { - log.err("error executing map step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "reduce")) { - const claimed = self.store.claimReadyStep(step.id, null, ids.nowMs()) catch false; - if (!claimed) continue; - self.executeReduceStep(alloc, run_row, step, updated_steps) catch |err| { - log.err("error executing reduce step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "condition")) { - const claimed = self.store.claimReadyStep(step.id, null, ids.nowMs()) catch false; - if (!claimed) continue; - self.executeConditionStep(alloc, run_row, step, updated_steps) catch |err| { - log.err("error executing condition step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "approval")) { - const claimed = self.store.claimReadyStep(step.id, null, ids.nowMs()) catch false; - if (!claimed) continue; - self.executeApprovalStep(alloc, run_row, step) catch |err| { - log.err("error executing approval step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "transform")) { - const claimed = self.store.claimReadyStep(step.id, null, ids.nowMs()) catch false; - if (!claimed) continue; - self.executeTransformStep(alloc, 
run_row, step) catch |err| { - log.err("error executing transform step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "wait")) { - const claimed = self.store.claimReadyStep(step.id, null, ids.nowMs()) catch false; - if (!claimed) continue; - self.executeWaitStep(alloc, run_row, step) catch |err| { - log.err("error executing wait step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "router")) { - const claimed = self.store.claimReadyStep(step.id, null, ids.nowMs()) catch false; - if (!claimed) continue; - self.executeRouterStep(alloc, run_row, step, updated_steps) catch |err| { - log.err("error executing router step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "loop")) { - const claimed = self.store.claimReadyStep(step.id, null, ids.nowMs()) catch false; - if (!claimed) continue; - self.executeLoopStep(alloc, run_row, step) catch |err| { - log.err("error executing loop step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "sub_workflow")) { - const claimed = self.store.claimReadyStep(step.id, null, ids.nowMs()) catch false; - if (!claimed) continue; - self.executeSubWorkflowStep(alloc, run_row, step) catch |err| { - log.err("error executing sub_workflow step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "debate")) { - const claimed = self.store.claimReadyStep(step.id, null, ids.nowMs()) catch false; - if (!claimed) continue; - self.executeDebateStep(alloc, run_row, step) catch |err| { - log.err("error executing debate step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "group_chat")) { - const claimed = self.store.claimReadyStep(step.id, null, ids.nowMs()) catch false; - if (!claimed) continue; - self.executeGroupChatStep(alloc, run_row, step) catch |err| { - log.err("error executing group_chat step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "saga")) { - const claimed = 
self.store.claimReadyStep(step.id, null, ids.nowMs()) catch false; - if (!claimed) continue; - self.executeSagaStep(alloc, run_row, step) catch |err| { - log.err("error executing saga step {s}: {}", .{ step.id, err }); - }; - } else { - log.warn("unknown step type {s} for step {s}", .{ step.type, step.id }); - } + // Emit run_started only on the first tick (no prior checkpoints) + if (latest_checkpoint == null) { + self.emitEvent(alloc, .run_started, run_row.id, null, null, null); } - // 4b. Check running steps that need tick-based polling - for (updated_steps) |step| { - if (!std.mem.eql(u8, step.status, "running")) continue; - if (std.mem.eql(u8, step.type, "wait")) { - self.executeWaitStep(alloc, run_row, step) catch |err| { - log.err("error polling wait step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "loop")) { - self.pollRunningLoopStep(alloc, run_row, step) catch |err| { - log.err("error polling loop step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "sub_workflow")) { - self.pollRunningSubWorkflowStep(alloc, run_row, step) catch |err| { - log.err("error polling sub_workflow step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "debate")) { - self.pollRunningDebateStep(alloc, run_row, step) catch |err| { - log.err("error polling debate step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "group_chat")) { - self.pollRunningGroupChatStep(alloc, run_row, step) catch |err| { - log.err("error polling group_chat step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "saga")) { - self.pollRunningSagaStep(alloc, run_row, step) catch |err| { - log.err("error polling saga step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "task")) { - self.pollAsyncTaskStep(alloc, run_row, step) catch |err| { - log.err("error polling async task step {s}: {}", .{ step.id, err }); - }; + // 3b. 
Workflow version migration check + const wf_version = getWorkflowVersion(alloc, workflow_json); + if (latest_checkpoint) |cp| { + const cp_version = getCheckpointWorkflowVersion(alloc, cp.metadata_json); + if (cp_version != wf_version) { + log.warn("workflow version changed from {d} to {d}, attempting migration", .{ cp_version, wf_version }); + // Filter completed_nodes to only include nodes that still exist + _ = migrateCompletedNodes(alloc, &completed_nodes, workflow_json); } } - // 5. Check run completion - try self.checkRunCompletion(run_row.id, alloc); - } + // 4. Main execution loop: find ready nodes, execute, repeat + var running_state: []const u8 = try alloc.dupe(u8, current_state); + var max_iterations: u32 = max_nodes_per_tick; + var goto_ready: ?[]const []const u8 = null; // goto override from command primitive - // ── executeTaskStep ────────────────────────────────────────────── + while (max_iterations > 0) : (max_iterations -= 1) { + // Use goto override if set, otherwise find ready nodes normally + const all_ready_nodes = if (goto_ready) |gr| blk: { + goto_ready = null; + break :blk gr; + } else try findReadyNodesFromRoot(alloc, wf_root, &completed_nodes, &route_results); - fn executeTaskStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { - if (step.next_attempt_at_ms) |next_attempt| { - if (ids.nowMs() < next_attempt) return; - } + // Gap 6: Filter out deferred nodes from ready list (execute them later) + var ready_list: std.ArrayListUnmanaged([]const u8) = .empty; + for (all_ready_nodes) |name| { + if (!isInBreakpointList(name, deferred_nodes)) { + try ready_list.append(alloc, name); + } + } + const ready_nodes = ready_list.items; + if (ready_nodes.len == 0) { + // Check termination: if all paths reached __end__ + if (completed_nodes.get("__end__") != null) { + // Save final state if we made progress + if (version > initial_version) { + try self.store.updateRunState(run_row.id, running_state); + } + try 
self.store.updateRunStatus(run_row.id, "completed", null); + try self.store.insertEvent(run_row.id, null, "run.completed", "{}"); + callbacks.fireCallbacks(alloc, run_row.callbacks_json, "run.completed", run_row.id, null, "{}", self.metrics); + log.info("run {s} completed", .{run_row.id}); + return; + } + // Deadlock: no ready nodes and not done + if (completed_nodes.count() > 0) { + // Check if any step is still running asynchronously + const steps = try self.store.getStepsByRun(alloc, run_row.id); + var has_running = false; + for (steps) |step| { + if (std.mem.eql(u8, step.status, "running")) { + has_running = true; + break; + } + } + if (has_running) { + for (steps) |step| { + if (std.mem.eql(u8, step.status, "running")) { + self.pollAsyncTaskStep(alloc, run_row, step) catch |err| { + log.err("error polling async step {s}: {}", .{ step.id, err }); + }; + } + } + return; + } + log.err("run {s} deadlocked: no ready nodes, not completed", .{run_row.id}); + try self.store.updateRunStatus(run_row.id, "failed", "deadlock: no ready nodes"); + try self.store.insertEvent(run_row.id, null, "run.failed", "{\"reason\":\"deadlock\"}"); + callbacks.fireCallbacks(alloc, run_row.callbacks_json, "run.failed", run_row.id, null, "{}", self.metrics); + } + return; + } - // 1. Resolve prompt source for this task step. - const prompt_source = try self.resolveTaskPromptSource(alloc, run_row, step) orelse { - log.warn("no prompt_template for step {s}", .{step.def_step_id}); - return; - }; + // 5. 
Execute ready nodes sequentially + var made_progress = false; + var goto_override: ?[]const []const u8 = null; + + for (ready_nodes) |node_name| { + if (std.mem.eql(u8, node_name, "__end__")) { + // Gap 6: Execute deferred nodes before completing + for (deferred_nodes) |deferred_name| { + if (completed_nodes.get(deferred_name) != null) continue; + + const def_node_json = getNodeJsonFromRoot(alloc, wf_root, deferred_name) orelse continue; + const def_node_type = getNodeField(alloc, def_node_json, "type") orelse "task"; + + if (std.mem.eql(u8, def_node_type, "transform")) { + const def_updates = getNodeField(alloc, def_node_json, "updates") orelse "{}"; + const def_schema = cached_schema_json; + const def_new_state = state_mod.applyUpdates(alloc, running_state, def_updates, def_schema) catch running_state; + running_state = def_new_state; + } else if (std.mem.eql(u8, def_node_type, "task") or std.mem.eql(u8, def_node_type, "agent")) { + const def_result = self.executeTaskNode(alloc, run_row, runtime, deferred_name, def_node_json, running_state) catch continue; + switch (def_result) { + .completed => |cr| { + if (cr.state_updates) |updates| { + const def_schema = cached_schema_json; + const def_new_state = state_mod.applyUpdates(alloc, running_state, updates, def_schema) catch running_state; + running_state = def_new_state; + } + }, + else => {}, + } + } - // 2. Build final prompt. 
- const rendered_prompt = switch (prompt_source) { - .rendered => |prompt| prompt, - .template => |prompt_template| blk: { - const ctx = try buildTemplateContext(alloc, run_row, step, self.store); - break :blk templates.render(alloc, prompt_template, ctx) catch |err| { - log.err("template render failed for step {s}: {}", .{ step.id, err }); - try self.store.updateStepStatus(step.id, "failed", null, null, "template render failed", step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.failed", "{}"); + try completed_nodes.put(try alloc.dupe(u8, deferred_name), {}); + log.info("deferred node {s} completed for run {s}", .{ deferred_name, run_row.id }); + } + + // Mark __end__ as completed + try completed_nodes.put("__end__", {}); + version += 1; + + // Save checkpoint + const cp_id_buf = ids.generateId(); + const cp_id = try alloc.dupe(u8, &cp_id_buf); + const cn_json = try serializeCompletedNodes(alloc, &completed_nodes); + const parent_id: ?[]const u8 = latest_checkpoint_id; + const meta_json = try serializeRouteResultsWithVersion(alloc, &route_results, wf_version); + try self.store.createCheckpoint(cp_id, run_row.id, "__end__", parent_id, running_state, cn_json, version, meta_json); + try self.store.incrementCheckpointCount(run_row.id); + try self.store.updateRunState(run_row.id, running_state); + latest_checkpoint_id = cp_id; + + // Run is completed + try self.store.updateRunStatus(run_row.id, "completed", null); + try self.store.insertEvent(run_row.id, null, "run.completed", "{}"); + callbacks.fireCallbacks(alloc, run_row.callbacks_json, "run.completed", run_row.id, null, "{}", self.metrics); + log.info("run {s} completed", .{run_row.id}); + return; + } + + // Breakpoint: interrupt_before check + if (isInBreakpointList(node_name, interrupt_before)) { + log.info("breakpoint interrupt_before at node {s} for run {s}", .{ node_name, run_row.id }); + version += 1; + const cp_id_buf = ids.generateId(); + const cp_id = try alloc.dupe(u8, &cp_id_buf); + 
const cn_json = try serializeCompletedNodes(alloc, &completed_nodes); + const parent_id: ?[]const u8 = latest_checkpoint_id; + const meta_json = try serializeRouteResultsWithVersion(alloc, &route_results, wf_version); + try self.store.createCheckpoint(cp_id, run_row.id, node_name, parent_id, running_state, cn_json, version, meta_json); + try self.store.incrementCheckpointCount(run_row.id); + try self.store.updateRunState(run_row.id, running_state); + latest_checkpoint_id = cp_id; + + try self.store.updateRunStatus(run_row.id, "interrupted", null); + try self.store.insertEvent(run_row.id, null, "run.interrupted", "{}"); + callbacks.fireCallbacks(alloc, run_row.callbacks_json, "run.interrupted", run_row.id, null, "{}", self.metrics); + return; + } + + // Get node definition from workflow + const node_json = getNodeJsonFromRoot(alloc, wf_root, node_name) orelse { + log.err("node {s} not found in workflow for run {s}", .{ node_name, run_row.id }); + try self.store.updateRunStatus(run_row.id, "failed", "node not found in workflow"); return; }; - }, - }; - // 4. Get all workers and build WorkerInfo list - const workers = try self.store.listWorkers(alloc); - var worker_infos: std.ArrayListUnmanaged(dispatch.WorkerInfo) = .empty; - for (workers) |w| { - const current_tasks = self.store.countRunningStepsByWorker(w.id) catch 0; - try worker_infos.append(alloc, .{ - .id = w.id, - .url = w.url, - .token = w.token, - .protocol = w.protocol, - .model = w.model, - .tags_json = w.tags_json, - .max_concurrent = w.max_concurrent, - .status = w.status, - .current_tasks = current_tasks, - }); - } + // Get node type + const node_type = getNodeField(alloc, node_json, "type") orelse "task"; - // 5. 
Parse worker_tags from the step definition - const required_tags = try getStepTags(alloc, run_row.workflow_json, step.def_step_id); + // Execute based on type + if (std.mem.eql(u8, node_type, "route")) { + // Route: evaluate routing logic, no worker dispatch + const result = try executeRouteNode(alloc, node_json, running_state); + if (result.route_value) |rv| { + try route_results.put(try alloc.dupe(u8, node_name), rv); + } + try completed_nodes.put(try alloc.dupe(u8, node_name), {}); + + // Create step record + const step_id_buf = ids.generateId(); + const step_id = try alloc.dupe(u8, &step_id_buf); + try self.store.insertStep(step_id, run_row.id, node_name, "route", "completed", "{}", 1, null, null, null); + const route_output = try std.fmt.allocPrint(alloc, "{{\"route\":\"{s}\"}}", .{result.route_value orelse "default"}); + try self.store.updateStepStatus(step_id, "completed", null, route_output, null, 1); + try self.store.insertEvent(run_row.id, step_id, "step.completed", route_output); + + log.info("route node {s} -> {s}", .{ node_name, result.route_value orelse "default" }); + } else if (std.mem.eql(u8, node_type, "interrupt")) { + // Interrupt: save checkpoint, set run to interrupted + try completed_nodes.put(try alloc.dupe(u8, node_name), {}); + version += 1; + + const step_id_buf = ids.generateId(); + const step_id = try alloc.dupe(u8, &step_id_buf); + try self.store.insertStep(step_id, run_row.id, node_name, "interrupt", "completed", "{}", 1, null, null, null); + try self.store.updateStepStatus(step_id, "completed", null, "{\"interrupted\":true}", null, 1); + try self.store.insertEvent(run_row.id, step_id, "step.completed", "{}"); + + const cp_id_buf = ids.generateId(); + const cp_id = try alloc.dupe(u8, &cp_id_buf); + const cn_json = try serializeCompletedNodes(alloc, &completed_nodes); + const parent_id: ?[]const u8 = latest_checkpoint_id; + const meta_json = try serializeRouteResultsWithVersion(alloc, &route_results, wf_version); + try 
self.store.createCheckpoint(cp_id, run_row.id, node_name, parent_id, running_state, cn_json, version, meta_json); + try self.store.incrementCheckpointCount(run_row.id); + try self.store.updateRunState(run_row.id, running_state); + latest_checkpoint_id = cp_id; + + try self.store.updateRunStatus(run_row.id, "interrupted", null); + try self.store.insertEvent(run_row.id, null, "run.interrupted", "{}"); + callbacks.fireCallbacks(alloc, run_row.callbacks_json, "run.interrupted", run_row.id, null, "{}", self.metrics); + log.info("run {s} interrupted at node {s}", .{ run_row.id, node_name }); + return; + } else if (std.mem.eql(u8, node_type, "transform")) { + // Transform: apply static updates, no worker dispatch + const state_updates = getNodeField(alloc, node_json, "updates") orelse "{}"; - // 6. Select an available worker - const selected_worker = try dispatch.selectWorker(alloc, worker_infos.items, required_tags); - if (selected_worker == null) { - // No worker available — leave as "ready", will retry next tick - log.debug("no worker available for step {s}, will retry", .{step.id}); - return; - } - const worker = selected_worker.?; + // Get schema from workflow + const schema_json = cached_schema_json; - // 7. Atomically claim the step to avoid duplicate dispatch across instances. - const claim_ts = ids.nowMs(); - const claimed = try self.store.claimReadyStep(step.id, worker.id, claim_ts); - if (!claimed) { - return; - } - if (self.metrics) |m| { - metrics_mod.Metrics.incr(&m.steps_claimed_total); - } - try self.store.insertEvent(run_row.id, step.id, "step.running", "{}"); + // Apply updates via reducers + const new_state = state_mod.applyUpdates(alloc, running_state, state_updates, schema_json) catch |err| { + log.err("transform node {s} failed to apply updates: {}", .{ node_name, err }); + try self.store.updateRunStatus(run_row.id, "failed", "transform failed"); + return; + }; + running_state = new_state; - // 8. 
Dispatch to worker with handoff support - var current_worker = worker; - var current_prompt = rendered_prompt; - var handoff_count: u32 = 0; - const max_handoffs: u32 = 5; + if (getNodeField(alloc, node_json, "store_updates")) |store_updates_json| { + self.applyStoreUpdates(alloc, running_state, store_updates_json, runtime) catch |err| { + log.err("transform node {s} failed to write store updates: {}", .{ node_name, err }); + try self.store.updateRunStatus(run_row.id, "failed", "transform store update failed"); + return; + }; + } - var final_result: dispatch.DispatchResult = undefined; + try completed_nodes.put(try alloc.dupe(u8, node_name), {}); + + // Create step record + const step_id_buf = ids.generateId(); + const step_id = try alloc.dupe(u8, &step_id_buf); + try self.store.insertStep(step_id, run_row.id, node_name, "transform", "completed", "{}", 1, null, null, null); + try self.store.updateStepStatus(step_id, "completed", null, state_updates, null, 1); + try self.store.insertEvent(run_row.id, step_id, "step.completed", "{}"); + + log.info("transform node {s} completed", .{node_name}); + } else if (std.mem.eql(u8, node_type, "task") or std.mem.eql(u8, node_type, "agent")) { + // Gap 7: Inject __meta managed values + const state_with_meta = injectMeta(alloc, running_state, run_row.id, node_name, version, @as(i64, @intCast(max_iterations))) catch running_state; + + // Gap 3: Check cache before executing + const cache_ttl = parseCacheTtlMs(alloc, node_json); + if (cache_ttl != null) cache_check: { + const pt_c = getNodeField(alloc, node_json, "prompt_template") orelse break :cache_check; + const rnd_c = self.renderWorkflowTemplate(alloc, pt_c, state_with_meta, runtime, null) catch break :cache_check; + const ck_c = computeCacheKey(alloc, node_name, rnd_c) catch break :cache_check; + const cached = self.store.getCachedResult(alloc, ck_c) catch break :cache_check; + if (cached) |cached_upd| { + const cs = cached_schema_json; + running_state = 
state_mod.applyUpdates(alloc, running_state, cached_upd, cs) catch running_state; + try completed_nodes.put(try alloc.dupe(u8, node_name), {}); + log.info("task node {s} cache hit for run {s}", .{ node_name, run_row.id }); + made_progress = true; + version += 1; + const ccb = ids.generateId(); + const cci = try alloc.dupe(u8, &ccb); + const ccn = try serializeCompletedNodes(alloc, &completed_nodes); + const cpi: ?[]const u8 = latest_checkpoint_id; + const cmj = try serializeRouteResults(alloc, &route_results); + try self.store.createCheckpoint(cci, run_row.id, node_name, cpi, running_state, ccn, version, cmj); + try self.store.incrementCheckpointCount(run_row.id); + try self.store.updateRunState(run_row.id, running_state); + latest_checkpoint_id = cci; + continue; + } + } - while (true) { - final_result = try dispatch.dispatchStep( - alloc, - current_worker.url, - current_worker.token, - current_worker.protocol, - current_worker.model, - run_row.id, - step.id, - current_prompt, - ); + // Gap 2: Non-blocking retry — check for pending retry step + const max_attempts = parseRetryMaxAttempts(alloc, node_json) orelse 1; + const retry_init_ms = parseRetryInitialMs(alloc, node_json) orelse 500; + const retry_bf = parseRetryBackoff(alloc, node_json) orelse 2.0; + const retry_max_ms = parseRetryMaxMs(alloc, node_json) orelse 30000; + + // Check if there's a pending retry step for this node + const retrying_step = self.store.getRetryingStepForNode(alloc, run_row.id, node_name) catch null; + if (retrying_step) |rs| { + const now_ms = ids.nowMs(); + if (rs.next_attempt_at_ms) |next_at| { + if (now_ms < next_at) { + // Retry delay not elapsed yet — skip this node, let other runs process + return; + } + } + // Retry timer expired — clear the retrying step and re-execute below + // The attempt count is tracked on the step record + } - if (!final_result.success) break; + const current_attempt: u32 = if (retrying_step) |rs| @intCast(rs.attempt) else 0; + const result = try 
self.executeTaskNode(alloc, run_row, runtime, node_name, node_json, state_with_meta); + + // Handle retry scheduling for failed results (non-blocking) + const result_after_retry: TaskNodeResult = switch (result) { + .failed => |err_text| blk: { + if (current_attempt + 1 < max_attempts) { + // Calculate delay with exponential backoff + var dms: u64 = retry_init_ms; + var ei: u32 = 0; + while (ei < current_attempt) : (ei += 1) { + const nd = @as(f64, @floatFromInt(dms)) * retry_bf; + dms = @intFromFloat(@min(nd, @as(f64, @floatFromInt(retry_max_ms)))); + } + if (dms > retry_max_ms) dms = retry_max_ms; + log.info("task node {s} attempt {d}/{d} failed, scheduling retry in {d}ms", .{ node_name, current_attempt + 1, max_attempts, dms }); + self.emitEvent(alloc, .step_retrying, run_row.id, null, node_name, null); + + // Create or update step record with retry schedule + const next_retry_at = ids.nowMs() + @as(i64, @intCast(dms)); + if (retrying_step) |rs| { + // Update existing step with next retry time + self.store.scheduleStepRetry(rs.id, next_retry_at, @as(i64, @intCast(current_attempt + 1)), err_text) catch {}; + } else { + // Create new step record for retry tracking + const retry_step_id_buf = ids.generateId(); + const retry_step_id = alloc.dupe(u8, &retry_step_id_buf) catch { + break :blk result; + }; + self.store.insertStep(retry_step_id, run_row.id, node_name, node_type, "ready", "{}", @intCast(max_attempts), null, null, null) catch { + break :blk result; + }; + self.store.scheduleStepRetry(retry_step_id, next_retry_at, 1, err_text) catch {}; + } - // Check for handoff_to in the output - const handoff_target = extractHandoffTarget(alloc, final_result.output); - if (handoff_target == null) break; // Normal completion + // Save progress checkpoint before returning + if (version > initial_version) { + const cp_id_buf = ids.generateId(); + const cp_id = alloc.dupe(u8, &cp_id_buf) catch { + break :blk result; + }; + const cn_json = serializeCompletedNodes(alloc, 
&completed_nodes) catch { + break :blk result; + }; + const parent_id: ?[]const u8 = if (latest_checkpoint_id) |pid| pid else null; + const meta_json = serializeRouteResultsWithVersion(alloc, &route_results, wf_version) catch { + break :blk result; + }; + self.store.createCheckpoint(cp_id, run_row.id, node_name, parent_id, running_state, cn_json, version, meta_json) catch {}; + self.store.incrementCheckpointCount(run_row.id) catch {}; + self.store.updateRunState(run_row.id, running_state) catch {}; + latest_checkpoint_id = cp_id; + } - handoff_count += 1; - if (handoff_count >= max_handoffs) { - final_result = .{ - .output = "", - .success = false, - .error_text = "handoff chain limit exceeded (max 5)", - }; - break; - } + // Return without marking node as completed — next tick will retry + return; + } + break :blk result; + }, + else => result, + }; + + switch (result_after_retry) { + .completed => |cr| { + // Gap 7: Strip __meta (don't persist) + running_state = stripMeta(alloc, running_state) catch running_state; + + if (cr.state_updates) |updates| { + const schema_json = cached_schema_json; + const new_state = state_mod.applyUpdates(alloc, running_state, updates, schema_json) catch |err| { + log.err("task node {s} failed to apply updates: {}", .{ node_name, err }); + try self.store.updateRunStatus(run_row.id, "failed", "state update failed"); + return; + }; + running_state = new_state; + + // Gap 3: Store result in cache + if (cache_ttl) |ttl| cache_store: { + const pt_s = getNodeField(alloc, node_json, "prompt_template") orelse break :cache_store; + const rnd_s = self.renderWorkflowTemplate(alloc, pt_s, state_with_meta, runtime, null) catch break :cache_store; + const ck_s = computeCacheKey(alloc, node_name, rnd_s) catch break :cache_store; + self.store.setCachedResult(ck_s, node_name, updates, ttl) catch |cerr| { + log.warn("failed to cache result for node {s}: {}", .{ node_name, cerr }); + }; + } - // Log the handoff event - const handoff_event = try 
std.fmt.allocPrint(alloc, "{{\"handoff_from\":\"{s}\",\"handoff_to_tags\":\"{s}\"}}", .{ current_worker.id, handoff_target.?.tags_str }); - try self.store.insertEvent(run_row.id, step.id, "step.handoff", handoff_event); - log.info("step {s} handoff #{d} from worker {s}", .{ step.id, handoff_count, current_worker.id }); - - // Select new worker by handoff tags - const new_worker = try dispatch.selectWorker(alloc, worker_infos.items, handoff_target.?.tags); - if (new_worker == null) { - final_result = .{ - .output = "", - .success = false, - .error_text = "no worker available for handoff", - }; - break; - } - current_worker = new_worker.?; + // Gap 4: Save as pending write + self.store.savePendingWrite(run_row.id, node_name, node_name, updates) catch |perr| { + log.warn("failed to save pending write for node {s}: {}", .{ node_name, perr }); + }; + } - // Build handoff prompt with message - if (handoff_target.?.message) |msg| { - current_prompt = msg; - } - // Otherwise reuse current_prompt - } + // Apply UI messages to state (__ui_messages key) + if (cr.raw_output) |raw_out| { + running_state = applyUiMessagesToState(alloc, running_state, raw_out) catch running_state; + } - // 8.5. If async dispatch, save state and leave step running - if (final_result.async_pending) { - const async_state = try mergeAsyncState(alloc, step.input_json, final_result.correlation_id orelse ""); - try self.store.updateStepInputJson(step.id, async_state); - log.info("step {s} dispatched async, correlation_id={s}", .{ step.id, final_result.correlation_id orelse "?" 
}); - return; - } + // Consume pending injections + const injections = self.store.consumePendingInjections(alloc, run_row.id, node_name) catch &.{}; + for (injections) |injection| { + const schema_json = cached_schema_json; + const new_state = state_mod.applyUpdates(alloc, running_state, injection.updates_json, schema_json) catch |err| { + log.warn("failed to apply injection for run {s}: {}", .{ run_row.id, err }); + continue; + }; + running_state = new_state; + } - // 9. Handle result - if (final_result.success) { - // Mark step as completed, save output_json - const output_json = try wrapOutput(alloc, final_result.output); - try self.store.updateStepStatus(step.id, "completed", current_worker.id, output_json, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", "{}"); - try self.store.markWorkerSuccess(current_worker.id, ids.nowMs()); - if (self.metrics) |m| { - metrics_mod.Metrics.incr(&m.worker_dispatch_success_total); - } - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.completed", run_row.id, step.id, output_json, self.metrics); - log.info("step {s} completed", .{step.id}); - } else { - // On failure: retry or fail - const err_text = final_result.error_text orelse "dispatch failed"; - const now_ms = ids.nowMs(); - const circuit_until = now_ms + self.runtime_cfg.worker_circuit_breaker_ms; - try self.store.markWorkerFailure( - current_worker.id, - err_text, - now_ms, - self.runtime_cfg.worker_failure_threshold, - circuit_until, - ); - if (self.metrics) |m| { - metrics_mod.Metrics.incr(&m.worker_dispatch_failure_total); - } + try completed_nodes.put(try alloc.dupe(u8, node_name), {}); + + if (cr.goto_targets) |targets| { + var valid_targets: std.ArrayListUnmanaged([]const u8) = .empty; + for (targets) |target| { + if (std.mem.eql(u8, target, "__end__") or workflowHasNode(wf_root, target)) { + try valid_targets.append(alloc, target); + } else { + log.warn("goto target {s} not found in workflow, skipping", 
.{target}); + } + } + if (valid_targets.items.len > 0) { + goto_override = try valid_targets.toOwnedSlice(alloc); + log.info("task node {s} goto: {d} targets", .{ node_name, goto_override.?.len }); + } + } - if (step.attempt < step.max_attempts) { - const elapsed_ms = now_ms - step.created_at_ms; - if (elapsed_ms > self.runtime_cfg.retry_max_elapsed_ms) { - const elapsed_err = try std.fmt.allocPrint(alloc, "retry max elapsed exceeded ({d}ms)", .{self.runtime_cfg.retry_max_elapsed_ms}); - try self.store.updateStepStatus(step.id, "failed", current_worker.id, null, elapsed_err, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.failed", "{}"); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.failed", run_row.id, step.id, "{}", self.metrics); - log.err("step {s} failed: {s}", .{ step.id, elapsed_err }); + // Gap 4: Clear pending writes + self.store.clearPendingWrites(run_row.id) catch {}; + + log.info("task node {s} completed for run {s}", .{ node_name, run_row.id }); + }, + .async_pending => { + // Step is dispatched async, don't mark as completed yet + // Will be polled on next tick + log.info("task node {s} dispatched async for run {s}", .{ node_name, run_row.id }); + // Save checkpoint with current progress before returning + version += 1; + const cp_id_buf = ids.generateId(); + const cp_id = try alloc.dupe(u8, &cp_id_buf); + const cn_json = try serializeCompletedNodes(alloc, &completed_nodes); + const parent_id: ?[]const u8 = latest_checkpoint_id; + const meta_json = try serializeRouteResultsWithVersion(alloc, &route_results, wf_version); + try self.store.createCheckpoint(cp_id, run_row.id, node_name, parent_id, running_state, cn_json, version, meta_json); + try self.store.incrementCheckpointCount(run_row.id); + try self.store.updateRunState(run_row.id, running_state); + latest_checkpoint_id = cp_id; + return; + }, + .no_worker => { + // No worker available, will retry next tick + log.debug("no worker for task node {s}, will 
retry", .{node_name}); + // Save progress so far + if (version > initial_version) { + const cp_id_buf = ids.generateId(); + const cp_id = try alloc.dupe(u8, &cp_id_buf); + const cn_json = try serializeCompletedNodes(alloc, &completed_nodes); + const parent_id: ?[]const u8 = latest_checkpoint_id; + const meta_json = try serializeRouteResultsWithVersion(alloc, &route_results, wf_version); + try self.store.createCheckpoint(cp_id, run_row.id, node_name, parent_id, running_state, cn_json, version, meta_json); + try self.store.incrementCheckpointCount(run_row.id); + try self.store.updateRunState(run_row.id, running_state); + latest_checkpoint_id = cp_id; + } + return; + }, + .failed => |err_text| { + log.err("task node {s} failed: {s}", .{ node_name, err_text }); + try self.store.updateRunStatus(run_row.id, "failed", err_text); + try self.store.insertEvent(run_row.id, null, "run.failed", "{}"); + callbacks.fireCallbacks(alloc, run_row.callbacks_json, "run.failed", run_row.id, null, "{}", self.metrics); + return; + }, + } + } else if (std.mem.eql(u8, node_type, "subgraph")) { + // Subgraph: execute child workflow inline + const result = try self.executeSubgraphNode(alloc, run_row, node_name, node_json, running_state, recursion_depth); + + switch (result) { + .completed => |cr| { + if (cr.state_updates) |updates| { + const schema_json = cached_schema_json; + const new_state = state_mod.applyUpdates(alloc, running_state, updates, schema_json) catch |err| { + log.err("subgraph node {s} failed to apply updates: {}", .{ node_name, err }); + try self.store.updateRunStatus(run_row.id, "failed", "subgraph state update failed"); + return; + }; + running_state = new_state; + } + try completed_nodes.put(try alloc.dupe(u8, node_name), {}); + log.info("subgraph node {s} completed for run {s}", .{ node_name, run_row.id }); + }, + .failed => |err_text| { + log.err("subgraph node {s} failed: {s}", .{ node_name, err_text }); + try self.store.updateRunStatus(run_row.id, "failed", 
err_text); + try self.store.insertEvent(run_row.id, null, "run.failed", "{}"); + callbacks.fireCallbacks(alloc, run_row.callbacks_json, "run.failed", run_row.id, null, "{}", self.metrics); + return; + }, + else => {}, + } + } else if (std.mem.eql(u8, node_type, "send")) { + // Send: read items from state, dispatch target_node per item + const result = try self.executeSendNode(alloc, run_row, runtime, node_name, node_json, running_state); + if (result.state_updates) |updates| { + const schema_json = cached_schema_json; + const new_state = state_mod.applyUpdates(alloc, running_state, updates, schema_json) catch |err| { + log.err("send node {s} failed to apply updates: {}", .{ node_name, err }); + try self.store.updateRunStatus(run_row.id, "failed", "send state update failed"); + return; + }; + running_state = new_state; + } + try completed_nodes.put(try alloc.dupe(u8, node_name), {}); + log.info("send node {s} completed for run {s}", .{ node_name, run_row.id }); + } else { + log.warn("unknown node type {s} for node {s}", .{ node_type, node_name }); + try self.store.updateRunStatus(run_row.id, "failed", "unknown node type"); return; } - const delay_ms = computeRetryDelayMs(self.runtime_cfg, step, now_ms); - const next_attempt_ms = now_ms + delay_ms; - try self.store.scheduleStepRetry(step.id, next_attempt_ms, step.attempt + 1, err_text); - const retry_event = try std.fmt.allocPrint(alloc, "{{\"next_attempt_at_ms\":{d},\"delay_ms\":{d}}}", .{ next_attempt_ms, delay_ms }); - try self.store.insertEvent(run_row.id, step.id, "step.retry", retry_event); - if (self.metrics) |m| { - metrics_mod.Metrics.incr(&m.steps_retry_scheduled_total); + // Breakpoint: interrupt_after check + if (isInBreakpointList(node_name, interrupt_after)) { + log.info("breakpoint interrupt_after at node {s} for run {s}", .{ node_name, run_row.id }); + // Save checkpoint with updated state first + version += 1; + const bp_cp_id_buf = ids.generateId(); + const bp_cp_id = try alloc.dupe(u8, 
&bp_cp_id_buf); + const bp_cn_json = try serializeCompletedNodes(alloc, &completed_nodes); + const bp_parent_id: ?[]const u8 = latest_checkpoint_id; + const bp_meta_json = try serializeRouteResultsWithVersion(alloc, &route_results, wf_version); + try self.store.createCheckpoint(bp_cp_id, run_row.id, node_name, bp_parent_id, running_state, bp_cn_json, version, bp_meta_json); + try self.store.incrementCheckpointCount(run_row.id); + try self.store.updateRunState(run_row.id, running_state); + latest_checkpoint_id = bp_cp_id; + + try self.store.updateRunStatus(run_row.id, "interrupted", null); + try self.store.insertEvent(run_row.id, null, "run.interrupted", "{}"); + callbacks.fireCallbacks(alloc, run_row.callbacks_json, "run.interrupted", run_row.id, null, "{}", self.metrics); + return; } - log.info("step {s} will retry (attempt {d}/{d}, delay={d}ms)", .{ step.id, step.attempt + 1, step.max_attempts, delay_ms }); - } else { - try self.store.updateStepStatus(step.id, "failed", current_worker.id, null, err_text, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.failed", "{}"); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.failed", run_row.id, step.id, "{}", self.metrics); - log.err("step {s} failed: {s}", .{ step.id, err_text }); - } - } - } - - // ── async helpers ────────────────────────────────────────────── - - /// Merge async_pending + correlation_id into existing input_json, - /// preserving any existing fields (e.g. rendered_prompt for retries). 
- fn mergeAsyncState(alloc: std.mem.Allocator, existing_input: []const u8, correlation_id: []const u8) ![]const u8 { - var obj = std.json.ObjectMap.init(alloc); - // Parse and copy existing fields - if (existing_input.len > 0) { - const parsed = std.json.parseFromSlice(std.json.Value, alloc, existing_input, .{}) catch null; - if (parsed) |p| { - if (p.value == .object) { - var it = p.value.object.iterator(); - while (it.next()) |entry| { - try obj.put(entry.key_ptr.*, entry.value_ptr.*); + // Reconciliation: check tracker task status between steps + if (runtime.tracker) |tracker| { + if (task_id != null and !reconcileWithTracker(alloc, tracker.base_url, tracker.api_token, task_id.?)) { + log.info("run {s} cancelled by reconciliation", .{run_row.id}); + try self.store.updateRunStatus(run_row.id, "failed", "cancelled by tracker reconciliation"); + try self.store.insertEvent(run_row.id, null, "run.failed", "{\"reason\":\"tracker_cancelled\"}"); + callbacks.fireCallbacks(alloc, run_row.callbacks_json, "run.failed", run_row.id, null, "{}", self.metrics); + return; } } + + // Strip ephemeral keys before checkpoint persistence + const schema_for_eph = cached_schema_json; + running_state = state_mod.stripEphemeralKeys(alloc, running_state, schema_for_eph) catch running_state; + + // Save checkpoint after each node + made_progress = true; + version += 1; + const cp_id_buf = ids.generateId(); + const cp_id = try alloc.dupe(u8, &cp_id_buf); + const cn_json = try serializeCompletedNodes(alloc, &completed_nodes); + const parent_id: ?[]const u8 = latest_checkpoint_id; + const meta_json = try serializeRouteResultsWithVersion(alloc, &route_results, wf_version); + try self.store.createCheckpoint(cp_id, run_row.id, node_name, parent_id, running_state, cn_json, version, meta_json); + try self.store.incrementCheckpointCount(run_row.id); + try self.store.updateRunState(run_row.id, running_state); + latest_checkpoint_id = cp_id; + + // Emit structured checkpoint event + 
self.emitEvent(alloc, .checkpoint_created, run_row.id, null, node_name, null); + + // Broadcast rich SSE events for all modes + if (self.sse_hub) |hub| { + const node_json_for_sse = getNodeJsonFromRoot(alloc, wf_root, node_name); + const nt = if (node_json_for_sse) |nj| (getNodeField(alloc, nj, "type") orelse "task") else "task"; + broadcastNodeEvents(hub, alloc, run_row.id, node_name, nt, running_state, null, version, 0); + } } - } - // Add async fields - try obj.put("async_pending", .{ .bool = true }); - try obj.put("correlation_id", .{ .string = correlation_id }); + // If goto override is set, use it for next iteration instead of findReadyNodes + if (goto_override) |targets| { + goto_ready = targets; + } - return std.json.Stringify.valueAlloc(alloc, std.json.Value{ .object = obj }, .{}); + // If no progress was made in this iteration, break + if (!made_progress) break; + } // end while loop } - fn pollAsyncTaskStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { - // Only handle steps that are async (have async_pending in input_json) - const input_json = step.input_json; - if (input_json.len == 0) return; + // ── Node Execution Results ─────────────────────────────────────── - // Parse input_json to check for async_pending flag - const parsed = std.json.parseFromSlice(std.json.Value, alloc, input_json, .{}) catch return; - defer parsed.deinit(); - if (parsed.value != .object) return; + const TaskNodeResult = union(enum) { + completed: struct { + state_updates: ?[]const u8, + goto_targets: ?[]const []const u8 = null, + raw_output: ?[]const u8 = null, + }, + async_pending: void, + no_worker: void, + failed: []const u8, + }; - const async_flag = parsed.value.object.get("async_pending") orelse return; - if (async_flag != .bool or !async_flag.bool) return; + const SendNodeResult = struct { + state_updates: ?[]const u8, + }; - const corr_val = parsed.value.object.get("correlation_id") orelse return; - if (corr_val != 
.string) return; - const correlation_id = corr_val.string; + const RouteNodeResult = struct { + route_value: ?[]const u8, + }; - // Check response queue - const queue = self.response_queue orelse return; - const response = queue.take(correlation_id) orelse { - // Check timeout - if (step.timeout_ms) |timeout_ms| { - if (step.started_at_ms) |started_at| { - const elapsed = ids.nowMs() - started_at; - if (elapsed > timeout_ms) { - const err_text = try std.fmt.allocPrint(alloc, "async step timed out after {d}ms", .{timeout_ms}); - try self.store.updateStepStatus(step.id, "failed", step.worker_id, null, err_text, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.failed", "{}"); - if (self.metrics) |m| { - metrics_mod.Metrics.incr(&m.worker_dispatch_failure_total); - } - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.failed", run_row.id, step.id, "{}", self.metrics); - log.err("async step {s} timed out", .{step.id}); - } - } - } - return; - }; - - // Got a response — complete or fail the step - if (response.success) { - const output_json = try wrapOutput(alloc, response.output); - try self.store.updateStepStatus(step.id, "completed", step.worker_id, output_json, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", "{}"); - if (step.worker_id) |wid| { - try self.store.markWorkerSuccess(wid, ids.nowMs()); - } - if (self.metrics) |m| { - metrics_mod.Metrics.incr(&m.worker_dispatch_success_total); - } - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.completed", run_row.id, step.id, output_json, self.metrics); - log.info("async step {s} completed", .{step.id}); - } else { - const err_text = response.error_text orelse "async dispatch failed"; - try self.store.updateStepStatus(step.id, "failed", step.worker_id, null, err_text, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.failed", "{}"); - if (step.worker_id) |wid| { - const now_ms = ids.nowMs(); - const circuit_until 
= now_ms + self.runtime_cfg.worker_circuit_breaker_ms; - try self.store.markWorkerFailure(wid, err_text, now_ms, self.runtime_cfg.worker_failure_threshold, circuit_until); - } - if (self.metrics) |m| { - metrics_mod.Metrics.incr(&m.worker_dispatch_failure_total); - } - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.failed", run_row.id, step.id, "{}", self.metrics); - log.err("async step {s} failed: {s}", .{ step.id, err_text }); - } - } - - fn resolveTaskPromptSource(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !?TaskPromptSource { - // Explicit rendered_prompt is highest priority for generated children - // (for example debate judge prompts). - if (extractRenderedPromptFromInput(alloc, step.input_json)) |rendered_prompt| { - return .{ .rendered = rendered_prompt }; - } + // ── executeRouteNode ───────────────────────────────────────────── - // Normal task step definition prompt. - if (try getStepField(alloc, run_row.workflow_json, step.def_step_id, "prompt_template")) |tpl| { - return .{ .template = tpl }; - } + fn executeRouteNode(alloc: std.mem.Allocator, node_json: []const u8, state_json: []const u8) !RouteNodeResult { + // Get the input path to read from state + const input_path = getNodeField(alloc, node_json, "input") orelse "state.route_input"; + const default_route = getNodeField(alloc, node_json, "default"); - // Fallback for generated child tasks that should reuse parent prompt template. 
- if (step.parent_step_id) |parent_id| { - if (try self.store.getStep(alloc, parent_id)) |parent_step| { - if (try getStepField(alloc, run_row.workflow_json, parent_step.def_step_id, "prompt_template")) |parent_tpl| { - return .{ .template = parent_tpl }; - } - } + // Read value from state + const value_json = state_mod.getStateValue(alloc, state_json, input_path) catch null; + if (value_json == null) { + return RouteNodeResult{ .route_value = resolveDeclaredRouteValue(alloc, node_json, default_route) }; } - return null; - } - - // ── executeFanOutStep ──────────────────────────────────────────── - - fn executeFanOutStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { - // 1. Parse step definition from workflow_json, get "count" - const count_val = try getStepFieldInt(alloc, run_row.workflow_json, step.def_step_id, "count") orelse { - log.warn("no count for fan_out step {s}", .{step.def_step_id}); - try self.store.updateStepStatus(step.id, "failed", null, null, "missing count in fan_out definition", step.attempt); - return; + // Stringify value for route matching + const route_key = state_mod.stringifyForRoute(alloc, value_json.?) catch { + return RouteNodeResult{ .route_value = resolveDeclaredRouteValue(alloc, node_json, default_route) }; }; - const count: usize = @intCast(count_val); - // 2. 
Create N child steps - for (0..count) |i| { - const child_id_buf = ids.generateId(); - const child_id = try alloc.dupe(u8, &child_id_buf); - const child_def_id = try std.fmt.allocPrint(alloc, "{s}_{d}", .{ step.def_step_id, i }); - const idx: i64 = @intCast(i); + return RouteNodeResult{ .route_value = resolveDeclaredRouteValue(alloc, node_json, route_key) }; + } - try self.store.insertStep( - child_id, - run_row.id, - child_def_id, - "task", - "ready", - step.input_json, - step.max_attempts, - step.timeout_ms, - step.id, // parent_step_id - idx, - ); - log.info("created fan_out child step {s} (index {d})", .{ child_id, i }); + fn buildWorkerInfos(self: *Engine, alloc: std.mem.Allocator) ![]dispatch.WorkerInfo { + const workers = try self.store.listWorkers(alloc); + var worker_infos: std.ArrayListUnmanaged(dispatch.WorkerInfo) = .empty; + for (workers) |worker| { + const current_tasks = self.store.countRunningStepsByWorker(worker.id) catch 0; + try worker_infos.append(alloc, .{ + .id = worker.id, + .url = worker.url, + .token = worker.token, + .protocol = worker.protocol, + .model = worker.model, + .tags_json = worker.tags_json, + .max_concurrent = worker.max_concurrent, + .status = worker.status, + .current_tasks = current_tasks, + }); } - - // 3. Mark fan_out step as "completed" - try self.store.updateStepStatus(step.id, "completed", null, null, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", "{}"); - log.info("fan_out step {s} completed, created {d} children", .{ step.id, count }); + return worker_infos.toOwnedSlice(alloc); } - // ── executeMapStep ─────────────────────────────────────────────── + // ── executeTaskNode ────────────────────────────────────────────── - fn executeMapStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { - // 1. Parse step definition, get "items_from" (e.g. 
"$.topics") - const items_from = try getStepField(alloc, run_row.workflow_json, step.def_step_id, "items_from") orelse { - log.warn("no items_from for map step {s}", .{step.def_step_id}); - try self.store.updateStepStatus(step.id, "failed", null, null, "missing items_from in map definition", step.attempt); - return; + fn executeTaskNode(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, runtime: RuntimeBindings, node_name: []const u8, node_json: []const u8, state_json: []const u8) !TaskNodeResult { + // 1. Get prompt template from node definition + const prompt_template = getNodeField(alloc, node_json, "prompt_template") orelse { + // No prompt template — mark as completed with no state updates + return TaskNodeResult{ .completed = .{ .state_updates = null } }; }; - // 2. Resolve items_from against run.input_json — extract the array - // items_from format: "$.field_name" - const field_name = if (std.mem.startsWith(u8, items_from, "$.")) - items_from[2..] - else - items_from; - - const items = try extractJsonArray(alloc, run_row.input_json, field_name) orelse { - log.warn("items_from field '{s}' not found or not an array in input", .{field_name}); - try self.store.updateStepStatus(step.id, "failed", null, null, "items_from field not found or not an array", step.attempt); - return; + // 2. Render prompt with graph template interpolation and optional store access. + const rendered_prompt = self.renderWorkflowTemplate(alloc, prompt_template, state_json, runtime, null) catch |err| { + log.err("template render failed for node {s}: {}", .{ node_name, err }); + return TaskNodeResult{ .failed = "template render failed" }; }; - // 3. For each item in the array, create a child step - for (items, 0..) 
|item, i| { - const child_id_buf = ids.generateId(); - const child_id = try alloc.dupe(u8, &child_id_buf); - const child_def_id = try std.fmt.allocPrint(alloc, "{s}_{d}", .{ step.def_step_id, i }); - const idx: i64 = @intCast(i); - - // Store the item as input_json for the child - const item_json = try wrapItemJson(alloc, item); - - try self.store.insertStep( - child_id, - run_row.id, - child_def_id, - "task", - "ready", - item_json, - step.max_attempts, - step.timeout_ms, - step.id, // parent_step_id - idx, - ); - log.info("created map child step {s} for item {d}", .{ child_id, i }); - } - - // 4. Mark map step as "completed" - try self.store.updateStepStatus(step.id, "completed", null, null, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", "{}"); - log.info("map step {s} completed, created {d} children", .{ step.id, items.len }); - } - - // ── executeReduceStep ──────────────────────────────────────────── - - fn executeReduceStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow, all_steps: []const types.StepRow) !void { - // 1. Find the dependency step (the fan_out or map step this depends on) - const dep_ids = try self.store.getStepDeps(alloc, step.id); - if (dep_ids.len == 0) { - log.warn("reduce step {s} has no dependencies", .{step.id}); - try self.store.updateStepStatus(step.id, "failed", null, null, "reduce step has no dependencies", step.attempt); - return; - } - - // The reduce depends on a fan_out/map step; find it - const dep_step_id = dep_ids[0]; - - // 2. 
Get all child steps of that dependency - const children = try self.store.getChildSteps(alloc, dep_step_id); - - if (children.len == 0) { - // If the dep is a fan_out/map that hasn't spawned children yet, wait - // Check if dep step itself is completed - const dep_status = findStepStatus(all_steps, dep_step_id); - if (dep_status == null or !std.mem.eql(u8, dep_status.?, "completed")) { - // Dep not completed yet, stay ready - return; + // 3. Get workers and select one + const worker_infos = try self.buildWorkerInfos(alloc); + + const required_tags = getNodeTags(alloc, node_json); + const node_type = getNodeField(alloc, node_json, "type") orelse "task"; + const is_agent_node = std.mem.eql(u8, node_type, "agent"); + + // For agent nodes, prefer A2A-protocol workers first, then fall back to any worker + var selected_worker: ?dispatch.WorkerInfo = null; + if (is_agent_node) { + // Filter to A2A workers only + var a2a_workers: std.ArrayListUnmanaged(dispatch.WorkerInfo) = .empty; + for (worker_infos) |w| { + if (std.mem.eql(u8, w.protocol, "a2a")) { + try a2a_workers.append(alloc, w); + } } - // Dep completed but no children? Odd, proceed with empty outputs - } - - // 3. Check if ALL children are completed - var all_done = true; - for (children) |child| { - if (!std.mem.eql(u8, child.status, "completed") and !std.mem.eql(u8, child.status, "skipped")) { - all_done = false; - break; + if (a2a_workers.items.len > 0) { + selected_worker = try dispatch.selectWorker(alloc, a2a_workers.items, required_tags); } } - if (!all_done) { - // Not all children done, leave reduce as "ready", try next tick - return; + // Fall back to any protocol if no A2A worker found (or not an agent node) + if (selected_worker == null) { + selected_worker = try dispatch.selectWorker(alloc, worker_infos, required_tags); } - - // 4. 
Collect all child outputs into an array - var child_outputs: std.ArrayListUnmanaged([]const u8) = .empty; - for (children) |child| { - if (child.output_json) |oj| { - // Extract "output" field from JSON, or use the raw JSON - const extracted = extractOutputField(alloc, oj) catch oj; - try child_outputs.append(alloc, extracted); - } else { - try child_outputs.append(alloc, ""); - } + if (selected_worker == null) { + return TaskNodeResult{ .no_worker = {} }; } + const worker = selected_worker.?; - // 5. Build template context with outputs array - // Find the dep step's def_step_id for template referencing - const dep_def_step_id = findStepDefId(all_steps, dep_step_id) orelse step.def_step_id; - - const step_output = templates.Context.StepOutput{ - .step_id = dep_def_step_id, - .output = null, - .outputs = child_outputs.items, - }; - - const prompt_template = try getStepField(alloc, run_row.workflow_json, step.def_step_id, "prompt_template") orelse { - // No template — just collect outputs and mark completed - const outputs_json = try serializeStringArray(alloc, child_outputs.items); - try self.store.updateStepStatus(step.id, "completed", null, outputs_json, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", "{}"); - return; - }; - - const ctx = templates.Context{ - .input_json = run_row.input_json, - .step_outputs = &.{step_output}, - .item = null, - }; - - // 6. Render template - const rendered_prompt = templates.render(alloc, prompt_template, ctx) catch |err| { - log.err("template render failed for reduce step {s}: {}", .{ step.id, err }); - try self.store.updateStepStatus(step.id, "failed", null, null, "template render failed", step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.failed", "{}"); - return; - }; + // 4. 
Create step record + const step_id_buf = ids.generateId(); + const step_id = try alloc.dupe(u8, &step_id_buf); + try self.store.insertStep(step_id, run_row.id, node_name, node_type, "running", state_json, 1, null, null, null); + try self.store.insertEvent(run_row.id, step_id, "step.running", "{}"); + self.emitEvent(alloc, .step_started, run_row.id, step_id, node_name, null); - // 7. Get workers and dispatch - const workers = try self.store.listWorkers(alloc); - var worker_infos: std.ArrayListUnmanaged(dispatch.WorkerInfo) = .empty; - for (workers) |w| { - const current_tasks = self.store.countRunningStepsByWorker(w.id) catch 0; - try worker_infos.append(alloc, .{ - .id = w.id, - .url = w.url, - .token = w.token, - .protocol = w.protocol, - .model = w.model, - .tags_json = w.tags_json, - .max_concurrent = w.max_concurrent, - .status = w.status, - .current_tasks = current_tasks, - }); + if (self.metrics) |m| { + metrics_mod.Metrics.incr(&m.steps_claimed_total); } - const required_tags = try getStepTags(alloc, run_row.workflow_json, step.def_step_id); - const selected_worker = try dispatch.selectWorker(alloc, worker_infos.items, required_tags); - if (selected_worker == null) { - log.debug("no worker available for reduce step {s}, will retry", .{step.id}); - return; + // 5. 
Dispatch to worker (A2A protocol for agent nodes with A2A workers, + // or standard protocol dispatch for task nodes / fallback) + if (is_agent_node and std.mem.eql(u8, worker.protocol, "a2a")) { + log.info("agent node {s} dispatching via A2A to worker {s}", .{ node_name, worker.id }); } - const worker = selected_worker.?; - - try self.store.updateStepStatus(step.id, "running", worker.id, null, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.running", "{}"); - const result = try dispatch.dispatchStep( alloc, worker.url, @@ -831,3471 +1202,1734 @@ pub const Engine = struct { worker.protocol, worker.model, run_row.id, - step.id, + step_id, rendered_prompt, ); - if (result.success) { - const output_json = try wrapOutput(alloc, result.output); - try self.store.updateStepStatus(step.id, "completed", worker.id, output_json, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", "{}"); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.completed", run_row.id, step.id, output_json, self.metrics); - log.info("reduce step {s} completed", .{step.id}); - } else { - const err_text = result.error_text orelse "dispatch failed"; - if (step.attempt < step.max_attempts) { - try self.store.updateStepStatus(step.id, "ready", null, null, err_text, step.attempt + 1); - try self.store.insertEvent(run_row.id, step.id, "step.retry", "{}"); - } else { - try self.store.updateStepStatus(step.id, "failed", worker.id, null, err_text, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.failed", "{}"); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.failed", run_row.id, step.id, "{}", self.metrics); - } - } - } - - // ── executeConditionStep ───────────────────────────────────────── - - fn executeConditionStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow, all_steps: []const types.StepRow) !void { - // 1. 
Get the dependency step's output - const dep_ids = try self.store.getStepDeps(alloc, step.id); - if (dep_ids.len == 0) { - log.warn("condition step {s} has no dependencies", .{step.id}); - try self.store.updateStepStatus(step.id, "failed", null, null, "condition step has no dependencies", step.attempt); - return; + // 6. Handle async dispatch + if (result.async_pending) { + const async_state = try mergeAsyncState(alloc, state_json, result.correlation_id orelse ""); + try self.store.updateStepInputJson(step_id, async_state); + log.info("step {s} dispatched async, correlation_id={s}", .{ step_id, result.correlation_id orelse "?" }); + return TaskNodeResult{ .async_pending = {} }; } - const dep_step_id = dep_ids[0]; - const dep_output = findStepOutput(all_steps, dep_step_id) orelse ""; - - // 2. Parse the "expression" from step definition - const expression = try getStepField(alloc, run_row.workflow_json, step.def_step_id, "expression") orelse "true"; - - // 3. Evaluate: for MVP, support simple "contains" check - // Expression format: check if the dependency output contains a certain substring - // If expression is "true", always take true branch - // Otherwise, check if dep output contains the expression text - const condition_met = if (std.mem.eql(u8, expression, "true")) - true - else if (std.mem.eql(u8, expression, "false")) - false - else - std.mem.indexOf(u8, dep_output, expression) != null; - - // 4. Determine branch - const true_target = try getStepField(alloc, run_row.workflow_json, step.def_step_id, "true_target"); - const false_target = try getStepField(alloc, run_row.workflow_json, step.def_step_id, "false_target"); + // 7. Handle result + if (result.success) { + var final_output = result.output; + + // Track cumulative token usage (Gap 2) + var total_input_tokens: i64 = 0; + var total_output_tokens: i64 = 0; + if (result.usage) |usage| { + total_input_tokens += usage.input_tokens; + total_output_tokens += usage.output_tokens; + } + + // 7a. 
Multi-turn continuation for agent nodes + if (is_agent_node) { + const max_turns_val = getNodeFieldInt(alloc, node_json, "max_turns"); + const continuation_prompt = getNodeField(alloc, node_json, "continuation_prompt"); + const turn_timeout_ms_val = getNodeFieldInt(alloc, node_json, "turn_timeout_ms"); + const turn_start_ms = ids.nowMs(); + + if (max_turns_val != null and continuation_prompt != null) { + const mt = max_turns_val.?; + const max_turns: u32 = @intCast(@min(@max(mt, 1), 100)); + if (max_turns > 1) { + var turn: u32 = 1; + while (turn < max_turns) : (turn += 1) { + // Check turn timeout (Gap 4) + if (turn_timeout_ms_val) |timeout_ms| { + const elapsed = ids.nowMs() - turn_start_ms; + if (elapsed > timeout_ms) { + log.info("agent node {s} turn timeout after {d}ms (limit={d}ms)", .{ node_name, elapsed, timeout_ms }); + break; + } + } - // 5. Determine the winning target and check for graph cycles - const winning_target: ?[]const u8 = if (condition_met) true_target else false_target; + // Consume pending injections between turns — these are + // queued but cannot be applied mid-node. Re-save them so + // they are applied after the full node completes. 
+ const mid_injections = self.store.consumePendingInjections(alloc, run_row.id, node_name) catch &.{}; + for (mid_injections) |inj| { + self.store.createPendingInjection(run_row.id, inj.updates_json, node_name) catch {}; + } - // Check if the winning target is a backward edge (cycle) - if (winning_target) |target| { - const cycle_handled = try self.handleCycleBack(alloc, run_row, step, target, all_steps); - if (cycle_handled) return; // Cycle was handled, step is already completed - } + // Render continuation prompt + const cont_rendered = self.renderWorkflowTemplate(alloc, continuation_prompt.?, state_json, runtime, null) catch break; + + const cont_result = try dispatch.dispatchStep( + alloc, + worker.url, + worker.token, + worker.protocol, + worker.model, + run_row.id, + step_id, + cont_rendered, + ); + + if (!cont_result.success) break; + final_output = cont_result.output; + + // Accumulate token usage from continuation turns + if (cont_result.usage) |usage| { + total_input_tokens += usage.input_tokens; + total_output_tokens += usage.output_tokens; + } + } + log.info("agent node {s} completed {d} turns", .{ node_name, turn }); + } + } + } - // 6. 
For the losing branch target: mark steps as "skipped" - if (condition_met) { - // Skip the false branch target - if (false_target) |target_def_id| { - try self.skipStepByDefId(alloc, all_steps, run_row.id, target_def_id); + // Record token usage (Gap 2) + if (total_input_tokens > 0 or total_output_tokens > 0) { + self.store.updateStepTokens(step_id, total_input_tokens, total_output_tokens) catch |err| { + log.warn("failed to update step tokens: {}", .{err}); + }; + self.store.updateRunTokens(run_row.id, total_input_tokens, total_output_tokens) catch |err| { + log.warn("failed to update run tokens: {}", .{err}); + }; } - } else { - // Skip the true branch target - if (true_target) |target_def_id| { - try self.skipStepByDefId(alloc, all_steps, run_row.id, target_def_id); + + // Store rate limit info (Gap 3) + if (result.rate_limit) |rl| { + self.rate_limits.put(worker.id, RateLimitInfo{ + .worker_id = worker.id, + .remaining = rl.remaining, + .limit = rl.limit, + .reset_ms = rl.reset_ms, + .updated_at_ms = ids.nowMs(), + }) catch {}; } - } - // 7. 
Mark condition step as "completed" - const branch_result = if (condition_met) "true" else "false"; - const output_json = try std.fmt.allocPrint(alloc, "{{\"branch\":\"{s}\"}}", .{branch_result}); - try self.store.updateStepStatus(step.id, "completed", null, output_json, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", "{}"); - log.info("condition step {s} evaluated to {s}", .{ step.id, branch_result }); - } + const output_json = try wrapOutput(alloc, final_output); + try self.store.updateStepStatus(step_id, "completed", worker.id, output_json, null, 1); + try self.store.insertEvent(run_row.id, step_id, "step.completed", "{}"); + self.emitEvent(alloc, .step_completed, run_row.id, step_id, node_name, null); + try self.store.markWorkerSuccess(worker.id, ids.nowMs()); - // ── executeApprovalStep ────────────────────────────────────────── + if (self.metrics) |m| { + metrics_mod.Metrics.incr(&m.worker_dispatch_success_total); + } + callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.completed", run_row.id, step_id, output_json, self.metrics); - fn executeApprovalStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { - _ = alloc; - // 1. Mark step as "waiting_approval" - try self.store.updateStepStatus(step.id, "waiting_approval", null, null, null, step.attempt); - // 2. Insert event - try self.store.insertEvent(run_row.id, step.id, "step.waiting_approval", "{}"); - log.info("approval step {s} waiting for approval", .{step.id}); - } + // Process UI messages and stream messages from worker response + if (self.sse_hub) |hub| { + processUiMessages(hub, alloc, run_row.id, step_id, final_output); + processStreamMessages(hub, alloc, run_row.id, step_id, node_type, final_output); + } - // ── executeTransformStep ──────────────────────────────────────── + // Build state_updates from output. 
Prefer explicit state_updates + // from the worker, otherwise honor node-level output_key / + // output_mapping before falling back to the legacy "output" key. + const state_updates = try buildTaskStateUpdates(alloc, node_json, final_output); - fn executeTransformStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { - // 1. Get output_template from workflow_json - const output_template = try getStepField(alloc, run_row.workflow_json, step.def_step_id, "output_template") orelse { - log.warn("no output_template for transform step {s}", .{step.def_step_id}); - try self.store.updateStepStatus(step.id, "failed", null, null, "missing output_template", step.attempt); - return; - }; + // Extract goto targets from output (command primitive) + const goto_targets = extractGotoTargets(alloc, final_output); - // 2. Build template context (same as task step) - const ctx = try buildTemplateContext(alloc, run_row, step, self.store); + return TaskNodeResult{ .completed = .{ .state_updates = state_updates, .goto_targets = goto_targets, .raw_output = final_output } }; + } else { + const err_text = result.error_text orelse "dispatch failed"; + try self.store.updateStepStatus(step_id, "failed", worker.id, null, err_text, 1); + try self.store.insertEvent(run_row.id, step_id, "step.failed", "{}"); + self.emitEvent(alloc, .step_failed, run_row.id, step_id, node_name, null); - // 3. Render template - const rendered = templates.render(alloc, output_template, ctx) catch |err| { - const err_msg = std.fmt.allocPrint(alloc, "template render error: {}", .{err}) catch "template render error"; - try self.store.updateStepStatus(step.id, "failed", null, null, err_msg, step.attempt); - return; - }; + const now_ms = ids.nowMs(); + const circuit_until = now_ms + self.runtime_cfg.worker_circuit_breaker_ms; + try self.store.markWorkerFailure( + worker.id, + err_text, + now_ms, + self.runtime_cfg.worker_failure_threshold, + circuit_until, + ); - // 4. 
Wrap as output and mark completed - const output = try wrapOutput(alloc, rendered); - try self.store.updateStepStatus(step.id, "completed", null, output, null, step.attempt); + if (self.metrics) |m| { + metrics_mod.Metrics.incr(&m.worker_dispatch_failure_total); + } + callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.failed", run_row.id, step_id, "{}", self.metrics); - // 5. Fire callback + event - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.completed", run_row.id, step.id, output, self.metrics); - try self.store.insertEvent(run_row.id, step.id, "step.completed", output); - log.info("transform step {s} completed", .{step.id}); + return TaskNodeResult{ .failed = err_text }; + } } - // ── executeWaitStep ────────────────────────────────────────────── + // ── executeSubgraphNode ───────────────────────────────────────── - fn executeWaitStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { - const now = ids.nowMs(); - - // Check signal mode first - if (try getStepField(alloc, run_row.workflow_json, step.def_step_id, "signal")) |_| { - // Signal mode: set to waiting_approval and wait for external POST /signal - try self.store.updateStepStatus(step.id, "waiting_approval", null, null, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.waiting_signal", "{}"); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.waiting_signal", run_row.id, step.id, "{}", self.metrics); - log.info("wait step {s} waiting for signal", .{step.id}); - return; + fn executeSubgraphNode(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, node_name: []const u8, node_json: []const u8, state_json: []const u8, recursion_depth: u32) !TaskNodeResult { + if (recursion_depth >= max_subgraph_depth) { + log.err("subgraph node {s}: max recursion depth ({d}) exceeded", .{ node_name, max_subgraph_depth }); + return TaskNodeResult{ .failed = "subgraph max recursion depth exceeded" }; } - // 
Duration mode - const duration_opt: ?i64 = blk: { - const duration_raw = try getStepFieldRaw(alloc, run_row.workflow_json, step.def_step_id, "duration_ms"); - if (duration_raw != null) { - const dur_int = (try getStepFieldInt(alloc, run_row.workflow_json, step.def_step_id, "duration_ms")) orelse { - try self.failStepWithError(alloc, run_row, step, "duration_ms must be an integer"); - return; - }; - if (dur_int < 0) { - try self.failStepWithError(alloc, run_row, step, "duration_ms must be >= 0"); - return; - } - break :blk dur_int; - } - break :blk null; + // Get workflow_id + const workflow_id = getNodeField(alloc, node_json, "workflow_id") orelse { + log.err("subgraph node {s}: missing workflow_id", .{node_name}); + return TaskNodeResult{ .failed = "subgraph missing workflow_id" }; }; - if (duration_opt) |duration| { - if (step.started_at_ms) |started| { - // Already running -- check if duration elapsed - if (now - started >= duration) { - const waited = now - started; - const output = try std.fmt.allocPrint(alloc, "{{\"output\":\"waited\",\"waited_ms\":{d}}}", .{waited}); - try self.store.updateStepStatus(step.id, "completed", null, output, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", output); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.completed", run_row.id, step.id, output, self.metrics); - log.info("wait step {s} completed after {d}ms", .{ step.id, waited }); - return; - } - // Not yet -- stay running (do nothing, will be checked next tick) - return; - } - // First time -- mark running and set started_at_ms - try self.store.updateStepStatus(step.id, "running", null, null, null, step.attempt); - try self.store.setStepStartedAt(step.id, now); - return; - } - // Until_ms mode (check integer field) - if (try getStepFieldInt(alloc, run_row.workflow_json, step.def_step_id, "until_ms")) |until| { - if (until < 0) { - try self.failStepWithError(alloc, run_row, step, "until_ms must be >= 0"); - return; - } 
- if (now >= until) { - const output = try std.fmt.allocPrint(alloc, "{{\"output\":\"waited\",\"waited_ms\":{d}}}", .{now - (step.started_at_ms orelse now)}); - try self.store.updateStepStatus(step.id, "completed", null, output, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", output); - log.info("wait step {s} completed (until_ms reached)", .{step.id}); - return; - } - if (step.started_at_ms == null) { - try self.store.updateStepStatus(step.id, "running", null, null, null, step.attempt); - try self.store.setStepStartedAt(step.id, now); - } - return; - } + // Load workflow definition from store + const workflow_row = try self.store.getWorkflow(alloc, workflow_id); + if (workflow_row == null) { + log.err("subgraph node {s}: workflow {s} not found", .{ node_name, workflow_id }); + return TaskNodeResult{ .failed = "subgraph workflow not found" }; + } + const definition = workflow_row.?.definition_json; + + // Build input state from parent state using input_mapping + const input_mapping_json = getNodeField(alloc, node_json, "input_mapping") orelse "{}"; + const child_input = buildSubgraphInput(alloc, state_json, input_mapping_json) catch "{}"; + + // Get schema from child workflow for initState + const child_schema = getSchemaJson(alloc, definition); + const child_state = state_mod.initState(alloc, child_input, child_schema) catch try alloc.dupe(u8, child_input); + + // Create child run + const child_id_buf = ids.generateId(); + const child_id = try alloc.dupe(u8, &child_id_buf); + try self.store.createRunWithState(child_id, workflow_id, definition, child_input, child_state); + try self.store.setParentRunId(child_id, run_row.id); + try self.store.updateRunStatus(child_id, "running", null); + + // Create step record for the subgraph node + const step_id_buf = ids.generateId(); + const step_id = try alloc.dupe(u8, &step_id_buf); + try self.store.insertStep(step_id, run_row.id, node_name, "subgraph", "running", "{}", 1, null, null, 
null); + try self.store.insertEvent(run_row.id, step_id, "step.running", "{}"); + + // Execute child run inline (recursive call to processRunWithDepth) + const child_run = (try self.store.getRun(alloc, child_id)).?; + self.processRunInline(alloc, child_run, recursion_depth + 1); + + // Check child run result + const completed_child = (try self.store.getRun(alloc, child_id)).?; + if (!std.mem.eql(u8, completed_child.status, "completed")) { + const child_error = completed_child.error_text orelse "subgraph did not complete"; + try self.store.updateStepStatus(step_id, "failed", null, null, child_error, 1); + return TaskNodeResult{ .failed = child_error }; + } + + // Extract output_key from child's final state + const output_key = getNodeField(alloc, node_json, "output_key") orelse "output"; + const child_final_state = completed_child.state_json orelse "{}"; + + // Get the value at output_key from child state + const output_path = try std.fmt.allocPrint(alloc, "state.{s}", .{output_key}); + const output_value = state_mod.getStateValue(alloc, child_final_state, output_path) catch null; + + // Build state_updates: {output_key: value} + const state_updates = if (output_value) |val| + try std.fmt.allocPrint(alloc, "{{\"{s}\":{s}}}", .{ output_key, val }) + else + try std.fmt.allocPrint(alloc, "{{\"{s}\":null}}", .{output_key}); - // No wait configuration -- fail - try self.failStepWithError(alloc, run_row, step, "wait step missing duration_ms, until_ms, or signal"); - } + try self.store.updateStepStatus(step_id, "completed", null, state_updates, null, 1); + try self.store.insertEvent(run_row.id, step_id, "step.completed", "{}"); - // ── executeRouterStep ──────────────────────────────────────────── + log.info("subgraph node {s} completed (child run {s})", .{ node_name, child_id }); + return TaskNodeResult{ .completed = .{ .state_updates = state_updates } }; + } - fn executeRouterStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow, 
all_steps: []const types.StepRow) !void { - // 1. Get dependency output - const deps = try self.store.getStepDeps(alloc, step.id); - if (deps.len == 0) { - try self.store.updateStepStatus(step.id, "failed", null, null, "router has no dependencies", step.attempt); - return; - } + // ── executeSendNode ────────────────────────────────────────────── - const dep_step = (try self.store.getStep(alloc, deps[0])) orelse { - try self.store.updateStepStatus(step.id, "failed", null, null, "dependency step not found", step.attempt); - return; + fn executeSendNode(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, runtime: RuntimeBindings, node_name: []const u8, node_json: []const u8, state_json: []const u8) !SendNodeResult { + // Read items_key state path, with items_from kept as a legacy alias. + const items_path = getSendItemsPath(alloc, node_json) orelse { + log.warn("send node {s} missing items_key/items_from", .{node_name}); + return SendNodeResult{ .state_updates = null }; }; - const dep_output = extractOutputField(alloc, dep_step.output_json orelse "") catch ""; - // 2. Parse routes from workflow definition (routes is a JSON object, not a string) - const routes_str = try getStepFieldRaw(alloc, run_row.workflow_json, step.def_step_id, "routes") orelse { - try self.store.updateStepStatus(step.id, "failed", null, null, "router missing routes", step.attempt); - return; + // Get the target_node + const target_node = getNodeField(alloc, node_json, "target_node") orelse { + log.warn("send node {s} missing target_node", .{node_name}); + return SendNodeResult{ .state_updates = null }; }; - const default_target = try getStepField(alloc, run_row.workflow_json, step.def_step_id, "default"); - - // 3. 
Parse routes JSON object and find match - var matched_target: ?[]const u8 = null; - var all_targets: std.ArrayListUnmanaged([]const u8) = .empty; - - const parsed = std.json.parseFromSlice(std.json.Value, alloc, routes_str, .{}) catch { - try self.store.updateStepStatus(step.id, "failed", null, null, "invalid routes JSON", step.attempt); - return; + // Get target node definition from workflow + const target_json = getNodeJson(alloc, run_row.workflow_json, target_node) orelse { + log.warn("send node {s} target {s} not found", .{ node_name, target_node }); + return SendNodeResult{ .state_updates = null }; }; - if (parsed.value == .object) { - var it = parsed.value.object.iterator(); - while (it.next()) |entry| { - const target = switch (entry.value_ptr.*) { - .string => |s| s, - else => continue, - }; - try all_targets.append(alloc, target); - - if (matched_target == null) { - // Check if dep_output contains the route key - if (std.mem.indexOf(u8, dep_output, entry.key_ptr.*) != null) { - matched_target = target; - } - } - } + // Read items from state + const items_json = state_mod.getStateValue(alloc, state_json, items_path) catch null; + if (items_json == null) { + log.warn("send node {s}: no items at path {s}", .{ node_name, items_path }); + return SendNodeResult{ .state_updates = null }; } - // 4. 
Use default if no match - if (matched_target == null) { - matched_target = default_target; + // Parse items as array + const items_parsed = json.parseFromSlice(json.Value, alloc, items_json.?, .{}) catch { + log.warn("send node {s}: items not valid JSON", .{node_name}); + return SendNodeResult{ .state_updates = null }; + }; + if (items_parsed.value != .array) { + log.warn("send node {s}: items not an array", .{node_name}); + return SendNodeResult{ .state_updates = null }; } - if (matched_target == null) { - try self.store.updateStepStatus(step.id, "failed", null, null, "no matching route and no default", step.attempt); - return; - } + // Build worker list once before iterating items + const worker_infos = try self.buildWorkerInfos(alloc); + const required_tags = getNodeTags(alloc, target_json); - // 5. Check if matched target is a backward edge (cycle) - const cycle_handled = try self.handleCycleBack(alloc, run_row, step, matched_target.?, all_steps); - if (cycle_handled) return; // Cycle was handled, step is already completed + // For each item, execute the target node + var results: std.ArrayListUnmanaged([]const u8) = .empty; + for (items_parsed.value.array.items, 0..) |item, idx| { + // Serialize item + const item_str = serializeJsonValue(alloc, item) catch continue; - // 6. Skip all non-matched targets - for (all_targets.items) |target| { - if (!std.mem.eql(u8, target, matched_target.?)) { - self.skipStepByDefId(alloc, all_steps, run_row.id, target) catch {}; - } - } + // Get prompt template from target node + const prompt_template = getNodeField(alloc, target_json, "prompt_template") orelse continue; - // 7. 
Mark router completed - const output = try std.fmt.allocPrint(alloc, "{{\"output\":\"routed\",\"routed_to\":\"{s}\"}}", .{matched_target.?}); - try self.store.updateStepStatus(step.id, "completed", null, output, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", output); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.completed", run_row.id, step.id, output, self.metrics); - log.info("router step {s} routed to {s}", .{ step.id, matched_target.? }); - } + // Render with item + const rendered = self.renderWorkflowTemplate(alloc, prompt_template, state_json, runtime, item_str) catch continue; - // ── executeLoopStep ───────────────────────────────────────────── - // - // First tick (step is "ready", no children exist): - // - Parse body array from workflow definition - // - Create child step instances for iteration 0 - // - Chain body steps sequentially within the iteration - // - Mark loop step as "running" - - fn executeLoopStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { - // Parse body array from step definition - const body_raw = try getStepFieldRaw(alloc, run_row.workflow_json, step.def_step_id, "body") orelse { - log.warn("no body for loop step {s}", .{step.def_step_id}); - try self.store.updateStepStatus(step.id, "failed", null, null, "missing body in loop definition", step.attempt); - return; - }; + const selected_worker = try dispatch.selectWorker(alloc, worker_infos, required_tags); + if (selected_worker == null) { + try results.append(alloc, "null"); + continue; + } + const worker = selected_worker.?; - const body_parsed = std.json.parseFromSlice(std.json.Value, alloc, body_raw, .{}) catch { - try self.store.updateStepStatus(step.id, "failed", null, null, "invalid body JSON in loop definition", step.attempt); - return; - }; + // Create child step + const child_step_id_buf = ids.generateId(); + const child_step_id = try alloc.dupe(u8, &child_step_id_buf); + 
const child_def_id = try std.fmt.allocPrint(alloc, "{s}_{d}", .{ node_name, idx }); + try self.store.insertStep(child_step_id, run_row.id, child_def_id, "task", "running", item_str, 1, null, null, @as(?i64, @intCast(idx))); + try self.store.insertEvent(run_row.id, child_step_id, "step.running", "{}"); - if (body_parsed.value != .array or body_parsed.value.array.items.len == 0) { - try self.store.updateStepStatus(step.id, "failed", null, null, "body must be a non-empty array", step.attempt); - return; + const dr = try dispatch.dispatchStep( + alloc, + worker.url, + worker.token, + worker.protocol, + worker.model, + run_row.id, + child_step_id, + rendered, + ); + + if (dr.success) { + const output_json = try wrapOutput(alloc, dr.output); + try self.store.updateStepStatus(child_step_id, "completed", worker.id, output_json, null, 1); + try self.store.insertEvent(run_row.id, child_step_id, "step.completed", "{}"); + try results.append(alloc, try jsonStringify(alloc, dr.output)); + } else { + try self.store.updateStepStatus(child_step_id, "failed", worker.id, null, dr.error_text, 1); + try results.append(alloc, "null"); + } } - const body_items = body_parsed.value.array.items; + // Build state_updates from collected results + const results_json = try serializeStringArray(alloc, results.items); + const output_key = getNodeField(alloc, node_json, "output_key") orelse "send_results"; + const state_updates = try std.fmt.allocPrint(alloc, "{{\"{s}\":{s}}}", .{ output_key, results_json }); - // Create child steps for iteration 0 - try self.createLoopIterationChildren(alloc, run_row, step, body_items, 0); + // Create parent step record + const step_id_buf = ids.generateId(); + const step_id = try alloc.dupe(u8, &step_id_buf); + try self.store.insertStep(step_id, run_row.id, node_name, "send", "completed", "{}", 1, null, null, null); + try self.store.updateStepStatus(step_id, "completed", null, state_updates, null, 1); + try self.store.insertEvent(run_row.id, step_id, 
"step.completed", "{}"); - // Mark loop step as "running" - try self.store.updateStepStatus(step.id, "running", null, null, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.running", "{}"); - log.info("loop step {s} started iteration 0", .{step.id}); + return SendNodeResult{ .state_updates = state_updates }; } - // ── pollRunningLoopStep ───────────────────────────────────────── - // - // Checks progress of a running loop step each tick: - // - Find current iteration (max iteration_index) - // - Check if all children in current iteration are done - // - If any failed -> loop fails - // - If all done: evaluate exit_condition - // - If met -> loop completes - // - If max_iterations reached -> loop completes - // - Else -> create next iteration - - fn pollRunningLoopStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { - // Get all children of this loop step - const children = try self.store.getChildSteps(alloc, step.id); - if (children.len == 0) return; // No children yet, wait - - // Find the current (max) iteration_index - var max_iter: i64 = 0; - for (children) |child| { - if (child.iteration_index > max_iter) { - max_iter = child.iteration_index; - } - } + fn renderWorkflowTemplate( + self: *Engine, + alloc: std.mem.Allocator, + template: []const u8, + state_json: []const u8, + runtime: RuntimeBindings, + item_json: ?[]const u8, + ) ![]const u8 { + return templates.renderTemplateWithStore(alloc, template, state_json, runtime.input_json, item_json, runtime.storeAccess(self.store_fetcher)); + } - // Check if all children in the current iteration are in terminal states - var all_done = true; - var any_failed = false; - var last_child_output: ?[]const u8 = null; + fn buildRuntimeBindings(self: *Engine, alloc: std.mem.Allocator, workflow_json: []const u8, state_json: []const u8, input_json: ?[]const u8) RuntimeBindings { + return .{ + .input_json = input_json, + .task_id = 
getRuntimeStringSetting(alloc, state_json, workflow_json, &.{"task_id"}), + .tracker = if (self.trusted_tracker_url) |base_url| + .{ + .base_url = base_url, + .api_token = self.trusted_tracker_api_token, + } + else + null, + }; + } - for (children) |child| { - if (child.iteration_index != max_iter) continue; + fn applyStoreUpdates(self: *Engine, alloc: std.mem.Allocator, state_json: []const u8, store_updates_json: []const u8, runtime: RuntimeBindings) !void { + const access = runtime.storeAccess(self.store_fetcher) orelse return error.StoreNotConfigured; + const parsed = try json.parseFromSlice(json.Value, alloc, store_updates_json, .{}); - if (std.mem.eql(u8, child.status, "failed")) { - any_failed = true; - continue; - } - if (std.mem.eql(u8, child.status, "completed") or std.mem.eql(u8, child.status, "skipped")) { - // Track the last completed child's output (by item_index order) - if (child.output_json != null) { - last_child_output = child.output_json; + switch (parsed.value) { + .object => try self.applySingleStoreUpdate(alloc, access, state_json, parsed.value.object), + .array => |arr| { + for (arr.items) |item| { + if (item != .object) return error.InvalidStoreUpdates; + try self.applySingleStoreUpdate(alloc, access, state_json, item.object); } - continue; - } - // Still pending/ready/running - all_done = false; + }, + else => return error.InvalidStoreUpdates, } + } - if (!all_done) return; // Not done yet, wait - - if (any_failed) { - // Loop fails if any child fails - try self.store.updateStepStatus(step.id, "failed", null, null, "loop child step failed", step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.failed", "{}"); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.failed", run_row.id, step.id, "{}", self.metrics); - log.info("loop step {s} failed (child failed)", .{step.id}); - return; - } + fn applySingleStoreUpdate(self: *Engine, alloc: std.mem.Allocator, access: templates.StoreAccess, state_json: []const u8, obj: 
json.ObjectMap) !void { + const namespace_val = obj.get("namespace") orelse return error.InvalidStoreUpdates; + const key_val = obj.get("key") orelse return error.InvalidStoreUpdates; + const value_val = obj.get("value") orelse return error.InvalidStoreUpdates; - // All children in current iteration are done. Evaluate exit_condition. - const exit_condition = try getStepField(alloc, run_row.workflow_json, step.def_step_id, "exit_condition"); - const max_iterations = try getStepFieldInt(alloc, run_row.workflow_json, step.def_step_id, "max_iterations") orelse 10; + if (namespace_val != .string or key_val != .string) return error.InvalidStoreUpdates; - // Extract output text from last child for condition matching - const last_output_text = if (last_child_output) |oj| - (extractOutputField(alloc, oj) catch oj) - else - ""; + const value_json = try resolveStoreUpdateValue(alloc, state_json, value_val); + try self.store_writer(alloc, access.base_url, access.api_token, namespace_val.string, key_val.string, value_json); + } - // Check exit condition (substring match, same as condition step) - const condition_met = if (exit_condition) |cond| - std.mem.indexOf(u8, last_output_text, cond) != null - else - false; - - if (condition_met) { - // Exit condition met -- loop completes with last child's output - const output = last_child_output orelse try wrapOutput(alloc, "loop completed"); - try self.store.updateStepStatus(step.id, "completed", null, output, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", output); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.completed", run_row.id, step.id, output, self.metrics); - log.info("loop step {s} completed (exit condition met at iteration {d})", .{ step.id, max_iter }); - return; - } + // ── Async polling ──────────────────────────────────────────────── - // Check if max_iterations reached - if (max_iter + 1 >= max_iterations) { - // Max iterations reached -- loop completes with 
last child's output - const output = last_child_output orelse try wrapOutput(alloc, "loop completed (max iterations)"); - try self.store.updateStepStatus(step.id, "completed", null, output, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", output); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.completed", run_row.id, step.id, output, self.metrics); - log.info("loop step {s} completed (max iterations {d} reached)", .{ step.id, max_iterations }); - return; - } + fn pollAsyncTaskStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { + const input_json = step.input_json; + if (input_json.len == 0) return; - // Create next iteration - const next_iter = max_iter + 1; + const parsed = json.parseFromSlice(json.Value, alloc, input_json, .{}) catch return; + if (parsed.value != .object) return; - // Re-parse body to get the body step def IDs - const body_raw = try getStepFieldRaw(alloc, run_row.workflow_json, step.def_step_id, "body") orelse return; - const body_parsed = std.json.parseFromSlice(std.json.Value, alloc, body_raw, .{}) catch return; - if (body_parsed.value != .array) return; - const body_items = body_parsed.value.array.items; - - try self.createLoopIterationChildren(alloc, run_row, step, body_items, next_iter); - log.info("loop step {s} started iteration {d}", .{ step.id, next_iter }); - } - - /// Create child steps for one iteration of a loop. - fn createLoopIterationChildren(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, loop_step: types.StepRow, body_items: []const std.json.Value, iteration: i64) !void { - var prev_child_id: ?[]const u8 = null; - - for (body_items, 0..) 
|body_item, i| { - // Each body_item should be a string (step def ID) - const body_def_id = switch (body_item) { - .string => |s| s, - else => continue, - }; - - // Look up the body step's type from the workflow definition - const body_step_type = try getStepField(alloc, run_row.workflow_json, body_def_id, "type") orelse "task"; - - // Generate unique child step ID - const child_id_buf = ids.generateId(); - const child_id = try alloc.dupe(u8, &child_id_buf); - - // First step in chain is "ready", rest are "pending" - const initial_status: []const u8 = if (i == 0) "ready" else "pending"; - const idx: i64 = @intCast(i); + const async_flag = parsed.value.object.get("async_pending") orelse return; + if (async_flag != .bool or !async_flag.bool) return; - try self.store.insertStepWithIteration( - child_id, - run_row.id, - body_def_id, // original def_step_id for template/tag lookup - body_step_type, - initial_status, - "{}", // input_json - 1, // max_attempts - null, // timeout_ms - loop_step.id, // parent_step_id - idx, // item_index (position in body) - iteration, // iteration_index - ); + const corr_val = parsed.value.object.get("correlation_id") orelse return; + if (corr_val != .string) return; + const correlation_id = corr_val.string; - // Chain: this step depends on previous step in the body - if (prev_child_id) |prev_id| { - try self.store.insertStepDep(child_id, prev_id); + const queue = self.response_queue orelse return; + const response = queue.take(correlation_id) orelse { + if (step.timeout_ms) |timeout_ms| { + if (step.started_at_ms) |started_at| { + const elapsed = ids.nowMs() - started_at; + if (elapsed > timeout_ms) { + const err_text = try std.fmt.allocPrint(alloc, "async step timed out after {d}ms", .{timeout_ms}); + try self.store.updateStepStatus(step.id, "failed", step.worker_id, null, err_text, step.attempt); + try self.store.insertEvent(run_row.id, step.id, "step.failed", "{}"); + if (self.metrics) |m| { + 
metrics_mod.Metrics.incr(&m.worker_dispatch_failure_total); + } + callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.failed", run_row.id, step.id, "{}", self.metrics); + log.err("async step {s} timed out", .{step.id}); + } + } } - - prev_child_id = child_id; - } - } - - // ── executeSubWorkflowStep ────────────────────────────────────── - // - // First tick (step is "ready", child_run_id is null): - // - Get nested workflow definition - // - Create a child run with the nested workflow - // - Create child run's steps - // - Store child_run_id on the parent step - // - Mark step as "running" - - fn executeSubWorkflowStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { - // 1. Get nested workflow definition from the step def - const workflow_raw = try getStepFieldRaw(alloc, run_row.workflow_json, step.def_step_id, "workflow") orelse { - log.warn("no workflow for sub_workflow step {s}", .{step.def_step_id}); - try self.store.updateStepStatus(step.id, "failed", null, null, "missing workflow in sub_workflow definition", step.attempt); - return; - }; - - // 2. 
Parse the nested workflow to extract steps - const nested_parsed = std.json.parseFromSlice(std.json.Value, alloc, workflow_raw, .{}) catch { - try self.store.updateStepStatus(step.id, "failed", null, null, "invalid workflow JSON in sub_workflow definition", step.attempt); return; }; - if (nested_parsed.value != .object) { - try self.store.updateStepStatus(step.id, "failed", null, null, "workflow must be a JSON object", step.attempt); - return; + if (response.success) { + const output_json = try wrapOutput(alloc, response.output); + try self.store.updateStepStatus(step.id, "completed", step.worker_id, output_json, null, step.attempt); + try self.store.insertEvent(run_row.id, step.id, "step.completed", "{}"); + if (step.worker_id) |wid| { + try self.store.markWorkerSuccess(wid, ids.nowMs()); + } + if (self.metrics) |m| { + metrics_mod.Metrics.incr(&m.worker_dispatch_success_total); + } + callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.completed", run_row.id, step.id, output_json, self.metrics); + log.info("async step {s} completed", .{step.id}); + } else { + const err_text = response.error_text orelse "async dispatch failed"; + try self.store.updateStepStatus(step.id, "failed", step.worker_id, null, err_text, step.attempt); + try self.store.insertEvent(run_row.id, step.id, "step.failed", "{}"); + if (step.worker_id) |wid| { + const now_ms = ids.nowMs(); + const circuit_until = now_ms + self.runtime_cfg.worker_circuit_breaker_ms; + try self.store.markWorkerFailure(wid, err_text, now_ms, self.runtime_cfg.worker_failure_threshold, circuit_until); + } + if (self.metrics) |m| { + metrics_mod.Metrics.incr(&m.worker_dispatch_failure_total); + } + callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.failed", run_row.id, step.id, "{}", self.metrics); + log.err("async step {s} failed: {s}", .{ step.id, err_text }); } + } - const nested_steps_val = nested_parsed.value.object.get("steps") orelse { - try self.store.updateStepStatus(step.id, "failed", null, 
null, "workflow missing steps array", step.attempt); - return; - }; - if (nested_steps_val != .array or nested_steps_val.array.items.len == 0) { - try self.store.updateStepStatus(step.id, "failed", null, null, "workflow steps must be a non-empty array", step.attempt); - return; - } + /// Merge async_pending + correlation_id into existing input_json. + fn mergeAsyncState(alloc: std.mem.Allocator, existing_input: []const u8, correlation_id: []const u8) ![]const u8 { + var obj = json.ObjectMap.init(alloc); - // 3. Build input for child run from input_mapping (optional) - var child_input_json: []const u8 = run_row.input_json; - if (try getStepFieldRaw(alloc, run_row.workflow_json, step.def_step_id, "input_mapping")) |mapping_raw| { - const mapping_parsed = std.json.parseFromSlice(std.json.Value, alloc, mapping_raw, .{}) catch null; - if (mapping_parsed) |mp| { - if (mp.value == .object) { - // Render each value in the mapping using template context - const ctx = try buildTemplateContext(alloc, run_row, step, self.store); - var result_buf: std.ArrayListUnmanaged(u8) = .empty; - try result_buf.append(alloc, '{'); - var first = true; - var it = mp.value.object.iterator(); + if (existing_input.len > 0) { + const p = json.parseFromSlice(json.Value, alloc, existing_input, .{}) catch null; + if (p) |parsed| { + if (parsed.value == .object) { + var it = parsed.value.object.iterator(); while (it.next()) |entry| { - if (!first) try result_buf.append(alloc, ','); - first = false; - // Write key - try result_buf.append(alloc, '"'); - try result_buf.appendSlice(alloc, entry.key_ptr.*); - try result_buf.appendSlice(alloc, "\":"); - // Render value as template if it's a string - if (entry.value_ptr.* == .string) { - const rendered = templates.render(alloc, entry.value_ptr.string, ctx) catch entry.value_ptr.string; - try result_buf.append(alloc, '"'); - for (rendered) |ch| { - switch (ch) { - '"' => try result_buf.appendSlice(alloc, "\\\""), - '\\' => try result_buf.appendSlice(alloc, 
"\\\\"), - '\n' => try result_buf.appendSlice(alloc, "\\n"), - '\r' => try result_buf.appendSlice(alloc, "\\r"), - '\t' => try result_buf.appendSlice(alloc, "\\t"), - else => try result_buf.append(alloc, ch), - } - } - try result_buf.append(alloc, '"'); - } else { - // Non-string values: serialize as-is - var out: std.io.Writer.Allocating = .init(alloc); - var jw: std.json.Stringify = .{ .writer = &out.writer }; - jw.write(entry.value_ptr.*) catch {}; - const serialized = out.toOwnedSlice() catch "null"; - try result_buf.appendSlice(alloc, serialized); - } + try obj.put(entry.key_ptr.*, entry.value_ptr.*); } - try result_buf.append(alloc, '}'); - child_input_json = try result_buf.toOwnedSlice(alloc); } } } - // 4. Create child run - const child_run_id_buf = ids.generateId(); - const child_run_id = try alloc.dupe(u8, &child_run_id_buf); - - // Build the child workflow_json: wrap the nested workflow with its steps - // The child run's workflow_json should be the workflow_raw itself - try self.store.insertRun(child_run_id, null, "running", workflow_raw, child_input_json, run_row.callbacks_json); - - // 5. 
Create child run's steps from the nested workflow definition - const nested_steps = nested_steps_val.array.items; - - // Build mapping from def_step_id -> generated step_id - var def_ids: std.ArrayListUnmanaged([]const u8) = .empty; - var gen_ids: std.ArrayListUnmanaged([]const u8) = .empty; - - // First pass: create all steps - for (nested_steps) |step_val| { - if (step_val != .object) continue; - const step_obj = step_val.object; - - const def_step_id = if (step_obj.get("id")) |id_val| blk: { - if (id_val == .string) break :blk id_val.string; - break :blk null; - } else null; - if (def_step_id == null) continue; - - const step_type_str = if (step_obj.get("type")) |t| blk: { - if (t == .string) break :blk t.string; - break :blk "task"; - } else "task"; - - const child_step_id_buf = ids.generateId(); - const child_step_id = try alloc.dupe(u8, &child_step_id_buf); + try obj.put("async_pending", .{ .bool = true }); + try obj.put("correlation_id", .{ .string = correlation_id }); - // Determine initial status - const has_deps = if (step_obj.get("depends_on")) |deps| blk: { - if (deps == .array and deps.array.items.len > 0) break :blk true; - break :blk false; - } else false; - const initial_status: []const u8 = if (has_deps) "pending" else "ready"; + return json.Stringify.valueAlloc(alloc, json.Value{ .object = obj }, .{}); + } +}; - try self.store.insertStep( - child_step_id, - child_run_id, - def_step_id.?, - step_type_str, - initial_status, - "{}", - 1, // max_attempts - null, // timeout_ms - null, // parent_step_id - null, // item_index - ); +// ── findReadyNodes ────────────────────────────────────────────────── + +/// Find nodes that are ready to execute. +/// A node is ready when ALL its inbound edges have their source in completed_nodes. +/// __start__ is always "completed" (synthetic). +/// For conditional edges "source:value", the source is just "source" (strip after `:`) +/// and the edge is only satisfied if route_results[source] == value. 
+pub fn findReadyNodes( + alloc: std.mem.Allocator, + workflow_json: []const u8, + completed_nodes: *std.StringHashMap(void), + route_results: *std.StringHashMap([]const u8), +) ![]const []const u8 { + const parsed = json.parseFromSlice(json.Value, alloc, workflow_json, .{}) catch { + return &.{}; + }; + return findReadyNodesFromRoot(alloc, parsed.value, completed_nodes, route_results); +} - try def_ids.append(alloc, def_step_id.?); - try gen_ids.append(alloc, child_step_id); - } +fn findReadyNodesFromRoot( + alloc: std.mem.Allocator, + root: json.Value, + completed_nodes: *std.StringHashMap(void), + route_results: *std.StringHashMap([]const u8), +) ![]const []const u8 { + if (root != .object) return &.{}; - // Second pass: insert step dependencies - for (nested_steps) |step_val| { - if (step_val != .object) continue; - const step_obj = step_val.object; + // Get edges array + const edges_val = root.object.get("edges") orelse return &.{}; + if (edges_val != .array) return &.{}; - const def_step_id = if (step_obj.get("id")) |id_val| blk: { - if (id_val == .string) break :blk id_val.string; - break :blk null; - } else null; - if (def_step_id == null) continue; + // Get all node names from "nodes" object + const nodes_val = root.object.get("nodes") orelse return &.{}; + if (nodes_val != .object) return &.{}; - // Find generated step_id - const gen_step_id = lookupId(def_ids.items, gen_ids.items, def_step_id.?) orelse continue; + // Build inbound edge map: target -> list of (source, condition_value?) 
+ const EdgeInfo = struct { + source: []const u8, + condition: ?[]const u8, // null for unconditional, "value" for conditional + }; - const deps_val = step_obj.get("depends_on") orelse continue; - if (deps_val != .array) continue; + var inbound = std.StringHashMap(std.ArrayListUnmanaged(EdgeInfo)).init(alloc); + + // Also collect all target nodes mentioned in edges + for (edges_val.array.items) |edge_item| { + if (edge_item != .array) continue; + if (edge_item.array.items.len < 2) continue; + + const source_raw = if (edge_item.array.items[0] == .string) edge_item.array.items[0].string else continue; + const target = if (edge_item.array.items[1] == .string) edge_item.array.items[1].string else continue; + + // Parse source: might be "node:value" for conditional edges + var source: []const u8 = source_raw; + var condition: ?[]const u8 = null; + if (std.mem.indexOfScalar(u8, source_raw, ':')) |colon_pos| { + source = source_raw[0..colon_pos]; + condition = source_raw[colon_pos + 1 ..]; + } + + var entry = inbound.getPtr(target); + if (entry == null) { + try inbound.put(target, std.ArrayListUnmanaged(EdgeInfo){}); + entry = inbound.getPtr(target); + } + try entry.?.append(alloc, .{ + .source = source, + .condition = condition, + }); + } + + // Detect dead nodes: nodes that are unreachable because a conditional + // edge was not taken. A node is dead if ALL its inbound edges are + // conditional and none match the route result. Dead nodes propagate: + // any node whose only inbound edges come from dead nodes is also dead. 
+ var dead_nodes = std.StringHashMap(void).init(alloc); + + // Iterative dead node detection (propagate through the graph) + var changed = true; + while (changed) { + changed = false; + var dead_it = inbound.iterator(); + while (dead_it.next()) |kv| { + const target = kv.key_ptr.*; + const edges = kv.value_ptr.items; + + if (dead_nodes.get(target) != null) continue; + if (completed_nodes.get(target) != null) continue; + + var all_dead_or_unsat = true; + for (edges) |edge| { + if (std.mem.eql(u8, edge.source, "__start__")) { + // __start__ is never dead + all_dead_or_unsat = false; + break; + } - for (deps_val.array.items) |dep_item| { - if (dep_item != .string) continue; - const dep_gen_id = lookupId(def_ids.items, gen_ids.items, dep_item.string) orelse continue; - try self.store.insertStepDep(gen_step_id, dep_gen_id); + // If source is dead, this edge is dead + if (dead_nodes.get(edge.source) != null) continue; + + if (edge.condition) |cond| { + // Conditional edge: check if source completed and condition matched + if (completed_nodes.get(edge.source) != null) { + if (route_results.get(edge.source)) |actual| { + if (std.mem.eql(u8, actual, cond)) { + // This edge IS satisfied + all_dead_or_unsat = false; + break; + } + } + // Source completed but condition didn't match -> dead edge + } else { + // Source not completed yet and not dead -> not dead yet + all_dead_or_unsat = false; + break; + } + } else { + // Non-conditional edge from a live, non-dead source + all_dead_or_unsat = false; + break; + } } - } - - // 6. Store child_run_id on the parent step - try self.store.updateStepChildRunId(step.id, child_run_id); - - // 7. 
Mark sub_workflow step as "running" - try self.store.updateStepStatus(step.id, "running", null, null, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.running", "{}"); - log.info("sub_workflow step {s} created child run {s}", .{ step.id, child_run_id }); - } - - // ── pollRunningSubWorkflowStep ────────────────────────────────── - // - // Checks the child run's status each tick: - // - If completed -> mark parent step completed with child's output - // - If failed -> mark parent step failed - // - Otherwise -> wait - - fn pollRunningSubWorkflowStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { - const child_run_id = step.child_run_id orelse return; // No child run yet - // Get child run - const child_run = (try self.store.getRun(alloc, child_run_id)) orelse { - try self.store.updateStepStatus(step.id, "failed", null, null, "child run not found", step.attempt); - return; - }; - - if (std.mem.eql(u8, child_run.status, "completed")) { - // Get the child run's last completed step output - const child_steps = try self.store.getStepsByRun(alloc, child_run_id); - var last_output: ?[]const u8 = null; - for (child_steps) |cs| { - if (std.mem.eql(u8, cs.status, "completed") and cs.output_json != null) { - last_output = cs.output_json; - } + if (all_dead_or_unsat) { + try dead_nodes.put(target, {}); + changed = true; } - const output = last_output orelse try wrapOutput(alloc, "sub_workflow completed"); - try self.store.updateStepStatus(step.id, "completed", null, output, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", output); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.completed", run_row.id, step.id, output, self.metrics); - log.info("sub_workflow step {s} completed (child run {s})", .{ step.id, child_run_id }); - } else if (std.mem.eql(u8, child_run.status, "failed")) { - const err_text = child_run.error_text orelse "child run failed"; - 
try self.store.updateStepStatus(step.id, "failed", null, null, err_text, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.failed", "{}"); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.failed", run_row.id, step.id, "{}", self.metrics); - log.info("sub_workflow step {s} failed (child run {s})", .{ step.id, child_run_id }); } - // Otherwise: child run still in progress, wait } - // ── executeDebateStep ────────────────────────────────────────── - // - // Phase 1 (step is "ready"): Create N participant child steps - // Phase 2 (step is "running"): polled by pollRunningDebateStep - - fn executeDebateStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { - // 1. Parse count from workflow_json - const count_val = try getStepFieldInt(alloc, run_row.workflow_json, step.def_step_id, "count") orelse { - log.warn("no count for debate step {s}", .{step.def_step_id}); - try self.store.updateStepStatus(step.id, "failed", null, null, "missing count in debate definition", step.attempt); - return; - }; - const count: usize = @intCast(count_val); - - // 2. Get prompt_template and render it - const prompt_template = try getStepField(alloc, run_row.workflow_json, step.def_step_id, "prompt_template") orelse { - log.warn("no prompt_template for debate step {s}", .{step.def_step_id}); - try self.store.updateStepStatus(step.id, "failed", null, null, "missing prompt_template in debate definition", step.attempt); - return; - }; - - const ctx = try buildTemplateContext(alloc, run_row, step, self.store); - const rendered_prompt = templates.render(alloc, prompt_template, ctx) catch |err| { - log.err("template render failed for debate step {s}: {}", .{ step.id, err }); - try self.store.updateStepStatus(step.id, "failed", null, null, "template render failed", step.attempt); - return; - }; - - // 3. 
Create N participant child steps - for (0..count) |i| { - const child_id_buf = ids.generateId(); - const child_id = try alloc.dupe(u8, &child_id_buf); - const child_def_id = try std.fmt.allocPrint(alloc, "{s}_participant_{d}", .{ step.def_step_id, i }); - const idx: i64 = @intCast(i); + // Find ready nodes: for each node, check if all inbound edges are satisfied + // (treating dead source nodes as satisfied) + var ready: std.ArrayListUnmanaged([]const u8) = .empty; - // Store rendered prompt in input_json so participant children can be dispatched. - const input_json = try buildRenderedPromptInputJson(alloc, rendered_prompt); + var inbound_it = inbound.iterator(); + while (inbound_it.next()) |kv| { + const target = kv.key_ptr.*; + const edges = kv.value_ptr.items; - try self.store.insertStep( - child_id, - run_row.id, - child_def_id, - "task", - "ready", - input_json, - step.max_attempts, - step.timeout_ms, - step.id, // parent_step_id - idx, - ); - log.info("created debate participant child step {s} (index {d})", .{ child_id, i }); - } + // Skip if already completed or dead + if (completed_nodes.get(target) != null) continue; + if (dead_nodes.get(target) != null) continue; - // 4. Mark debate step as "running" - try self.store.updateStepStatus(step.id, "running", null, null, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.running", "{}"); - log.info("debate step {s} started with {d} participants", .{ step.id, count }); - } + var all_satisfied = true; + var any_conditional_edge = false; + var any_conditional_satisfied = false; - // ── pollRunningDebateStep ──────────────────────────────────────── - // - // Checks if all participant children are done, then dispatches judge. 
+ for (edges) |edge| { + // __start__ is always satisfied + if (std.mem.eql(u8, edge.source, "__start__")) continue; - fn pollRunningDebateStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { - const children = try self.store.getChildSteps(alloc, step.id); - if (children.len == 0) return; + // Dead sources are considered satisfied (their branch was skipped) + if (dead_nodes.get(edge.source) != null) continue; - // Separate participants from judge child - var participants: std.ArrayListUnmanaged(types.StepRow) = .empty; - var judge_child: ?types.StepRow = null; + const source_completed = completed_nodes.get(edge.source) != null; - for (children) |child| { - if (std.mem.indexOf(u8, child.def_step_id, "_judge") != null) { - judge_child = child; - } else { - try participants.append(alloc, child); + if (!source_completed) { + all_satisfied = false; + break; } - } - // Check if judge child exists and is terminal - if (judge_child) |judge| { - if (std.mem.eql(u8, judge.status, "completed")) { - // Debate completes with judge output - const output = judge.output_json orelse try wrapOutput(alloc, "debate completed"); - try self.store.updateStepStatus(step.id, "completed", null, output, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", output); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.completed", run_row.id, step.id, output, self.metrics); - log.info("debate step {s} completed (judge decided)", .{step.id}); - return; - } else if (std.mem.eql(u8, judge.status, "failed")) { - const err_text = judge.error_text orelse "judge failed"; - try self.store.updateStepStatus(step.id, "failed", null, null, err_text, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.failed", "{}"); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.failed", run_row.id, step.id, "{}", self.metrics); - log.info("debate step {s} failed (judge failed)", .{step.id}); - 
return; + if (edge.condition) |cond| { + any_conditional_edge = true; + if (route_results.get(edge.source)) |actual| { + if (std.mem.eql(u8, actual, cond)) { + any_conditional_satisfied = true; + } + } } - // Judge still in progress, wait - return; } - // No judge child yet — check if all participants are done - var all_done = true; - var any_failed = false; - for (participants.items) |child| { - if (std.mem.eql(u8, child.status, "failed")) { - any_failed = true; - continue; - } - if (!std.mem.eql(u8, child.status, "completed") and !std.mem.eql(u8, child.status, "skipped")) { - all_done = false; - } - } + if (!all_satisfied) continue; - if (!all_done) return; // Still waiting for participants + // If there are conditional edges, at least one must be satisfied + if (any_conditional_edge and !any_conditional_satisfied) continue; - if (any_failed) { - try self.store.updateStepStatus(step.id, "failed", null, null, "debate participant failed", step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.failed", "{}"); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.failed", run_row.id, step.id, "{}", self.metrics); - return; - } + try ready.append(alloc, target); + } - // All participants done — collect outputs and create judge child - var response_items: std.ArrayListUnmanaged([]const u8) = .empty; - for (participants.items) |child| { - if (child.output_json) |oj| { - const extracted = extractOutputField(alloc, oj) catch oj; - try response_items.append(alloc, extracted); - } else { - try response_items.append(alloc, ""); - } - } + return ready.toOwnedSlice(alloc); +} - // Build debate_responses as JSON array - const debate_responses = try serializeStringArray(alloc, response_items.items); - - // Get judge_template - const judge_template = try getStepField(alloc, run_row.workflow_json, step.def_step_id, "judge_template") orelse { - // No judge template — complete with collected responses - const output = try wrapOutput(alloc, debate_responses); 
- try self.store.updateStepStatus(step.id, "completed", null, output, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", output); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.completed", run_row.id, step.id, output, self.metrics); - log.info("debate step {s} completed (no judge template, returning responses)", .{step.id}); - return; - }; +// ── Workflow JSON Helpers ──────────────────────────────────────────── - // Render judge_template: replace {{debate_responses}} with actual responses - // Simple string replacement since it's a special variable - var rendered_judge_prompt: []const u8 = judge_template; - if (std.mem.indexOf(u8, judge_template, "{{debate_responses}}")) |_| { - rendered_judge_prompt = try std.mem.replaceOwned(u8, alloc, judge_template, "{{debate_responses}}", debate_responses); - } +/// Get the JSON string for a specific node from workflow_json. +/// Workflow format: {"nodes": {"node_name": {...}}, "edges": [...]} +fn getNodeJson(alloc: std.mem.Allocator, workflow_json: []const u8, node_name: []const u8) ?[]const u8 { + const parsed = json.parseFromSlice(json.Value, alloc, workflow_json, .{}) catch return null; + return getNodeJsonFromRoot(alloc, parsed.value, node_name); +} - // Create judge child step with rendered prompt in input_json - const judge_id_buf = ids.generateId(); - const judge_id = try alloc.dupe(u8, &judge_id_buf); - const judge_def_id = try std.fmt.allocPrint(alloc, "{s}_judge", .{step.def_step_id}); +fn getNodeJsonFromRoot(alloc: std.mem.Allocator, root: json.Value, node_name: []const u8) ?[]const u8 { + if (root != .object) return null; - const judge_input = try buildRenderedPromptInputJson(alloc, rendered_judge_prompt); - const judge_idx: i64 = @intCast(participants.items.len); + const nodes = root.object.get("nodes") orelse return null; + if (nodes != .object) return null; - try self.store.insertStep( - judge_id, - run_row.id, - judge_def_id, - "task", - "ready", - 
judge_input, - step.max_attempts, - step.timeout_ms, - step.id, // parent_step_id - judge_idx, - ); + const node = nodes.object.get(node_name) orelse return null; + return serializeJsonValue(alloc, node) catch null; +} - log.info("debate step {s} created judge child {s}", .{ step.id, judge_id }); - } +fn workflowHasNode(root: json.Value, node_name: []const u8) bool { + if (root != .object) return false; + const nodes = root.object.get("nodes") orelse return false; + if (nodes != .object) return false; + return nodes.object.get(node_name) != null; +} - // ── executeGroupChatStep ───────────────────────────────────────── - // - // First tick: parse participants, mark as running, start round 1. - // Dispatch is attempted but may fail (no workers in test). +/// Get a string field from a node's JSON. +fn getNodeField(alloc: std.mem.Allocator, node_json: []const u8, field: []const u8) ?[]const u8 { + const parsed = json.parseFromSlice(json.Value, alloc, node_json, .{}) catch return null; + if (parsed.value != .object) return null; + const val = parsed.value.object.get(field) orelse return null; + if (val == .string) return alloc.dupe(u8, val.string) catch null; + return serializeJsonValue(alloc, val) catch null; +} - fn executeGroupChatStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { - // 1. Parse participants from workflow_json - const participants_raw = try getStepFieldRaw(alloc, run_row.workflow_json, step.def_step_id, "participants") orelse { - log.warn("no participants for group_chat step {s}", .{step.def_step_id}); - try self.store.updateStepStatus(step.id, "failed", null, null, "missing participants in group_chat definition", step.attempt); - return; - }; +/// Get the state schema JSON from a workflow definition. +/// Looks up "state_schema" first (canonical key used by API/validation), +/// then falls back to "schema" for inline workflow definitions in tests. 
+fn getSchemaJson(alloc: std.mem.Allocator, workflow_json: []const u8) []const u8 { + return getWorkflowField(alloc, workflow_json, "state_schema") orelse + getWorkflowField(alloc, workflow_json, "schema") orelse + "{}"; +} - const parsed_participants = std.json.parseFromSlice(std.json.Value, alloc, participants_raw, .{}) catch { - try self.store.updateStepStatus(step.id, "failed", null, null, "invalid participants JSON", step.attempt); - return; - }; +/// Get a top-level field from workflow_json. +fn getWorkflowField(alloc: std.mem.Allocator, workflow_json: []const u8, field: []const u8) ?[]const u8 { + const parsed = json.parseFromSlice(json.Value, alloc, workflow_json, .{}) catch return null; + if (parsed.value != .object) return null; + const val = parsed.value.object.get(field) orelse return null; + if (val == .string) return alloc.dupe(u8, val.string) catch null; + return serializeJsonValue(alloc, val) catch null; +} - if (parsed_participants.value != .array or parsed_participants.value.array.items.len == 0) { - try self.store.updateStepStatus(step.id, "failed", null, null, "participants must be a non-empty array", step.attempt); - return; - } +fn getRuntimeStringSetting( + alloc: std.mem.Allocator, + state_json: []const u8, + workflow_json: []const u8, + field_names: []const []const u8, +) ?[]const u8 { + for (field_names) |field_name| { + if (getConfigString(alloc, state_json, field_name)) |value| return value; + } + for (field_names) |field_name| { + if (getWorkflowField(alloc, workflow_json, field_name)) |value| return value; + } + return null; +} - // 2. 
Get prompt_template for round 1 - const prompt_template = try getStepField(alloc, run_row.workflow_json, step.def_step_id, "prompt_template") orelse { - try self.store.updateStepStatus(step.id, "failed", null, null, "missing prompt_template in group_chat definition", step.attempt); - return; - }; +fn getConfigString(alloc: std.mem.Allocator, state_json: []const u8, field_name: []const u8) ?[]const u8 { + const path = std.fmt.allocPrint(alloc, "state.__config.{s}", .{field_name}) catch return null; + defer alloc.free(path); - // 3. Render prompt template - const ctx = try buildTemplateContext(alloc, run_row, step, self.store); - const rendered_prompt = templates.render(alloc, prompt_template, ctx) catch |err| { - log.err("template render failed for group_chat step {s}: {}", .{ step.id, err }); - try self.store.updateStepStatus(step.id, "failed", null, null, "template render failed", step.attempt); - return; - }; + const raw = state_mod.getStateValue(alloc, state_json, path) catch return null; + const raw_value = raw orelse return null; + defer alloc.free(raw_value); - // 4. Mark step as "running" - try self.store.updateStepStatus(step.id, "running", null, null, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.running", "{}"); - - // 5. 
Dispatch round 1 to each participant (best-effort, failures logged) - const participant_items = parsed_participants.value.array.items; - for (participant_items) |p_val| { - if (p_val != .object) continue; - const p_obj = p_val.object; - - const role = if (p_obj.get("role")) |r| blk: { - if (r == .string) break :blk r.string; - break :blk "participant"; - } else "participant"; - - // Try to dispatch to a worker matching participant tags - const tags_val = p_obj.get("tags"); - var tag_list: std.ArrayListUnmanaged([]const u8) = .empty; - if (tags_val) |tv| { - if (tv == .array) { - for (tv.array.items) |tag_item| { - if (tag_item == .string) { - try tag_list.append(alloc, tag_item.string); - } - } - } - } + const parsed = json.parseFromSlice(json.Value, alloc, raw_value, .{}) catch return null; + defer parsed.deinit(); + if (parsed.value != .string) return null; + return alloc.dupe(u8, parsed.value.string) catch null; +} - // Get workers - const workers = try self.store.listWorkers(alloc); - var worker_infos: std.ArrayListUnmanaged(dispatch.WorkerInfo) = .empty; - for (workers) |w| { - const current_tasks = self.store.countRunningStepsByWorker(w.id) catch 0; - try worker_infos.append(alloc, .{ - .id = w.id, - .url = w.url, - .token = w.token, - .protocol = w.protocol, - .model = w.model, - .tags_json = w.tags_json, - .max_concurrent = w.max_concurrent, - .status = w.status, - .current_tasks = current_tasks, - }); - } +fn resolveStoreUpdateValue(alloc: std.mem.Allocator, state_json: []const u8, value: json.Value) ![]const u8 { + if (value == .string and std.mem.startsWith(u8, value.string, "state.")) { + const raw = try state_mod.getStateValue(alloc, state_json, value.string); + return raw orelse try alloc.dupe(u8, "null"); + } + return serializeJsonValue(alloc, value); +} - const selected = try dispatch.selectWorker(alloc, worker_infos.items, tag_list.items); - if (selected) |worker| { - const result = try dispatch.dispatchStep( - alloc, - worker.url, - worker.token, 
- worker.protocol, - worker.model, - run_row.id, - step.id, - rendered_prompt, - ); - if (result.success) { - try self.store.insertChatMessage(run_row.id, step.id, 1, role, worker.id, result.output); - } else { - log.warn("group_chat dispatch failed for role {s}: {s}", .{ role, result.error_text orelse "unknown" }); - } - } else { - log.debug("no worker available for group_chat participant role {s}", .{role}); - } - } +fn putStoreValueViaHttp( + alloc: std.mem.Allocator, + base_url: []const u8, + api_token: ?[]const u8, + namespace: []const u8, + key: []const u8, + value_json: []const u8, +) !void { + var client = tracker_client.TrackerClient.init(alloc, base_url, api_token); + const ok = try client.storePutValue(namespace, key, value_json); + if (!ok) return error.StoreWriteFailed; +} - log.info("group_chat step {s} started round 1 with {d} participants", .{ step.id, participant_items.len }); +fn encodePathSegment(allocator: std.mem.Allocator, value: []const u8) ![]const u8 { + var buf: std.ArrayListUnmanaged(u8) = .empty; + errdefer buf.deinit(allocator); + + for (value) |byte| { + if ((byte >= 'A' and byte <= 'Z') or + (byte >= 'a' and byte <= 'z') or + (byte >= '0' and byte <= '9') or + byte == '-' or + byte == '_' or + byte == '.' or + byte == '~') + { + try buf.append(allocator, byte); + } else { + try buf.writer(allocator).print("%{X:0>2}", .{byte}); + } } - // ── pollRunningGroupChatStep ───────────────────────────────────── - // - // Each tick: check current round, dispatch next round or complete. - - fn pollRunningGroupChatStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { - // 1. Get all chat messages for this step - const messages = try self.store.getChatMessages(alloc, step.id); - - // 2. 
Parse configuration - const max_rounds = try getStepFieldInt(alloc, run_row.workflow_json, step.def_step_id, "max_rounds") orelse 5; - const exit_condition = try getStepField(alloc, run_row.workflow_json, step.def_step_id, "exit_condition"); - - // 3. Parse participants to know expected count per round - const participants_raw = try getStepFieldRaw(alloc, run_row.workflow_json, step.def_step_id, "participants") orelse return; - const parsed_participants = std.json.parseFromSlice(std.json.Value, alloc, participants_raw, .{}) catch return; - if (parsed_participants.value != .array) return; - const num_participants: i64 = @intCast(parsed_participants.value.array.items.len); - - // 4. Determine current round from messages - var current_round: i64 = 0; - var current_round_count: i64 = 0; - for (messages) |msg| { - if (msg.round > current_round) { - current_round = msg.round; - current_round_count = 1; - } else if (msg.round == current_round) { - current_round_count += 1; - } - } + return buf.toOwnedSlice(allocator); +} - if (current_round == 0) return; // No messages yet, wait for initial dispatch +var test_store_write_base_url: []const u8 = ""; +var test_store_write_api_token: ?[]const u8 = null; +var test_store_write_namespace: []const u8 = ""; +var test_store_write_key: []const u8 = ""; +var test_store_write_value_json: []const u8 = ""; - // 5. Check if current round is complete (all participants responded) - if (current_round_count < num_participants) { - // Round not complete, wait - return; - } +fn mockStoreWriter( + alloc: std.mem.Allocator, + base_url: []const u8, + api_token: ?[]const u8, + namespace: []const u8, + key: []const u8, + value_json: []const u8, +) !void { + _ = alloc; + test_store_write_base_url = base_url; + test_store_write_api_token = api_token; + test_store_write_namespace = namespace; + test_store_write_key = key; + test_store_write_value_json = value_json; +} - // 6. 
Check exit condition in latest round's messages - if (exit_condition) |cond| { - for (messages) |msg| { - if (msg.round == current_round) { - if (std.mem.indexOf(u8, msg.message, cond) != null) { - // Exit condition met — complete with transcript - const transcript = try buildChatTranscript(alloc, messages); - const output = try wrapOutput(alloc, transcript); - try self.store.updateStepStatus(step.id, "completed", null, output, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", output); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.completed", run_row.id, step.id, output, self.metrics); - log.info("group_chat step {s} completed (exit condition met at round {d})", .{ step.id, current_round }); - return; - } - } - } - } +/// Get worker tags from node definition. +fn getNodeTags(alloc: std.mem.Allocator, node_json: []const u8) []const []const u8 { + const parsed = json.parseFromSlice(json.Value, alloc, node_json, .{}) catch return &.{}; + if (parsed.value != .object) return &.{}; + const tags = parsed.value.object.get("worker_tags") orelse return &.{}; + if (tags != .array) return &.{}; - // 7. Check if max rounds reached - if (current_round >= max_rounds) { - const transcript = try buildChatTranscript(alloc, messages); - const output = try wrapOutput(alloc, transcript); - try self.store.updateStepStatus(step.id, "completed", null, output, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", output); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.completed", run_row.id, step.id, output, self.metrics); - log.info("group_chat step {s} completed (max rounds {d} reached)", .{ step.id, max_rounds }); - return; + var result: std.ArrayListUnmanaged([]const u8) = .empty; + for (tags.array.items) |item| { + if (item == .string) { + result.append(alloc, item.string) catch continue; } + } + return result.toOwnedSlice(alloc) catch &.{}; +} - // 8. 
Start next round — build chat history and dispatch - const next_round = current_round + 1; - const chat_history = try buildChatTranscript(alloc, messages); +// ── JSON / Serialization Helpers ──────────────────────────────────── - const round_template = try getStepField(alloc, run_row.workflow_json, step.def_step_id, "round_template") orelse { - // No round_template — complete with what we have - const output = try wrapOutput(alloc, chat_history); - try self.store.updateStepStatus(step.id, "completed", null, output, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", output); - return; - }; +fn serializeJsonValue(alloc: std.mem.Allocator, value: json.Value) ![]const u8 { + var out: std.io.Writer.Allocating = .init(alloc); + var jw: json.Stringify = .{ .writer = &out.writer }; + try jw.write(value); + return try out.toOwnedSlice(); +} - // Dispatch to each participant with round_template - const participant_items = parsed_participants.value.array.items; - for (participant_items) |p_val| { - if (p_val != .object) continue; - const p_obj = p_val.object; - - const role = if (p_obj.get("role")) |r| blk: { - if (r == .string) break :blk r.string; - break :blk "participant"; - } else "participant"; - - // Render round_template with {{chat_history}} and {{role}} - var rendered = try std.mem.replaceOwned(u8, alloc, round_template, "{{chat_history}}", chat_history); - rendered = try std.mem.replaceOwned(u8, alloc, rendered, "{{role}}", role); - - // Get participant tags - const tags_val = p_obj.get("tags"); - var tag_list: std.ArrayListUnmanaged([]const u8) = .empty; - if (tags_val) |tv| { - if (tv == .array) { - for (tv.array.items) |tag_item| { - if (tag_item == .string) { - try tag_list.append(alloc, tag_item.string); - } - } - } - } +/// Wrap a raw output string as {"output": "..."} JSON. 
+fn wrapOutput(alloc: std.mem.Allocator, output: []const u8) ![]const u8 { + return json.Stringify.valueAlloc(alloc, .{ + .output = output, + }, .{}); +} - // Select worker and dispatch - const workers = try self.store.listWorkers(alloc); - var worker_infos: std.ArrayListUnmanaged(dispatch.WorkerInfo) = .empty; - for (workers) |w| { - const current_tasks = self.store.countRunningStepsByWorker(w.id) catch 0; - try worker_infos.append(alloc, .{ - .id = w.id, - .url = w.url, - .token = w.token, - .protocol = w.protocol, - .model = w.model, - .tags_json = w.tags_json, - .max_concurrent = w.max_concurrent, - .status = w.status, - .current_tasks = current_tasks, - }); - } +/// Escape a string as a JSON string literal (with quotes). +fn jsonStringify(alloc: std.mem.Allocator, s: []const u8) ![]const u8 { + return json.Stringify.valueAlloc(alloc, s, .{}); +} - const selected = try dispatch.selectWorker(alloc, worker_infos.items, tag_list.items); - if (selected) |worker| { - const result = try dispatch.dispatchStep( - alloc, - worker.url, - worker.token, - worker.protocol, - worker.model, - run_row.id, - step.id, - rendered, - ); - if (result.success) { - try self.store.insertChatMessage(run_row.id, step.id, next_round, role, worker.id, result.output); - } else { - log.warn("group_chat round {d} dispatch failed for role {s}", .{ next_round, role }); - } - } else { - log.debug("no worker for group_chat round {d} participant role {s}", .{ next_round, role }); - } - } +/// Resolve the state path used by a send node. `items_key` is the canonical +/// field; `items_from` is accepted as a compatibility alias. +fn getSendItemsPath(alloc: std.mem.Allocator, node_json: []const u8) ?[]const u8 { + return getNodeField(alloc, node_json, "items_key") orelse + getNodeField(alloc, node_json, "items_from"); +} - log.info("group_chat step {s} dispatched round {d}", .{ step.id, next_round }); +/// Build the state update payload for a task/agent node result. +/// +/// Precedence: +/// 1. 
explicit worker-provided `state_updates` +/// 2. node `output_key` / `output_mapping` +/// 3. legacy fallback to `{"output": "..."}` +fn buildTaskStateUpdates(alloc: std.mem.Allocator, node_json: []const u8, output: []const u8) ![]const u8 { + if (extractStateUpdates(alloc, output)) |updates| { + return updates; } - // ── executeSagaStep ───────────────────────────────────────────── - // - // First tick (step is "ready"): - // - Parse body array and compensations map from workflow definition - // - Create first body step as child (status="ready") - // - Initialize saga_state entries for all body steps - // - Mark saga step as "running" - - fn executeSagaStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { - // 1. Parse body array from step definition - const body_raw = try getStepFieldRaw(alloc, run_row.workflow_json, step.def_step_id, "body") orelse { - log.warn("no body for saga step {s}", .{step.def_step_id}); - try self.store.updateStepStatus(step.id, "failed", null, null, "missing body in saga definition", step.attempt); - return; - }; - - const body_parsed = std.json.parseFromSlice(std.json.Value, alloc, body_raw, .{}) catch { - try self.store.updateStepStatus(step.id, "failed", null, null, "invalid body JSON in saga definition", step.attempt); - return; - }; - - if (body_parsed.value != .array or body_parsed.value.array.items.len == 0) { - try self.store.updateStepStatus(step.id, "failed", null, null, "body must be a non-empty array", step.attempt); - return; - } - - const body_items = body_parsed.value.array.items; + const output_key = getNodeField(alloc, node_json, "output_key"); + const output_mapping_json = getNodeObjectField(alloc, node_json, "output_mapping"); + if (output_key == null and output_mapping_json == null) { + return std.fmt.allocPrint(alloc, "{{\"output\":{s}}}", .{try jsonStringify(alloc, output)}); + } - // 2. 
Parse compensations map (optional) - const comp_raw = try getStepFieldRaw(alloc, run_row.workflow_json, step.def_step_id, "compensations"); - var comp_map: ?std.json.ObjectMap = null; - if (comp_raw) |cr| { - const comp_parsed = std.json.parseFromSlice(std.json.Value, alloc, cr, .{}) catch null; - if (comp_parsed) |cp| { - if (cp.value == .object) { - comp_map = cp.value.object; - } - } - } + var arena = std.heap.ArenaAllocator.init(alloc); + defer arena.deinit(); + const arena_alloc = arena.allocator(); - // 3. Initialize saga_state for all body steps and create first child - for (body_items, 0..) |body_item, i| { - const body_def_id = switch (body_item) { - .string => |s| s, - else => continue, - }; + var result = json.ObjectMap.init(arena_alloc); + const parsed_output = json.parseFromSlice(json.Value, arena_alloc, output, .{}) catch null; - // Look up compensation for this body step - var comp_def_id: ?[]const u8 = null; - if (comp_map) |cm| { - if (cm.get(body_def_id)) |cv| { - if (cv == .string) { - comp_def_id = cv.string; + if (output_key) |key| { + if (parsed_output) |parsed| { + try result.put(key, parsed.value); + } else { + try result.put(key, .{ .string = output }); + } + } + + if (output_mapping_json) |mapping_json| { + const parsed_mapping = json.parseFromSlice(json.Value, arena_alloc, mapping_json, .{}) catch null; + if (parsed_mapping) |mapping| { + if (mapping.value == .object and parsed_output != null) { + var it = mapping.value.object.iterator(); + while (it.next()) |entry| { + if (entry.value_ptr.* != .string) continue; + const source_path = entry.value_ptr.string; + const raw_val = state_mod.getStateValue(arena_alloc, output, source_path) catch null; + if (raw_val) |value_json| { + const parsed_value = json.parseFromSlice(json.Value, arena_alloc, value_json, .{}) catch continue; + try result.put(entry.key_ptr.*, parsed_value.value); } } } - - // Insert saga_state entry - try self.store.insertSagaState(run_row.id, step.id, body_def_id, 
comp_def_id); - - // Create child step for first body step only (rest created sequentially) - if (i == 0) { - const body_step_type = try getStepField(alloc, run_row.workflow_json, body_def_id, "type") orelse "task"; - const child_id_buf = ids.generateId(); - const child_id = try alloc.dupe(u8, &child_id_buf); - - try self.store.insertStep( - child_id, - run_row.id, - body_def_id, - body_step_type, - "ready", - step.input_json, - step.max_attempts, - step.timeout_ms, - step.id, // parent_step_id - 0, // item_index - ); - log.info("saga step {s} created first body child {s} (def: {s})", .{ step.id, child_id, body_def_id }); - } } - - // 4. Mark saga step as "running" - try self.store.updateStepStatus(step.id, "running", null, null, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.running", "{}"); - log.info("saga step {s} started with {d} body steps", .{ step.id, body_items.len }); } - // ── pollRunningSagaStep ────────────────────────────────────────── - // - // Each tick: - // - Get saga_state entries to understand progress - // - Find current body step child and check its status - // - If completed: update saga_state, create next body step - // - If all body steps completed: mark saga completed - // - If body step failed: enter compensation mode - // - Track compensation progress - - fn pollRunningSagaStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { - const children = try self.store.getChildSteps(alloc, step.id); - if (children.len == 0) return; - - const saga_states = try self.store.getSagaStates(alloc, run_row.id, step.id); - if (saga_states.len == 0) return; - - // Parse body array to know the order - const body_raw = try getStepFieldRaw(alloc, run_row.workflow_json, step.def_step_id, "body") orelse return; - const body_parsed = std.json.parseFromSlice(std.json.Value, alloc, body_raw, .{}) catch return; - if (body_parsed.value != .array) return; - const body_items = 
body_parsed.value.array.items; - - // Build body def IDs list in order - var body_def_ids: std.ArrayListUnmanaged([]const u8) = .empty; - for (body_items) |bi| { - if (bi == .string) { - try body_def_ids.append(alloc, bi.string); - } - } - - // Check if we're in compensation mode (any saga_state has status "compensating") - var in_compensation = false; - for (saga_states) |ss| { - if (std.mem.eql(u8, ss.status, "compensating")) { - in_compensation = true; - break; - } - } - - if (in_compensation) { - // In compensation mode: check if current compensation child is done - try self.pollSagaCompensation(alloc, run_row, step, children, saga_states, body_def_ids.items); - return; - } - - // Forward mode: check the current body step child - // Find which body step we're on by looking at saga_states - var current_body_idx: ?usize = null; - var failed_body_def_id: ?[]const u8 = null; - - for (saga_states, 0..) |ss, i| { - if (std.mem.eql(u8, ss.status, "pending")) { - // This is the next body step to process or the current one - // Check if there's a child for this body step - var has_child = false; - for (children) |child| { - if (std.mem.eql(u8, child.def_step_id, ss.body_step_id)) { - has_child = true; - if (std.mem.eql(u8, child.status, "completed")) { - // Body step completed — update saga_state - try self.store.updateSagaState(run_row.id, step.id, ss.body_step_id, "completed"); - log.info("saga body step {s} completed", .{ss.body_step_id}); - // Create next body step if there is one - if (i + 1 < saga_states.len) { - const next_def_id = saga_states[i + 1].body_step_id; - const next_type = try getStepField(alloc, run_row.workflow_json, next_def_id, "type") orelse "task"; - const next_id_buf = ids.generateId(); - const next_id = try alloc.dupe(u8, &next_id_buf); - const next_idx: i64 = @intCast(i + 1); - - try self.store.insertStep( - next_id, - run_row.id, - next_def_id, - next_type, - "ready", - step.input_json, - step.max_attempts, - step.timeout_ms, - step.id, - 
next_idx, - ); - log.info("saga step {s} created body child {s} (def: {s})", .{ step.id, next_id, next_def_id }); - } - // Don't process further this tick - return; - } else if (std.mem.eql(u8, child.status, "failed")) { - // Body step failed — enter compensation mode - failed_body_def_id = ss.body_step_id; - current_body_idx = i; - break; - } - // Still running/ready — wait - return; - } - } - if (!has_child) { - // First pending step without a child — this shouldn't happen normally - // since executeSagaStep creates the first and we create subsequent ones - return; - } - break; - } - } - - // Check if ALL body steps are completed - var all_completed = true; - for (saga_states) |ss| { - if (!std.mem.eql(u8, ss.status, "completed")) { - all_completed = false; - break; - } - } - - if (all_completed) { - // Saga completed successfully — output is last body step's output - var last_output: ?[]const u8 = null; - for (children) |child| { - if (std.mem.eql(u8, child.status, "completed") and child.output_json != null) { - // Check if this child is the last body step - if (body_def_ids.items.len > 0 and - std.mem.eql(u8, child.def_step_id, body_def_ids.items[body_def_ids.items.len - 1])) - { - last_output = child.output_json; - } - } - } - const output = last_output orelse try wrapOutput(alloc, "saga completed"); - try self.store.updateStepStatus(step.id, "completed", null, output, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", output); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.completed", run_row.id, step.id, output, self.metrics); - log.info("saga step {s} completed successfully", .{step.id}); - return; - } - - // Check if compensation has fully completed (all compensating states - // have become "compensated" and at least one is "failed") - { - var has_failed_state = false; - var has_unfinished_compensation = false; - for (saga_states) |ss| { - if (std.mem.eql(u8, ss.status, "failed")) { - has_failed_state = 
true; - } else if (std.mem.eql(u8, ss.status, "compensating")) { - has_unfinished_compensation = true; - } - } - if (has_failed_state and !has_unfinished_compensation) { - try self.finishSagaCompensation(alloc, run_row, step, saga_states); - return; - } - } - - // If a body step failed, start compensation - if (failed_body_def_id) |failed_def| { - log.info("saga step {s} body step {s} failed, starting compensation", .{ step.id, failed_def }); - - // Mark the failed body step in saga_state - try self.store.updateSagaState(run_row.id, step.id, failed_def, "failed"); - - // Find completed body steps and start compensating in reverse - // Mark all completed body steps as "compensating" - var completed_steps: std.ArrayListUnmanaged([]const u8) = .empty; - for (saga_states) |ss| { - if (std.mem.eql(u8, ss.status, "completed")) { - try completed_steps.append(alloc, ss.body_step_id); - try self.store.updateSagaState(run_row.id, step.id, ss.body_step_id, "compensating"); - } - } - - if (completed_steps.items.len == 0) { - // No completed steps to compensate — saga fails immediately - const output = try std.fmt.allocPrint(alloc, "{{\"failed_at\":\"{s}\",\"compensated\":[]}}", .{failed_def}); - try self.store.updateStepStatus(step.id, "failed", null, output, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.failed", "{}"); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.failed", run_row.id, step.id, "{}", self.metrics); - log.info("saga step {s} failed at {s}, no compensations needed", .{ step.id, failed_def }); - return; - } - - // Create the last completed step's compensation child (reverse order) - // Start from the last completed body step - const last_completed = completed_steps.items[completed_steps.items.len - 1]; - try self.createCompensationChild(alloc, run_row, step, saga_states, last_completed); - } - } - - /// Create a compensation child step for a given body step. 
- fn createCompensationChild(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, saga_step: types.StepRow, saga_states: []const types.SagaStateRow, body_def_id: []const u8) !void { - // Find the compensation def_id for this body step - var comp_def_id: ?[]const u8 = null; - for (saga_states) |ss| { - if (std.mem.eql(u8, ss.body_step_id, body_def_id)) { - comp_def_id = ss.compensation_step_id; - break; - } - } - - if (comp_def_id == null) { - // No compensation for this step — mark as compensated immediately - try self.store.updateSagaState(run_row.id, saga_step.id, body_def_id, "compensated"); - log.info("saga body step {s} has no compensation, marking compensated", .{body_def_id}); - return; - } - - const comp_type = try getStepField(alloc, run_row.workflow_json, comp_def_id.?, "type") orelse "task"; - const comp_child_id_buf = ids.generateId(); - const comp_child_id = try alloc.dupe(u8, &comp_child_id_buf); - - try self.store.insertStep( - comp_child_id, - run_row.id, - comp_def_id.?, - comp_type, - "ready", - "{}", - 1, // max_attempts - null, // timeout_ms - saga_step.id, // parent_step_id - null, // item_index - ); - log.info("saga step {s} created compensation child {s} for body {s}", .{ saga_step.id, comp_child_id, body_def_id }); - } - - /// Poll compensation progress in a saga step. 
- fn pollSagaCompensation(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow, children: []const types.StepRow, saga_states: []const types.SagaStateRow, body_def_ids: []const []const u8) !void { - // Find the body step currently being compensated (has a running/ready compensation child) - // Work backwards through body_def_ids to find the current compensating step - var compensating_body: ?[]const u8 = null; - var compensating_idx: ?usize = null; - - // Find compensating steps in reverse order (last completed first) - var i: usize = body_def_ids.len; - while (i > 0) { - i -= 1; - for (saga_states) |ss| { - if (std.mem.eql(u8, ss.body_step_id, body_def_ids[i]) and - std.mem.eql(u8, ss.status, "compensating")) - { - compensating_body = body_def_ids[i]; - compensating_idx = i; - break; - } - } - if (compensating_body != null) break; - } - - if (compensating_body == null) { - // All compensations done — build failure output and fail saga - try self.finishSagaCompensation(alloc, run_row, step, saga_states); - return; - } - - // Check if there's a compensation child for this body step - var comp_def_id: ?[]const u8 = null; - for (saga_states) |ss| { - if (std.mem.eql(u8, ss.body_step_id, compensating_body.?)) { - comp_def_id = ss.compensation_step_id; - break; - } - } - - if (comp_def_id == null) { - // No compensation defined — mark as compensated and move on - try self.store.updateSagaState(run_row.id, step.id, compensating_body.?, "compensated"); - return; - } - - // Find the compensation child step - var comp_child: ?types.StepRow = null; - for (children) |child| { - if (std.mem.eql(u8, child.def_step_id, comp_def_id.?)) { - comp_child = child; - } - } - - if (comp_child == null) { - // Compensation child not created yet — create it - try self.createCompensationChild(alloc, run_row, step, saga_states, compensating_body.?); - return; - } - - const comp = comp_child.?; - if (std.mem.eql(u8, comp.status, "completed")) { - // Compensation 
completed — mark this body step as compensated - try self.store.updateSagaState(run_row.id, step.id, compensating_body.?, "compensated"); - log.info("saga compensation for body step {s} completed", .{compensating_body.?}); - - // Find next compensating step (earlier in the list) - if (compensating_idx.? > 0) { - var next_idx: ?usize = null; - var j: usize = compensating_idx.?; - while (j > 0) { - j -= 1; - for (saga_states) |ss| { - if (std.mem.eql(u8, ss.body_step_id, body_def_ids[j]) and - std.mem.eql(u8, ss.status, "compensating")) - { - next_idx = j; - break; - } - } - if (next_idx != null) break; - } - - // Check if any compensating steps remain. We may have already - // updated some to compensated in previous iterations, so re-check. - // The next tick will pick them up via pollSagaCompensation. - } - } else if (std.mem.eql(u8, comp.status, "failed")) { - // Compensation itself failed — saga fails with compensation error - const err_msg = try std.fmt.allocPrint(alloc, "compensation step {s} failed", .{comp_def_id.?}); - try self.store.updateStepStatus(step.id, "failed", null, null, err_msg, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.failed", "{}"); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.failed", run_row.id, step.id, "{}", self.metrics); - log.info("saga step {s} failed during compensation", .{step.id}); - } - // Otherwise compensation child still running/ready — wait - } - - /// Finish saga compensation and mark saga as failed with output. 
- fn finishSagaCompensation(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow, saga_states: []const types.SagaStateRow) !void { - // Build list of compensated steps and find failed_at step - var failed_at: []const u8 = "unknown"; - var compensated: std.ArrayListUnmanaged([]const u8) = .empty; - - for (saga_states) |ss| { - if (std.mem.eql(u8, ss.status, "failed")) { - failed_at = ss.body_step_id; - } else if (std.mem.eql(u8, ss.status, "compensated")) { - try compensated.append(alloc, ss.body_step_id); - } - } - - // Build output JSON - var comp_json: std.ArrayListUnmanaged(u8) = .empty; - try comp_json.append(alloc, '['); - for (compensated.items, 0..) |c, ci| { - if (ci > 0) try comp_json.append(alloc, ','); - try comp_json.append(alloc, '"'); - try comp_json.appendSlice(alloc, c); - try comp_json.append(alloc, '"'); - } - try comp_json.append(alloc, ']'); - const comp_str = try comp_json.toOwnedSlice(alloc); - - const output = try std.fmt.allocPrint(alloc, "{{\"failed_at\":\"{s}\",\"compensated\":{s}}}", .{ failed_at, comp_str }); - - try self.store.updateStepStatus(step.id, "failed", null, output, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.failed", output); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.failed", run_row.id, step.id, output, self.metrics); - log.info("saga step {s} failed at {s}, compensated {d} steps", .{ step.id, failed_at, compensated.items.len }); - } - - // ── handleCycleBack ───────────────────────────────────────────── - // - // When a condition/router routes to an already-completed step, - // detect the cycle and create new step instances for the cycle body. - - fn handleCycleBack(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, routing_step: types.StepRow, target_def_id: []const u8, all_steps: []const types.StepRow) !bool { - // 1. 
Check if target step is already completed/skipped - var target_completed = false; - for (all_steps) |s| { - if (std.mem.eql(u8, s.def_step_id, target_def_id) and - (std.mem.eql(u8, s.status, "completed") or std.mem.eql(u8, s.status, "skipped"))) - { - target_completed = true; - break; - } - } - - if (!target_completed) return false; // Not a backward edge - - // 2. Build cycle_key from routing step's def_step_id - const cycle_key = try std.fmt.allocPrint(alloc, "cycle_{s}", .{routing_step.def_step_id}); - - // 3. Get or initialize cycle state - const cycle_state = try self.store.getCycleState(run_row.id, cycle_key); - var iteration_count: i64 = 0; - var max_iterations: i64 = 10; - - if (cycle_state) |cs| { - iteration_count = cs.iteration_count; - max_iterations = cs.max_iterations; - } - - // Check max_cycle_iterations from workflow config - const wf_max = try getStepFieldInt(alloc, run_row.workflow_json, routing_step.def_step_id, "max_cycle_iterations"); - if (wf_max) |m| { - max_iterations = m; - } - - // 4. Check if limit exceeded - if (iteration_count >= max_iterations) { - const err_msg = try std.fmt.allocPrint(alloc, "cycle iteration limit ({d}) exceeded for {s}", .{ max_iterations, cycle_key }); - try self.store.updateStepStatus(routing_step.id, "failed", null, null, err_msg, routing_step.attempt); - try self.store.insertEvent(run_row.id, routing_step.id, "step.failed", "{}"); - try self.store.updateRunStatus(run_row.id, "failed", err_msg); - log.warn("cycle limit exceeded for {s}", .{cycle_key}); - return true; - } - - // 5. Increment cycle iteration - iteration_count += 1; - try self.store.upsertCycleState(run_row.id, cycle_key, iteration_count, max_iterations); - - // 6. 
Walk workflow_json steps to find the cycle body - // (from target_def_id through routing step's def_step_id) - const parsed = std.json.parseFromSlice(std.json.Value, alloc, run_row.workflow_json, .{}) catch return false; - if (parsed.value != .object) return false; - const steps_val = parsed.value.object.get("steps") orelse return false; - if (steps_val != .array) return false; - - // Build ordered list of step def IDs and their types + depends_on - const StepInfo = struct { - def_id: []const u8, - step_type: []const u8, - depends_on: []const []const u8, - }; - - var step_infos: std.ArrayListUnmanaged(StepInfo) = .empty; - for (steps_val.array.items) |step_val| { - if (step_val != .object) continue; - const step_obj = step_val.object; - const id_val = step_obj.get("id") orelse continue; - if (id_val != .string) continue; - - const stype = if (step_obj.get("type")) |t| blk: { - if (t == .string) break :blk t.string; - break :blk "task"; - } else "task"; - - var deps_list: std.ArrayListUnmanaged([]const u8) = .empty; - if (step_obj.get("depends_on")) |deps_val| { - if (deps_val == .array) { - for (deps_val.array.items) |dep_item| { - if (dep_item == .string) { - try deps_list.append(alloc, dep_item.string); - } - } - } - } - - try step_infos.append(alloc, .{ - .def_id = id_val.string, - .step_type = stype, - .depends_on = try deps_list.toOwnedSlice(alloc), - }); - } - - // Find indices of target and routing step in the workflow - var target_idx: ?usize = null; - var routing_idx: ?usize = null; - for (step_infos.items, 0..) |si, idx| { - if (std.mem.eql(u8, si.def_id, target_def_id)) target_idx = idx; - if (std.mem.eql(u8, si.def_id, routing_step.def_step_id)) routing_idx = idx; - } - - if (target_idx == null or routing_idx == null) return false; - if (target_idx.? >= routing_idx.?) return false; // Not a backward edge - - // 7. 
Create new step instances for target through routing step - var new_step_ids: std.ArrayListUnmanaged([]const u8) = .empty; - var new_def_ids: std.ArrayListUnmanaged([]const u8) = .empty; - - var idx: usize = target_idx.?; - while (idx <= routing_idx.?) : (idx += 1) { - const si = step_infos.items[idx]; - const new_id_buf = ids.generateId(); - const new_id = try alloc.dupe(u8, &new_id_buf); - - // First step in cycle is "ready", rest are "pending" - const initial_status: []const u8 = if (idx == target_idx.?) "ready" else "pending"; - - try self.store.insertStepWithIteration( - new_id, - run_row.id, - si.def_id, - si.step_type, - initial_status, - "{}", - 1, - null, - null, - null, - iteration_count, - ); - - try new_step_ids.append(alloc, new_id); - try new_def_ids.append(alloc, si.def_id); - } - - // 8. Chain new instances with deps among themselves - for (step_infos.items[target_idx.? .. routing_idx.? + 1], 0..) |si, si_idx| { - const new_id = new_step_ids.items[si_idx]; - for (si.depends_on) |dep_def_id| { - // Check if dep is within the cycle body - const dep_new_id = lookupId(new_def_ids.items, new_step_ids.items, dep_def_id); - if (dep_new_id) |did| { - try self.store.insertStepDep(new_id, did); - } - } - } - - // 9. For any step outside the cycle that depended on the routing step, - // add a dep to the new routing step instance - const new_routing_id = new_step_ids.items[new_step_ids.items.len - 1]; - for (all_steps) |s| { - // Skip steps inside the cycle body - var in_cycle = false; - for (new_def_ids.items) |cd| { - if (std.mem.eql(u8, s.def_step_id, cd)) { - in_cycle = true; - break; - } - } - if (in_cycle) continue; - - // Check if this step depends on the old routing step - const deps = try self.store.getStepDeps(alloc, s.id); - for (deps) |dep_id| { - if (std.mem.eql(u8, dep_id, routing_step.id)) { - // Add new dep to the new routing step instance - try self.store.insertStepDep(s.id, new_routing_id); - break; - } - } - } - - // 10. 
Mark the routing step as completed (the current instance) - const output = try std.fmt.allocPrint(alloc, "{{\"output\":\"cycle_back\",\"target\":\"{s}\",\"iteration\":{d}}}", .{ target_def_id, iteration_count }); - try self.store.updateStepStatus(routing_step.id, "completed", null, output, null, routing_step.attempt); - try self.store.insertEvent(run_row.id, routing_step.id, "step.completed", output); - log.info("cycle back from {s} to {s} (iteration {d})", .{ routing_step.def_step_id, target_def_id, iteration_count }); - - return true; - } - - // ── checkRunCompletion ─────────────────────────────────────────── - - fn checkRunCompletion(self: *Engine, run_id: []const u8, alloc: std.mem.Allocator) !void { - const steps = try self.store.getStepsByRun(alloc, run_id); - var all_terminal = true; - var any_failed = false; - for (steps) |step| { - if (std.mem.eql(u8, step.status, "completed") or std.mem.eql(u8, step.status, "skipped")) continue; - if (std.mem.eql(u8, step.status, "failed")) { - any_failed = true; - continue; - } - if (std.mem.eql(u8, step.status, "waiting_approval")) { - all_terminal = false; - continue; - } - all_terminal = false; // pending, ready, running - } - if (all_terminal and !any_failed) { - try self.store.updateRunStatus(run_id, "completed", null); - try self.store.insertEvent(run_id, null, "run.completed", "{}"); - // Fire run.completed callbacks - if (try self.store.getRun(alloc, run_id)) |run_row| { - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "run.completed", run_id, null, "{}", self.metrics); - } - log.info("run {s} completed", .{run_id}); - } else if (all_terminal and any_failed) { - try self.store.updateRunStatus(run_id, "failed", "one or more steps failed"); - try self.store.insertEvent(run_id, null, "run.failed", "{}"); - // Fire run.failed callbacks - if (try self.store.getRun(alloc, run_id)) |run_row| { - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "run.failed", run_id, null, "{}", self.metrics); - } - 
log.info("run {s} failed", .{run_id}); - } - } - - // ── Helpers ────────────────────────────────────────────────────── - - fn skipStepByDefId(self: *Engine, alloc: std.mem.Allocator, all_steps: []const types.StepRow, run_id: []const u8, target_def_id: []const u8) !void { - for (all_steps) |s| { - if (std.mem.eql(u8, s.def_step_id, target_def_id)) { - try self.store.updateStepStatus(s.id, "skipped", null, null, null, s.attempt); - try self.store.insertEvent(run_id, s.id, "step.skipped", "{}"); - log.info("skipped step {s} (def: {s})", .{ s.id, target_def_id }); - break; - } - } - _ = alloc; - } - - fn failStepWithError(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow, err_text: []const u8) !void { - try self.store.updateStepStatus(step.id, "failed", null, null, err_text, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.failed", "{}"); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.failed", run_row.id, step.id, "{}", self.metrics); - } -}; - -fn computeRetryDelayMs(cfg: RuntimeConfig, step: types.StepRow, now_ms: i64) i64 { - var delay = cfg.retry_base_delay_ms; - var remaining_exp = step.attempt - 1; - while (remaining_exp > 0) : (remaining_exp -= 1) { - if (delay >= cfg.retry_max_delay_ms) break; - const doubled = delay * 2; - delay = if (doubled > cfg.retry_max_delay_ms) cfg.retry_max_delay_ms else doubled; - } - - const jitter_cap = if (cfg.retry_jitter_ms > 0) cfg.retry_jitter_ms else 0; - var jitter: i64 = 0; - if (jitter_cap > 0) { - const seed = std.hash.Wyhash.hash(0, step.id); - const mixed = seed ^ @as(u64, @intCast(now_ms)); - jitter = @as(i64, @intCast(mixed % @as(u64, @intCast(jitter_cap + 1)))); - } - return delay + jitter; -} - -// ── Free functions (workflow JSON helpers) ──────────────────────────── - -/// Parse workflow_json to find a step definition by def_step_id and return a string field. 
-fn getStepField(alloc: std.mem.Allocator, workflow_json: []const u8, def_step_id: []const u8, field: []const u8) !?[]const u8 { - const parsed = std.json.parseFromSlice(std.json.Value, alloc, workflow_json, .{}) catch { - return null; - }; - // Note: do not deinit here — the alloc is an arena - - const root = parsed.value; - if (root != .object) return null; - - const steps_val = root.object.get("steps") orelse return null; - if (steps_val != .array) return null; - - for (steps_val.array.items) |step_val| { - if (step_val != .object) continue; - const step_obj = step_val.object; - - const id_val = step_obj.get("id") orelse continue; - if (id_val != .string) continue; - if (!std.mem.eql(u8, id_val.string, def_step_id)) continue; - - const field_val = step_obj.get(field) orelse return null; - if (field_val == .string) { - return try alloc.dupe(u8, field_val.string); - } - return null; - } - return null; -} - -/// Parse workflow_json to find a step definition by def_step_id and return a field as raw JSON. -/// Unlike getStepField which only returns strings, this serializes any JSON value type. 
-fn getStepFieldRaw(alloc: std.mem.Allocator, workflow_json: []const u8, def_step_id: []const u8, field: []const u8) !?[]const u8 { - const parsed = std.json.parseFromSlice(std.json.Value, alloc, workflow_json, .{}) catch { - return null; - }; - - const root = parsed.value; - if (root != .object) return null; - - const steps_val = root.object.get("steps") orelse return null; - if (steps_val != .array) return null; - - for (steps_val.array.items) |step_val| { - if (step_val != .object) continue; - const step_obj = step_val.object; - - const id_val = step_obj.get("id") orelse continue; - if (id_val != .string) continue; - if (!std.mem.eql(u8, id_val.string, def_step_id)) continue; - - const field_val = step_obj.get(field) orelse return null; - if (field_val == .string) { - return try alloc.dupe(u8, field_val.string); - } - // Serialize non-string values as JSON - var out: std.io.Writer.Allocating = .init(alloc); - var jw: std.json.Stringify = .{ .writer = &out.writer }; - jw.write(field_val) catch return null; - return out.toOwnedSlice() catch return null; - } - return null; -} - -/// Parse workflow_json to find a step definition by def_step_id and return an integer field. 
-fn getStepFieldInt(alloc: std.mem.Allocator, workflow_json: []const u8, def_step_id: []const u8, field: []const u8) !?i64 { - const parsed = std.json.parseFromSlice(std.json.Value, alloc, workflow_json, .{}) catch { - return null; - }; - - const root = parsed.value; - if (root != .object) return null; - - const steps_val = root.object.get("steps") orelse return null; - if (steps_val != .array) return null; - - for (steps_val.array.items) |step_val| { - if (step_val != .object) continue; - const step_obj = step_val.object; - - const id_val = step_obj.get("id") orelse continue; - if (id_val != .string) continue; - if (!std.mem.eql(u8, id_val.string, def_step_id)) continue; - - const field_val = step_obj.get(field) orelse return null; - if (field_val == .integer) return field_val.integer; - return null; - } - return null; -} - -/// Parse workflow_json to find a step definition and get its worker_tags. -fn getStepTags(alloc: std.mem.Allocator, workflow_json: []const u8, def_step_id: []const u8) ![]const []const u8 { - const parsed = std.json.parseFromSlice(std.json.Value, alloc, workflow_json, .{}) catch { - return &.{}; - }; - - const root = parsed.value; - if (root != .object) return &.{}; - - const steps_val = root.object.get("steps") orelse return &.{}; - if (steps_val != .array) return &.{}; - - for (steps_val.array.items) |step_val| { - if (step_val != .object) continue; - const step_obj = step_val.object; - - const id_val = step_obj.get("id") orelse continue; - if (id_val != .string) continue; - if (!std.mem.eql(u8, id_val.string, def_step_id)) continue; - - const tags_val = step_obj.get("worker_tags") orelse return &.{}; - if (tags_val != .array) return &.{}; - - var tags: std.ArrayListUnmanaged([]const u8) = .empty; - for (tags_val.array.items) |tag_item| { - if (tag_item == .string) { - try tags.append(alloc, try alloc.dupe(u8, tag_item.string)); - } - } - return tags.toOwnedSlice(alloc); - } - return &.{}; -} - -/// Build a template Context from a run's 
input and completed step outputs. -fn buildTemplateContext(alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow, store: *Store) !templates.Context { - // Get all steps for this run to collect outputs - const all_steps = try store.getStepsByRun(alloc, run_row.id); - - var step_outputs: std.ArrayListUnmanaged(templates.Context.StepOutput) = .empty; - for (all_steps) |s| { - if (std.mem.eql(u8, s.status, "completed")) { - // Check if this step has children (fan_out/map) - if (std.mem.eql(u8, s.type, "fan_out") or std.mem.eql(u8, s.type, "map")) { - // Collect child outputs - const children = try store.getChildSteps(alloc, s.id); - var child_outputs: std.ArrayListUnmanaged([]const u8) = .empty; - for (children) |child| { - if (child.output_json) |oj| { - const extracted = extractOutputField(alloc, oj) catch oj; - try child_outputs.append(alloc, extracted); - } - } - try step_outputs.append(alloc, .{ - .step_id = s.def_step_id, - .output = null, - .outputs = child_outputs.items, - }); - } else { - // Regular step — single output - const output = if (s.output_json) |oj| - (extractOutputField(alloc, oj) catch oj) - else - null; - try step_outputs.append(alloc, .{ - .step_id = s.def_step_id, - .output = output, - .outputs = null, - }); - } - } - } - - // Determine item context (for map child steps) - const item: ?[]const u8 = if (step.parent_step_id != null) blk: { - // This is a child step of a map/fan_out — extract item from input_json - break :blk extractItemFromInput(alloc, step.input_json) catch null; - } else null; - - return templates.Context{ - .input_json = run_row.input_json, - .step_outputs = step_outputs.items, - .item = item, - }; -} - -/// Look up a generated ID by definition ID from parallel arrays. -fn lookupId(def_ids: []const []const u8, gen_ids: []const []const u8, target: []const u8) ?[]const u8 { - for (def_ids, 0..) 
|did, i| { - if (std.mem.eql(u8, did, target)) return gen_ids[i]; - } - return null; -} - -/// Find a step's status by ID from a list of steps. -fn findStepStatus(steps: []const types.StepRow, step_id: []const u8) ?[]const u8 { - for (steps) |s| { - if (std.mem.eql(u8, s.id, step_id)) return s.status; - } - return null; -} - -/// Find a step's def_step_id by step ID from a list of steps. -fn findStepDefId(steps: []const types.StepRow, step_id: []const u8) ?[]const u8 { - for (steps) |s| { - if (std.mem.eql(u8, s.id, step_id)) return s.def_step_id; - } - return null; -} - -/// Find a step's output_json by step ID from a list of steps. -fn findStepOutput(steps: []const types.StepRow, step_id: []const u8) ?[]const u8 { - for (steps) |s| { - if (std.mem.eql(u8, s.id, step_id)) { - if (s.output_json) |oj| { - return oj; - } - return null; - } - } - return null; -} - -/// Wrap a raw output string in a JSON object: {"output": "..."} -fn wrapOutput(alloc: std.mem.Allocator, output: []const u8) ![]const u8 { - // Use JSON serializer for proper escaping - var out: std.ArrayListUnmanaged(u8) = .empty; - try out.appendSlice(alloc, "{\"output\":"); - - // JSON-encode the output string - try out.append(alloc, '"'); - for (output) |ch| { - switch (ch) { - '"' => try out.appendSlice(alloc, "\\\""), - '\\' => try out.appendSlice(alloc, "\\\\"), - '\n' => try out.appendSlice(alloc, "\\n"), - '\r' => try out.appendSlice(alloc, "\\r"), - '\t' => try out.appendSlice(alloc, "\\t"), - else => try out.append(alloc, ch), - } - } - try out.append(alloc, '"'); - try out.append(alloc, '}'); - return try out.toOwnedSlice(alloc); -} - -/// Wrap an item value in a JSON object: {"item": "..."} -fn wrapItemJson(alloc: std.mem.Allocator, item: []const u8) ![]const u8 { - var out: std.ArrayListUnmanaged(u8) = .empty; - try out.appendSlice(alloc, "{\"item\":"); - - try out.append(alloc, '"'); - for (item) |ch| { - switch (ch) { - '"' => try out.appendSlice(alloc, "\\\""), - '\\' => try 
out.appendSlice(alloc, "\\\\"), - '\n' => try out.appendSlice(alloc, "\\n"), - '\r' => try out.appendSlice(alloc, "\\r"), - '\t' => try out.appendSlice(alloc, "\\t"), - else => try out.append(alloc, ch), - } - } - try out.append(alloc, '"'); - try out.append(alloc, '}'); - return try out.toOwnedSlice(alloc); -} - -/// Extract the "output" field from a JSON string like {"output": "..."}. -fn extractOutputField(alloc: std.mem.Allocator, json_str: []const u8) ![]const u8 { - const parsed = try std.json.parseFromSlice(std.json.Value, alloc, json_str, .{}); - const root = parsed.value; - if (root != .object) return json_str; - const output_val = root.object.get("output") orelse return json_str; - if (output_val == .string) return try alloc.dupe(u8, output_val.string); - return json_str; -} - -/// Extract an array of strings from a JSON field. -fn extractJsonArray(alloc: std.mem.Allocator, json_str: []const u8, field_name: []const u8) !?[][]const u8 { - const parsed = std.json.parseFromSlice(std.json.Value, alloc, json_str, .{}) catch { - return null; - }; - const root = parsed.value; - if (root != .object) return null; - - const arr_val = root.object.get(field_name) orelse return null; - if (arr_val != .array) return null; - - var items: std.ArrayListUnmanaged([]const u8) = .empty; - for (arr_val.array.items) |item| { - switch (item) { - .string => |s| try items.append(alloc, try alloc.dupe(u8, s)), - else => { - // Serialize non-string values as JSON - var json_out: std.io.Writer.Allocating = .init(alloc); - var jw: std.json.Stringify = .{ .writer = &json_out.writer }; - jw.write(item) catch continue; - const slice = json_out.toOwnedSlice() catch continue; - try items.append(alloc, slice); - }, - } - } - const result = try items.toOwnedSlice(alloc); - return result; -} - -/// Serialize an array of strings to a JSON array string. 
-fn serializeStringArray(alloc: std.mem.Allocator, items: []const []const u8) ![]const u8 { - var buf: std.ArrayListUnmanaged(u8) = .empty; - try buf.append(alloc, '['); - for (items, 0..) |item, i| { - if (i > 0) try buf.append(alloc, ','); - try buf.append(alloc, '"'); - for (item) |ch| { - switch (ch) { - '"' => try buf.appendSlice(alloc, "\\\""), - '\\' => try buf.appendSlice(alloc, "\\\\"), - '\n' => try buf.appendSlice(alloc, "\\n"), - '\r' => try buf.appendSlice(alloc, "\\r"), - '\t' => try buf.appendSlice(alloc, "\\t"), - else => try buf.append(alloc, ch), - } - } - try buf.append(alloc, '"'); - } - try buf.append(alloc, ']'); - return try buf.toOwnedSlice(alloc); -} - -/// Parsed handoff target information. -const HandoffTarget = struct { - tags: []const []const u8, - tags_str: []const u8, - message: ?[]const u8, -}; - -/// Extract handoff_to target from a worker output string. -/// Worker output may be raw text or JSON like: {"output": "...", "handoff_to": {"tags": [...], "message": "..."}} -fn extractHandoffTarget(alloc: std.mem.Allocator, output: []const u8) ?HandoffTarget { - // Try to parse the output as JSON - const parsed = std.json.parseFromSlice(std.json.Value, alloc, output, .{}) catch return null; - const root = parsed.value; - if (root != .object) return null; - - const handoff_val = root.object.get("handoff_to") orelse return null; - if (handoff_val != .object) return null; - - // Extract tags - const tags_val = handoff_val.object.get("tags") orelse return null; - if (tags_val != .array) return null; - - var tag_list: std.ArrayListUnmanaged([]const u8) = .empty; - var tags_str_buf: std.ArrayListUnmanaged(u8) = .empty; - - for (tags_val.array.items, 0..) 
|tag_item, i| { - if (tag_item == .string) { - tag_list.append(alloc, alloc.dupe(u8, tag_item.string) catch return null) catch return null; - if (i > 0) tags_str_buf.append(alloc, ',') catch return null; - tags_str_buf.appendSlice(alloc, tag_item.string) catch return null; - } - } - - if (tag_list.items.len == 0) return null; - - // Extract message (optional) - var message: ?[]const u8 = null; - if (handoff_val.object.get("message")) |msg_val| { - if (msg_val == .string) { - message = alloc.dupe(u8, msg_val.string) catch null; - } - } - - return HandoffTarget{ - .tags = tag_list.toOwnedSlice(alloc) catch return null, - .tags_str = tags_str_buf.toOwnedSlice(alloc) catch return null, - .message = message, - }; -} - -/// Build a formatted chat transcript from chat messages. -fn buildChatTranscript(alloc: std.mem.Allocator, messages: []const types.ChatMessageRow) ![]const u8 { - var buf: std.ArrayListUnmanaged(u8) = .empty; - for (messages, 0..) |msg, i| { - if (i > 0) try buf.appendSlice(alloc, "\\n"); - const line = try std.fmt.allocPrint(alloc, "[Round {d}] {s}: {s}", .{ msg.round, msg.role, msg.message }); - try buf.appendSlice(alloc, line); - } - return try buf.toOwnedSlice(alloc); -} - -/// Build input_json payload that carries an already rendered prompt for child task steps. -fn buildRenderedPromptInputJson(alloc: std.mem.Allocator, rendered_prompt: []const u8) ![]const u8 { - return std.json.Stringify.valueAlloc(alloc, .{ - .rendered_prompt = rendered_prompt, - }, .{}); -} - -/// Extract optional input_json.rendered_prompt for dynamic child task execution. 
-fn extractRenderedPromptFromInput(alloc: std.mem.Allocator, input_json: []const u8) ?[]const u8 { - const parsed = std.json.parseFromSlice(std.json.Value, alloc, input_json, .{}) catch { - return null; - }; - const root = parsed.value; - if (root != .object) return null; - const rendered = root.object.get("rendered_prompt") orelse return null; - if (rendered != .string) return null; - return alloc.dupe(u8, rendered.string) catch null; -} - -/// Extract the "item" field from input_json, or return the whole input_json -/// as item text if it's a simple value. -fn extractItemFromInput(alloc: std.mem.Allocator, input_json: []const u8) ![]const u8 { - const parsed = std.json.parseFromSlice(std.json.Value, alloc, input_json, .{}) catch { - return input_json; - }; - const root = parsed.value; - if (root != .object) return input_json; - const item_val = root.object.get("item") orelse return input_json; - if (item_val == .string) return try alloc.dupe(u8, item_val.string); - return input_json; -} - -// ── Tests ───────────────────────────────────────────────────────────── - -test "Engine: init and stop" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - var engine = Engine.init(&store, allocator, 500); - try std.testing.expect(engine.running.load(.acquire)); - engine.stop(); - try std.testing.expect(!engine.running.load(.acquire)); -} - -test "Engine: tick with no active runs" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - var engine = Engine.init(&store, allocator, 500); - // Should not error — no active runs - try engine.tick(); -} - -test "Engine: checkRunCompletion marks run completed" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - // Insert a run - try store.insertRun("r1", null, "running", "{\"steps\":[]}", "{}", "[]"); - - // Insert a 
completed step - try store.insertStep("s1", "r1", "step1", "task", "completed", "{}", 1, null, null, null); - - var engine = Engine.init(&store, allocator, 500); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - try engine.checkRunCompletion("r1", arena.allocator()); - - // Verify run status is "completed" - const run = (try store.getRun(arena.allocator(), "r1")).?; - try std.testing.expectEqualStrings("completed", run.status); -} - -test "Engine: checkRunCompletion marks run failed" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - try store.insertRun("r1", null, "running", "{\"steps\":[]}", "{}", "[]"); - try store.insertStep("s1", "r1", "step1", "task", "completed", "{}", 1, null, null, null); - try store.insertStep("s2", "r1", "step2", "task", "failed", "{}", 1, null, null, null); - - var engine = Engine.init(&store, allocator, 500); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - try engine.checkRunCompletion("r1", arena.allocator()); - - const run = (try store.getRun(arena.allocator(), "r1")).?; - try std.testing.expectEqualStrings("failed", run.status); -} - -test "Engine: checkRunCompletion does not complete with pending steps" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - try store.insertRun("r1", null, "running", "{\"steps\":[]}", "{}", "[]"); - try store.insertStep("s1", "r1", "step1", "task", "completed", "{}", 1, null, null, null); - try store.insertStep("s2", "r1", "step2", "task", "pending", "{}", 1, null, null, null); - - var engine = Engine.init(&store, allocator, 500); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - try engine.checkRunCompletion("r1", arena.allocator()); - - // Run should still be "running" - const run = (try store.getRun(arena.allocator(), "r1")).?; - try 
std.testing.expectEqualStrings("running", run.status); -} - -test "Engine: pending to ready promotion" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - const wf = - \\{"steps":[{"id":"s1","type":"task","prompt_template":"hello"},{"id":"s2","type":"task","prompt_template":"world","depends_on":["s1"]}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - - // s1 is completed, s2 is pending and depends on s1 - try store.insertStep("step1", "r1", "s1", "task", "completed", "{}", 1, null, null, null); - try store.insertStep("step2", "r1", "s2", "task", "pending", "{}", 1, null, null, null); - try store.insertStepDep("step2", "step1"); - - var engine = Engine.init(&store, allocator, 500); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - // Get run row - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - - // processRun should promote step2 from pending to ready - try engine.processRun(arena.allocator(), run_row); - - // Re-fetch step2 - const step2 = (try store.getStep(arena.allocator(), "step2")).?; - // It should be promoted to "ready" (not "pending") - // Note: since there are no workers, the task step won't actually execute, - // so it stays at "ready" - try std.testing.expectEqualStrings("ready", step2.status); -} - -test "Engine: approval step sets waiting_approval" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - const wf = - \\{"steps":[{"id":"approve1","type":"approval"}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step1", "r1", "approve1", "approval", "ready", "{}", 1, null, null, null); - - var engine = Engine.init(&store, allocator, 500); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try 
engine.processRun(arena.allocator(), run_row); - - const step = (try store.getStep(arena.allocator(), "step1")).?; - try std.testing.expectEqualStrings("waiting_approval", step.status); -} - -test "Engine: fan_out creates child steps" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - const wf = - \\{"steps":[{"id":"fan1","type":"fan_out","count":3}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step1", "r1", "fan1", "fan_out", "ready", "{}", 1, null, null, null); - - var engine = Engine.init(&store, allocator, 500); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - - // fan_out step should be completed - const step = (try store.getStep(arena.allocator(), "step1")).?; - try std.testing.expectEqualStrings("completed", step.status); - - // Should have created 3 child steps - const children = try store.getChildSteps(arena.allocator(), "step1"); - try std.testing.expectEqual(@as(usize, 3), children.len); - - // Each child should be "ready" and type "task" - for (children) |child| { - try std.testing.expectEqualStrings("ready", child.status); - try std.testing.expectEqualStrings("task", child.type); - } -} - -test "Engine: map creates child steps from input array" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - const wf = - \\{"steps":[{"id":"map1","type":"map","items_from":"$.topics"}]} - ; - const input = - \\{"topics":["AI","ML","DL"]} - ; - try store.insertRun("r1", null, "running", wf, input, "[]"); - try store.insertStep("step1", "r1", "map1", "map", "ready", "{}", 1, null, null, null); - - var engine = Engine.init(&store, allocator, 500); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); 
- - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - - // map step should be completed - const step = (try store.getStep(arena.allocator(), "step1")).?; - try std.testing.expectEqualStrings("completed", step.status); - - // Should have created 3 child steps - const children = try store.getChildSteps(arena.allocator(), "step1"); - try std.testing.expectEqual(@as(usize, 3), children.len); -} - -test "getStepField extracts prompt_template" { - const allocator = std.testing.allocator; - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - const wf = - \\{"steps":[{"id":"research","type":"task","prompt_template":"Research {{input.topic}}"}]} - ; - const result = try getStepField(arena.allocator(), wf, "research", "prompt_template"); - try std.testing.expect(result != null); - try std.testing.expectEqualStrings("Research {{input.topic}}", result.?); -} - -test "getStepField returns null for missing step" { - const allocator = std.testing.allocator; - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - const wf = - \\{"steps":[{"id":"research","type":"task"}]} - ; - const result = try getStepField(arena.allocator(), wf, "nonexistent", "prompt_template"); - try std.testing.expect(result == null); -} - -test "getStepFieldInt extracts count" { - const allocator = std.testing.allocator; - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - const wf = - \\{"steps":[{"id":"fan1","type":"fan_out","count":5}]} - ; - const result = try getStepFieldInt(arena.allocator(), wf, "fan1", "count"); - try std.testing.expect(result != null); - try std.testing.expectEqual(@as(i64, 5), result.?); -} - -test "extractJsonArray extracts string array" { - const allocator = std.testing.allocator; - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - const json = - \\{"topics":["AI","ML","DL"]} - ; - const result = 
try extractJsonArray(arena.allocator(), json, "topics"); - try std.testing.expect(result != null); - try std.testing.expectEqual(@as(usize, 3), result.?.len); - try std.testing.expectEqualStrings("AI", result.?[0]); - try std.testing.expectEqualStrings("ML", result.?[1]); - try std.testing.expectEqualStrings("DL", result.?[2]); -} - -test "wrapOutput creates valid JSON" { - const allocator = std.testing.allocator; - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - const result = try wrapOutput(arena.allocator(), "hello world"); - try std.testing.expectEqualStrings("{\"output\":\"hello world\"}", result); -} - -test "wrapOutput escapes special characters" { - const allocator = std.testing.allocator; - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - const result = try wrapOutput(arena.allocator(), "line1\nline2"); - try std.testing.expectEqualStrings("{\"output\":\"line1\\nline2\"}", result); -} - -test "build/extract rendered_prompt input JSON round-trip" { - const allocator = std.testing.allocator; - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - const input_json = try buildRenderedPromptInputJson(arena.allocator(), "say \"hi\"\\nnext"); - const prompt = extractRenderedPromptFromInput(arena.allocator(), input_json); - try std.testing.expect(prompt != null); - try std.testing.expectEqualStrings("say \"hi\"\\nnext", prompt.?); -} - -test "Engine: task step fallback uses input_json.rendered_prompt" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - try store.insertRun("r-rendered", null, "running", "{\"steps\":[]}", "{}", "[]"); - try store.insertWorker("w-rendered", "http://127.0.0.1:1", "", "webhook", null, "[]", 1, "registered"); - try store.insertStep("parent-step", "r-rendered", "missing-parent-def", "task", "completed", "{}", 1, null, null, null); - - var arena = 
std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const rendered_input = try buildRenderedPromptInputJson(arena.allocator(), "child fallback prompt"); - try store.insertStep( - "child-step", - "r-rendered", - "missing-child-def", - "task", - "ready", - rendered_input, - 2, - null, - "parent-step", - 0, - ); - - var engine = Engine.init(&store, allocator, 500); - const run_row = (try store.getRun(arena.allocator(), "r-rendered")).?; - try engine.processRun(arena.allocator(), run_row); - - const child = (try store.getStep(arena.allocator(), "child-step")).?; - try std.testing.expectEqualStrings("ready", child.status); - try std.testing.expectEqual(@as(i64, 2), child.attempt); -} - -test "Engine: rendered_prompt has priority over parent prompt_template" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - const wf = - \\{"steps":[{"id":"parent","type":"debate","prompt_template":"parent template"},{"id":"child","type":"task","prompt_template":"child template"}]} - ; - try store.insertRun("r-priority", null, "running", wf, "{}", "[]"); - try store.insertStep("parent-step", "r-priority", "parent", "debate", "running", "{}", 1, null, null, null); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - const rendered_input = try buildRenderedPromptInputJson(arena.allocator(), "rendered prompt"); - try store.insertStep( - "child-step", - "r-priority", - "child", - "task", - "ready", - rendered_input, - 1, - null, - "parent-step", - 0, - ); - - var engine = Engine.init(&store, allocator, 500); - const run_row = (try store.getRun(arena.allocator(), "r-priority")).?; - const child_step = (try store.getStep(arena.allocator(), "child-step")).?; - const source = (try engine.resolveTaskPromptSource(arena.allocator(), run_row, child_step)).?; - - switch (source) { - .rendered => |prompt| try std.testing.expectEqualStrings("rendered prompt", prompt), - .template => 
try std.testing.expect(false), - } -} - -test "findStepStatus finds matching step" { - const steps = [_]types.StepRow{ - makeTestStepRow("s1", "completed"), - makeTestStepRow("s2", "pending"), - }; - const status = findStepStatus(&steps, "s2"); - try std.testing.expect(status != null); - try std.testing.expectEqualStrings("pending", status.?); -} - -test "findStepStatus returns null for missing step" { - const steps = [_]types.StepRow{ - makeTestStepRow("s1", "completed"), - }; - const status = findStepStatus(&steps, "s999"); - try std.testing.expect(status == null); -} - -fn makeTestStepRow(id: []const u8, status: []const u8) types.StepRow { - return .{ - .id = id, - .run_id = "r1", - .def_step_id = id, - .type = "task", - .status = status, - .worker_id = null, - .input_json = "{}", - .output_json = null, - .error_text = null, - .attempt = 1, - .max_attempts = 1, - .timeout_ms = null, - .next_attempt_at_ms = null, - .parent_step_id = null, - .item_index = null, - .created_at_ms = 0, - .updated_at_ms = 0, - .started_at_ms = null, - .ended_at_ms = null, - .child_run_id = null, - .iteration_index = 0, - }; -} - -// ── Transform step tests ───────────────────────────────────────────── - -test "Engine: transform step renders output_template" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - const wf = - \\{"steps":[{"id":"t1","type":"task","prompt_template":"hello"},{"id":"tr1","type":"transform","output_template":"result: {{steps.t1.output}}"}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - - // Insert task1 as completed with output - try store.insertStep("step_t1", "r1", "t1", "task", "completed", "{}", 1, null, null, null); - try store.updateStepStatus("step_t1", "completed", null, "{\"output\":\"hello\"}", null, 1); - - // Insert transform1 as ready with dependency on task1 - try store.insertStep("step_tr1", "r1", "tr1", "transform", "ready", "{}", 1, null, null, 
null); - try store.insertStepDep("step_tr1", "step_t1"); - - var engine = Engine.init(&store, allocator, 500); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - - // Verify transform completed - const s = (try store.getStep(arena.allocator(), "step_tr1")).?; - try std.testing.expectEqualStrings("completed", s.status); - // Output should contain the rendered template - try std.testing.expect(s.output_json != null); - // The output should contain "hello" from the task step - try std.testing.expect(std.mem.indexOf(u8, s.output_json.?, "hello") != null); -} - -test "Engine: transform step fails without output_template" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - const wf = - \\{"steps":[{"id":"tr1","type":"transform"}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_tr1", "r1", "tr1", "transform", "ready", "{}", 1, null, null, null); - - var engine = Engine.init(&store, allocator, 500); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - - const s = (try store.getStep(arena.allocator(), "step_tr1")).?; - try std.testing.expectEqualStrings("failed", s.status); - try std.testing.expect(s.error_text != null); -} - -// ── Wait step tests ────────────────────────────────────────────────── - -test "Engine: wait step with duration_ms=0 completes after two ticks" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - const wf = - \\{"steps":[{"id":"w1","type":"wait","duration_ms":0}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try 
store.insertStep("step_w1", "r1", "w1", "wait", "ready", "{}", 1, null, null, null); - - var engine = Engine.init(&store, allocator, 500); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - // First tick: step becomes "running" with started_at_ms - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - - const s1 = (try store.getStep(arena.allocator(), "step_w1")).?; - try std.testing.expectEqualStrings("running", s1.status); - try std.testing.expect(s1.started_at_ms != null); - - // Second tick: step should be "completed" since duration=0 - const run_row2 = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row2); - - const s2 = (try store.getStep(arena.allocator(), "step_w1")).?; - try std.testing.expectEqualStrings("completed", s2.status); - try std.testing.expect(s2.output_json != null); -} - -test "Engine: wait step with signal enters waiting_approval" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - const wf = - \\{"steps":[{"id":"w1","type":"wait","signal":"deploy"}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_w1", "r1", "w1", "wait", "ready", "{}", 1, null, null, null); - - var engine = Engine.init(&store, allocator, 500); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - - const s = (try store.getStep(arena.allocator(), "step_w1")).?; - try std.testing.expectEqualStrings("waiting_approval", s.status); -} - -test "Engine: wait step without config fails" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - const wf = - \\{"steps":[{"id":"w1","type":"wait"}]} - ; - try 
store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_w1", "r1", "w1", "wait", "ready", "{}", 1, null, null, null); - - var engine = Engine.init(&store, allocator, 500); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - - const s = (try store.getStep(arena.allocator(), "step_w1")).?; - try std.testing.expectEqualStrings("failed", s.status); -} - -test "Engine: wait step with invalid duration string fails" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - const wf = - \\{"steps":[{"id":"w1","type":"wait","duration_ms":"abc"}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_w1", "r1", "w1", "wait", "ready", "{}", 1, null, null, null); - - var engine = Engine.init(&store, allocator, 500); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); + return serializeJsonValue(alloc, .{ .object = result }); +} - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); +/// Serialize completed_nodes set to JSON array. 
+fn serializeCompletedNodes(alloc: std.mem.Allocator, completed_nodes: *std.StringHashMap(void)) ![]const u8 { + var arr: std.ArrayListUnmanaged([]const u8) = .empty; + var it = completed_nodes.iterator(); + while (it.next()) |entry| { + try arr.append(alloc, entry.key_ptr.*); + } + return json.Stringify.valueAlloc(alloc, arr.items, .{}); +} - const s = (try store.getStep(arena.allocator(), "step_w1")).?; - try std.testing.expectEqualStrings("failed", s.status); - try std.testing.expect(s.error_text != null); - try std.testing.expect(std.mem.indexOf(u8, s.error_text.?, "duration_ms must be an integer") != null); +/// Serialize route_results map + workflow_version to JSON for checkpoint metadata. +fn serializeRouteResults(alloc: std.mem.Allocator, route_results: *std.StringHashMap([]const u8)) !?[]const u8 { + return serializeRouteResultsWithVersion(alloc, route_results, null); } -// ── Router step tests ──────────────────────────────────────────────── +fn serializeRouteResultsWithVersion(alloc: std.mem.Allocator, route_results: *std.StringHashMap([]const u8), wf_version: ?i64) !?[]const u8 { + if (route_results.count() == 0 and wf_version == null) return null; -test "Engine: router step routes to matching target" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); + var obj = json.ObjectMap.init(alloc); - const wf = - \\{"steps":[{"id":"classify","type":"task","prompt_template":"classify"},{"id":"router1","type":"router","routes":{"bug":"fix_bug","feature":"add_feature"}},{"id":"fix_bug","type":"task","prompt_template":"fix"},{"id":"add_feature","type":"task","prompt_template":"add"}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); + if (route_results.count() > 0) { + var rr_obj = json.ObjectMap.init(alloc); + var it = route_results.iterator(); + while (it.next()) |entry| { + try rr_obj.put(entry.key_ptr.*, .{ .string = entry.value_ptr.* }); + } + try obj.put("route_results", 
.{ .object = rr_obj }); + } + + if (wf_version) |v| { + try obj.put("workflow_version", .{ .integer = v }); + } - // classify step completed with "bug" in output - try store.insertStep("step_classify", "r1", "classify", "task", "completed", "{}", 1, null, null, null); - try store.updateStepStatus("step_classify", "completed", null, "{\"output\":\"this is a bug report\"}", null, 1); + return try serializeJsonValue(alloc, .{ .object = obj }); +} - // router step is ready, depends on classify - try store.insertStep("step_router", "r1", "router1", "router", "ready", "{}", 1, null, null, null); - try store.insertStepDep("step_router", "step_classify"); +/// Serialize a string array as JSON. +fn serializeStringArray(alloc: std.mem.Allocator, items: []const []const u8) ![]const u8 { + return json.Stringify.valueAlloc(alloc, items, .{}); +} - // Target steps are pending - try store.insertStep("step_fix", "r1", "fix_bug", "task", "pending", "{}", 1, null, null, null); - try store.insertStepDep("step_fix", "step_router"); - try store.insertStep("step_add", "r1", "add_feature", "task", "pending", "{}", 1, null, null, null); - try store.insertStepDep("step_add", "step_router"); +/// Try to extract "state_updates" from worker output JSON. +/// Worker can return: {"state_updates": {"key": "value"}, ...} +fn extractStateUpdates(alloc: std.mem.Allocator, output: []const u8) ?[]const u8 { + const parsed = json.parseFromSlice(json.Value, alloc, output, .{}) catch return null; + if (parsed.value != .object) return null; + const su = parsed.value.object.get("state_updates") orelse return null; + return serializeJsonValue(alloc, su) catch null; +} - var engine = Engine.init(&store, allocator, 500); +/// Extract "goto" field from worker output JSON. +/// Returns array of target node names. 
Supports: +/// - "goto": "node_name" -> ["node_name"] +/// - "goto": ["node_a", "node_b"] -> ["node_a", "node_b"] +fn extractGotoTargets(alloc: std.mem.Allocator, output: []const u8) ?[]const []const u8 { + const parsed = json.parseFromSlice(json.Value, alloc, output, .{}) catch return null; + if (parsed.value != .object) return null; + const goto_val = parsed.value.object.get("goto") orelse return null; - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); + var targets: std.ArrayListUnmanaged([]const u8) = .empty; + if (goto_val == .string) { + targets.append(alloc, goto_val.string) catch return null; + } else if (goto_val == .array) { + for (goto_val.array.items) |item| { + if (item == .string) { + targets.append(alloc, item.string) catch continue; + } + } + } else { + return null; + } - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); + if (targets.items.len == 0) return null; + return targets.toOwnedSlice(alloc) catch null; +} - // Router should be completed - const router = (try store.getStep(arena.allocator(), "step_router")).?; - try std.testing.expectEqualStrings("completed", router.status); - try std.testing.expect(router.output_json != null); - try std.testing.expect(std.mem.indexOf(u8, router.output_json.?, "fix_bug") != null); +/// Parse interrupt_before / interrupt_after arrays from workflow definition. 
+fn parseBreakpointList(alloc: std.mem.Allocator, workflow_json: []const u8, field: []const u8) []const []const u8 { + const parsed = json.parseFromSlice(json.Value, alloc, workflow_json, .{}) catch return &.{}; + return parseBreakpointListFromRoot(alloc, parsed.value, field); +} - // add_feature should be skipped - const add = (try store.getStep(arena.allocator(), "step_add")).?; - try std.testing.expectEqualStrings("skipped", add.status); +fn parseBreakpointListFromRoot(alloc: std.mem.Allocator, root: json.Value, field: []const u8) []const []const u8 { + if (root != .object) return &.{}; + const arr_val = root.object.get(field) orelse return &.{}; + if (arr_val != .array) return &.{}; - // fix_bug should still be pending (not skipped) - const fix = (try store.getStep(arena.allocator(), "step_fix")).?; - try std.testing.expectEqualStrings("pending", fix.status); + var result: std.ArrayListUnmanaged([]const u8) = .empty; + for (arr_val.array.items) |item| { + if (item == .string) { + result.append(alloc, item.string) catch continue; + } + } + return result.toOwnedSlice(alloc) catch &.{}; } -test "Engine: router step uses default when no match" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); +/// Check if a node name is in a breakpoint list. +fn isInBreakpointList(name: []const u8, list: []const []const u8) bool { + for (list) |item| { + if (std.mem.eql(u8, name, item)) return true; + } + return false; +} - const wf = - \\{"steps":[{"id":"classify","type":"task","prompt_template":"classify"},{"id":"router1","type":"router","routes":{"bug":"fix_bug"},"default":"fix_bug"},{"id":"fix_bug","type":"task","prompt_template":"fix"}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); +/// Get an integer field from a node's JSON. 
+fn getNodeFieldInt(alloc: std.mem.Allocator, node_json: []const u8, field: []const u8) ?i64 { + const parsed = json.parseFromSlice(json.Value, alloc, node_json, .{}) catch return null; + if (parsed.value != .object) return null; + const val = parsed.value.object.get(field) orelse return null; + if (val == .integer) return val.integer; + return null; +} - // classify step completed with something that doesn't match any route - try store.insertStep("step_classify", "r1", "classify", "task", "completed", "{}", 1, null, null, null); - try store.updateStepStatus("step_classify", "completed", null, "{\"output\":\"unknown category\"}", null, 1); +/// Get a float field from a node's JSON. +fn getNodeFieldFloat(alloc: std.mem.Allocator, node_json: []const u8, field: []const u8) ?f64 { + const parsed = json.parseFromSlice(json.Value, alloc, node_json, .{}) catch return null; + if (parsed.value != .object) return null; + const val = parsed.value.object.get(field) orelse return null; + if (val == .float) return val.float; + if (val == .integer) return @as(f64, @floatFromInt(val.integer)); + return null; +} - // router step is ready - try store.insertStep("step_router", "r1", "router1", "router", "ready", "{}", 1, null, null, null); - try store.insertStepDep("step_router", "step_classify"); +/// Get a nested object field as JSON string from a node's JSON. 
+fn getNodeObjectField(alloc: std.mem.Allocator, node_json: []const u8, field: []const u8) ?[]const u8 { + const parsed = json.parseFromSlice(json.Value, alloc, node_json, .{}) catch return null; + if (parsed.value != .object) return null; + const val = parsed.value.object.get(field) orelse return null; + if (val != .object) return null; + return serializeJsonValue(alloc, val) catch null; +} - // Target step - try store.insertStep("step_fix", "r1", "fix_bug", "task", "pending", "{}", 1, null, null, null); - try store.insertStepDep("step_fix", "step_router"); +fn resolveDeclaredRouteValue(alloc: std.mem.Allocator, node_json: []const u8, candidate: ?[]const u8) ?[]const u8 { + const routes_json = getNodeObjectField(alloc, node_json, "routes") orelse return candidate; + const parsed = json.parseFromSlice(json.Value, alloc, routes_json, .{}) catch return candidate; + if (parsed.value != .object) return candidate; - var engine = Engine.init(&store, allocator, 500); + if (candidate) |route_value| { + if (parsed.value.object.get(route_value) != null) return route_value; + } - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); + const default_route = getNodeField(alloc, node_json, "default") orelse return candidate; + if (parsed.value.object.get(default_route) != null) return default_route; + return candidate; +} - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); +// ── Retry Config Helpers (Gap 2) ──────────────────────────────────── - // Router should be completed with default target - const router = (try store.getStep(arena.allocator(), "step_router")).?; - try std.testing.expectEqualStrings("completed", router.status); - try std.testing.expect(router.output_json != null); - try std.testing.expect(std.mem.indexOf(u8, router.output_json.?, "fix_bug") != null); +/// Parse retry.max_attempts from node JSON. Returns null if no retry config. 
+fn parseRetryMaxAttempts(alloc: std.mem.Allocator, node_json: []const u8) ?u32 { + const retry_json = getNodeObjectField(alloc, node_json, "retry") orelse return null; + const val = getNodeFieldInt(alloc, retry_json, "max_attempts") orelse return null; + if (val < 1) return 1; + if (val > 100) return 100; + return @intCast(val); } -test "Engine: router step fails without routes" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); +fn parseRetryInitialMs(alloc: std.mem.Allocator, node_json: []const u8) ?u64 { + const retry_json = getNodeObjectField(alloc, node_json, "retry") orelse return null; + const val = getNodeFieldInt(alloc, retry_json, "initial_interval_ms") orelse return null; + if (val < 0) return 0; + return @intCast(val); +} - const wf = - \\{"steps":[{"id":"classify","type":"task","prompt_template":"classify"},{"id":"router1","type":"router"}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); +fn parseRetryBackoff(alloc: std.mem.Allocator, node_json: []const u8) ?f64 { + const retry_json = getNodeObjectField(alloc, node_json, "retry") orelse return null; + return getNodeFieldFloat(alloc, retry_json, "backoff_factor"); +} - try store.insertStep("step_classify", "r1", "classify", "task", "completed", "{}", 1, null, null, null); - try store.updateStepStatus("step_classify", "completed", null, "{\"output\":\"test\"}", null, 1); +fn parseRetryMaxMs(alloc: std.mem.Allocator, node_json: []const u8) ?u64 { + const retry_json = getNodeObjectField(alloc, node_json, "retry") orelse return null; + const val = getNodeFieldInt(alloc, retry_json, "max_interval_ms") orelse return null; + if (val < 0) return 0; + return @intCast(val); +} - try store.insertStep("step_router", "r1", "router1", "router", "ready", "{}", 1, null, null, null); - try store.insertStepDep("step_router", "step_classify"); +// ── Cache Key Helpers (Gap 3) ─────────────────────────────────────── - var engine = 
Engine.init(&store, allocator, 500); +/// Parse cache.ttl_ms from node JSON. Returns null if no cache config. +fn parseCacheTtlMs(alloc: std.mem.Allocator, node_json: []const u8) ?i64 { + const cache_json = getNodeObjectField(alloc, node_json, "cache") orelse return null; + return getNodeFieldInt(alloc, cache_json, "ttl_ms"); +} - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); +/// Compute a cache key from node_name + rendered_prompt using FNV hash. +fn computeCacheKey(alloc: std.mem.Allocator, node_name: []const u8, rendered_prompt: []const u8) ![]const u8 { + var hasher = std.hash.Fnv1a_64.init(); + hasher.update(node_name); + hasher.update("|"); + hasher.update(rendered_prompt); + const hash = hasher.final(); + return try std.fmt.allocPrint(alloc, "{x:0>16}", .{hash}); +} - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); +// ── Deferred Node Helpers (Gap 6) ─────────────────────────────────── - const router = (try store.getStep(arena.allocator(), "step_router")).?; - try std.testing.expectEqualStrings("failed", router.status); +/// Collect all deferred node names from workflow. 
+fn collectDeferredNodes(alloc: std.mem.Allocator, workflow_json: []const u8) []const []const u8 { + const parsed = json.parseFromSlice(json.Value, alloc, workflow_json, .{}) catch return &.{}; + return collectDeferredNodesFromRoot(alloc, parsed.value); } -// ── getStepFieldRaw tests ──────────────────────────────────────────── +fn collectDeferredNodesFromRoot(alloc: std.mem.Allocator, root: json.Value) []const []const u8 { + if (root != .object) return &.{}; + const nodes_val = root.object.get("nodes") orelse return &.{}; + if (nodes_val != .object) return &.{}; + + var result: std.ArrayListUnmanaged([]const u8) = .empty; + var it = nodes_val.object.iterator(); + while (it.next()) |entry| { + const name = entry.key_ptr.*; + const node = entry.value_ptr.*; + if (node == .object) { + if (node.object.get("defer")) |d| { + if (d == .bool and d.bool) { + result.append(alloc, name) catch continue; + } + } + } + } + return result.toOwnedSlice(alloc) catch &.{}; +} -test "getStepFieldRaw returns JSON object as string" { - const allocator = std.testing.allocator; - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); +// ── Managed Values Helpers (Gap 7) ────────────────────────────────── - const wf = - \\{"steps":[{"id":"r1","type":"router","routes":{"bug":"fix_bug","feature":"add_feature"}}]} - ; - const result = try getStepFieldRaw(arena.allocator(), wf, "r1", "routes"); - try std.testing.expect(result != null); - // Should be a JSON string containing the routes object - try std.testing.expect(std.mem.indexOf(u8, result.?, "bug") != null); - try std.testing.expect(std.mem.indexOf(u8, result.?, "fix_bug") != null); +/// Inject __meta into state JSON before node execution. 
+fn injectMeta(alloc: std.mem.Allocator, state_json: []const u8, run_id: []const u8, node_name: []const u8, step_number: i64, max_steps: i64) ![]const u8 { + const remaining = max_steps - step_number; + const is_last = (step_number >= max_steps - 1); + const meta_json = try std.fmt.allocPrint(alloc, + \\{{"__meta":{{"step":{d},"is_last_step":{s},"remaining_steps":{d},"run_id":"{s}","node_name":"{s}"}}}} + , .{ step_number, if (is_last) "true" else "false", remaining, run_id, node_name }); + + // Merge __meta into state using simple applyUpdates with empty schema (last_value default) + return state_mod.applyUpdates(alloc, state_json, meta_json, "{}"); } -test "getStepFieldRaw returns string values directly" { - const allocator = std.testing.allocator; - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); +/// Remove __meta from state JSON after node execution (don't persist in checkpoints). +fn stripMeta(alloc: std.mem.Allocator, state_json: []const u8) ![]const u8 { + const parsed = json.parseFromSlice(json.Value, alloc, state_json, .{}) catch return try alloc.dupe(u8, state_json); + if (parsed.value != .object) return try alloc.dupe(u8, state_json); - const wf = - \\{"steps":[{"id":"r1","type":"router","default":"fallback"}]} - ; - const result = try getStepFieldRaw(arena.allocator(), wf, "r1", "default"); - try std.testing.expect(result != null); - try std.testing.expectEqualStrings("fallback", result.?); + var result_obj = json.ObjectMap.init(alloc); + var it = parsed.value.object.iterator(); + while (it.next()) |entry| { + if (!std.mem.eql(u8, entry.key_ptr.*, "__meta")) { + try result_obj.put(entry.key_ptr.*, entry.value_ptr.*); + } + } + return serializeJsonValue(alloc, .{ .object = result_obj }); } -// ── Loop step tests ────────────────────────────────────────────────── +/// Build subgraph input state from parent state using input_mapping. 
+/// input_mapping is {"child_key": "state.parent_key", ...} +fn buildSubgraphInput(alloc: std.mem.Allocator, parent_state: []const u8, input_mapping_json: []const u8) ![]const u8 { + const mapping_parsed = json.parseFromSlice(json.Value, alloc, input_mapping_json, .{}) catch return try alloc.dupe(u8, "{}"); + if (mapping_parsed.value != .object) return try alloc.dupe(u8, "{}"); -test "Engine: loop step creates first iteration children" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); + var result = json.ObjectMap.init(alloc); + var it = mapping_parsed.value.object.iterator(); + while (it.next()) |entry| { + const child_key = entry.key_ptr.*; + const parent_path = if (entry.value_ptr.* == .string) entry.value_ptr.string else continue; - // Workflow: loop with body ["t1"] — single body step for simplicity - const wf = - \\{"steps":[{"id":"loop1","type":"loop","max_iterations":3,"exit_condition":"done","body":["t1"]},{"id":"t1","type":"task","prompt_template":"do work"}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_loop", "r1", "loop1", "loop", "ready", "{}", 1, null, null, null); + // Resolve the value from parent state + if (state_mod.getStateValue(alloc, parent_state, parent_path) catch null) |value_str| { + const val_parsed = json.parseFromSlice(json.Value, alloc, value_str, .{}) catch continue; + try result.put(child_key, val_parsed.value); + } + } - var engine = Engine.init(&store, allocator, 500); + return serializeJsonValue(alloc, .{ .object = result }); +} - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); +/// Reconcile with nulltickets: check if associated task has been cancelled. +/// Returns true if the run should continue, false if it should be cancelled. 
+fn reconcileWithTracker(alloc: std.mem.Allocator, tracker_url: []const u8, tracker_api_token: ?[]const u8, task_id: []const u8) bool { + const task_id_enc = encodePathSegment(alloc, task_id) catch return true; + defer alloc.free(task_id_enc); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); + const url = std.fmt.allocPrint(alloc, "{s}/tasks/{s}", .{ tracker_url, task_id_enc }) catch return true; + defer alloc.free(url); - // Loop step should be "running" - const loop_step = (try store.getStep(arena.allocator(), "step_loop")).?; - try std.testing.expectEqualStrings("running", loop_step.status); + var client: std.http.Client = .{ .allocator = alloc }; + defer client.deinit(); - // Should have created 1 child step - const children = try store.getChildSteps(arena.allocator(), "step_loop"); - try std.testing.expectEqual(@as(usize, 1), children.len); - try std.testing.expectEqualStrings("ready", children[0].status); - try std.testing.expectEqualStrings("t1", children[0].def_step_id); - try std.testing.expectEqual(@as(i64, 0), children[0].iteration_index); -} + var response_body: std.io.Writer.Allocating = .init(alloc); + defer response_body.deinit(); -test "Engine: loop step iterates until exit condition" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); + var auth_header: ?[]const u8 = null; + defer if (auth_header) |value| alloc.free(value); + var headers_buf: [1]std.http.Header = undefined; + const extra_headers: []const std.http.Header = if (tracker_api_token) |token| blk: { + auth_header = std.fmt.allocPrint(alloc, "Bearer {s}", .{token}) catch return true; + headers_buf[0] = .{ .name = "Authorization", .value = auth_header.? 
}; + break :blk headers_buf[0..1]; + } else &.{}; - const wf = - \\{"steps":[{"id":"loop1","type":"loop","max_iterations":5,"exit_condition":"done","body":["t1"]},{"id":"t1","type":"task","prompt_template":"do work"}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_loop", "r1", "loop1", "loop", "ready", "{}", 1, null, null, null); + const result = client.fetch(.{ + .location = .{ .url = url }, + .method = .GET, + .response_writer = &response_body.writer, + .extra_headers = extra_headers, + }) catch return true; // network errors -> continue - var engine = Engine.init(&store, allocator, 500); + const status_code = @intFromEnum(result.status); + if (status_code < 200 or status_code >= 300) return true; - // Tick 1: creates iteration 0 children, marks loop as running - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + const body = response_body.written(); + const parsed = json.parseFromSlice(json.Value, alloc, body, .{}) catch return true; + if (parsed.value != .object) return true; - // Get the first child and mark it completed with "not done" - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_loop"); - try std.testing.expectEqual(@as(usize, 1), children.len); - try store.updateStepStatus(children[0].id, "completed", null, "{\"output\":\"not done\"}", null, 1); - } + const stage = parsed.value.object.get("stage") orelse return true; + if (stage != .string) return true; - // Tick 2: exit condition "done" not in "not done"... wait, "not done" contains "done"! 
- // Let's use a different output that doesn't contain "done" + // Terminal states -> cancel + if (std.mem.eql(u8, stage.string, "done") or + std.mem.eql(u8, stage.string, "cancelled") or + std.mem.eql(u8, stage.string, "canceled")) { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_loop"); - // Fix: update to something that doesn't contain "done" - try store.updateStepStatus(children[0].id, "completed", null, "{\"output\":\"still working\"}", null, 1); - } + log.info("reconciliation: task {s} is in terminal state '{s}', cancelling run", .{ task_id, stage.string }); + return false; + } + + return true; +} + +// ── Rich Streaming Helpers ────────────────────────────────────────── - // Tick 2: exit condition not met, creates iteration 1 - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } +/// Broadcast multi-mode SSE events for a node execution. +/// Emits events in values, updates, tasks, and debug modes. 
+fn broadcastNodeEvents( + hub: *sse_mod.SseHub, + alloc: std.mem.Allocator, + run_id: []const u8, + node_name: []const u8, + node_type: []const u8, + state_json: []const u8, + state_updates: ?[]const u8, + step_number: i64, + duration_ms: i64, +) void { + const step_id_buf = ids.generateId(); + const step_id = alloc.dupe(u8, &step_id_buf) catch return; + const now_ms = ids.nowMs(); + // ISO 8601 timestamp (approximate, using epoch ms) + const ts_str = std.fmt.allocPrint(alloc, "{d}", .{now_ms}) catch "0"; - // Should now have 2 children (iteration 0 and 1) - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_loop"); - try std.testing.expectEqual(@as(usize, 2), children.len); + // values mode: full state after step + const values_data = std.fmt.allocPrint(alloc, + \\{{"event":"values","data":{{"step":"{s}","state":{s}}}}} + , .{ node_name, state_json }) catch null; + if (values_data) |vd| { + hub.broadcast(run_id, .{ .event_type = "values", .data = vd, .mode = .values }); + } + + // updates mode: node name + partial updates + const updates_payload = state_updates orelse "{}"; + const updates_data = std.fmt.allocPrint(alloc, + \\{{"event":"updates","data":{{"step":"{s}","updates":{s}}}}} + , .{ node_name, updates_payload }) catch null; + if (updates_data) |ud| { + hub.broadcast(run_id, .{ .event_type = "updates", .data = ud, .mode = .updates }); } - // Mark iteration 1 child as completed with "done" in output - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_loop"); - // Find iteration 1 child - for (children) |child| { - if (child.iteration_index == 1) { - try store.updateStepStatus(child.id, "completed", null, "{\"output\":\"done\"}", null, 1); + // tasks mode: task_start and task_result + const task_start_data = std.fmt.allocPrint(alloc, + 
\\{{"id":"{s}","name":"{s}","type":"{s}"}} + , .{ step_id, node_name, node_type }) catch null; + if (task_start_data) |tsd| { + hub.broadcast(run_id, .{ .event_type = "task_start", .data = tsd, .mode = .tasks }); + } + + const task_result_data = std.fmt.allocPrint(alloc, + \\{{"id":"{s}","name":"{s}","result":{s},"duration_ms":{d}}} + , .{ step_id, node_name, updates_payload, duration_ms }) catch null; + if (task_result_data) |trd| { + hub.broadcast(run_id, .{ .event_type = "task_result", .data = trd, .mode = .tasks }); + } + + // debug mode: wrapped with step number and timestamp + const debug_data = std.fmt.allocPrint(alloc, + \\{{"step_number":{d},"timestamp_ms":{s},"type":"task_result","payload":{{"name":"{s}","updates":{s},"duration_ms":{d}}}}} + , .{ step_number, ts_str, node_name, updates_payload, duration_ms }) catch null; + if (debug_data) |dd| { + hub.broadcast(run_id, .{ .event_type = "debug", .data = dd, .mode = .debug }); + } +} + +/// Get workflow version from workflow JSON definition. +fn getWorkflowVersion(alloc: std.mem.Allocator, workflow_json: []const u8) i64 { + const parsed = json.parseFromSlice(json.Value, alloc, workflow_json, .{}) catch return 1; + if (parsed.value != .object) return 1; + const val = parsed.value.object.get("version") orelse return 1; + if (val == .integer) return val.integer; + return 1; +} + +/// Get workflow version from checkpoint metadata. +fn getCheckpointWorkflowVersion(alloc: std.mem.Allocator, metadata_json: ?[]const u8) i64 { + const meta = metadata_json orelse return 1; + const parsed = json.parseFromSlice(json.Value, alloc, meta, .{}) catch return 1; + if (parsed.value != .object) return 1; + const val = parsed.value.object.get("workflow_version") orelse return 1; + if (val == .integer) return val.integer; + return 1; +} + +/// Filter completed nodes to only those still present in the workflow definition. +/// Returns true if any nodes were removed (migration happened). 
+fn migrateCompletedNodes(alloc: std.mem.Allocator, completed_nodes: *std.StringHashMap(void), workflow_json: []const u8) bool { + const parsed = json.parseFromSlice(json.Value, alloc, workflow_json, .{}) catch return false; + if (parsed.value != .object) return false; + const nodes_val = parsed.value.object.get("nodes") orelse return false; + if (nodes_val != .object) return false; + + var to_remove: std.ArrayListUnmanaged([]const u8) = .empty; + var it = completed_nodes.iterator(); + while (it.next()) |entry| { + const name = entry.key_ptr.*; + // Keep special nodes + if (std.mem.eql(u8, name, "__start__") or std.mem.eql(u8, name, "__end__")) continue; + // Remove if node no longer exists in workflow + if (nodes_val.object.get(name) == null) { + to_remove.append(alloc, name) catch continue; + } + } + + if (to_remove.items.len == 0) return false; + + for (to_remove.items) |name| { + _ = completed_nodes.remove(name); + log.warn("migration: removed completed node '{s}' (no longer in workflow)", .{name}); + } + return true; +} + +// ── UI Messages ────────────────────────────────────────────────────── + +/// Process "ui_messages" from worker response JSON. +/// For each message: +/// - If it has "remove": true -> broadcast as "ui_message_delete" SSE event +/// - Otherwise -> broadcast as "ui_message" SSE event +/// Also applies to state.__ui_messages via add_messages reducer. 
+fn processUiMessages(hub: *sse_mod.SseHub, alloc: std.mem.Allocator, run_id: []const u8, step_id: []const u8, response_json: []const u8) void { + const parsed = json.parseFromSlice(json.Value, alloc, response_json, .{}) catch return; + if (parsed.value != .object) return; + const ui_msgs_val = parsed.value.object.get("ui_messages") orelse return; + if (ui_msgs_val != .array) return; + + for (ui_msgs_val.array.items) |msg| { + if (msg != .object) continue; + + // Check for remove flag + const is_remove = blk: { + if (msg.object.get("remove")) |rm_val| { + if (rm_val == .bool) break :blk rm_val.bool; } - } - } + break :blk false; + }; - // Tick 3: exit condition met, loop completes - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + // Add step_id to the event data + var event_obj = json.ObjectMap.init(alloc); + var it = msg.object.iterator(); + while (it.next()) |entry| { + event_obj.put(entry.key_ptr.*, entry.value_ptr.*) catch continue; + } + event_obj.put("step_id", .{ .string = step_id }) catch {}; + const event_data = serializeJsonValue(alloc, .{ .object = event_obj }) catch continue; - // Loop should be completed - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const loop_step = (try store.getStep(arena.allocator(), "step_loop")).?; - try std.testing.expectEqualStrings("completed", loop_step.status); - try std.testing.expect(loop_step.output_json != null); + if (is_remove) { + hub.broadcast(run_id, .{ .event_type = "ui_message_delete", .data = event_data, .mode = .custom }); + } else { + hub.broadcast(run_id, .{ .event_type = "ui_message", .data = event_data, .mode = .custom }); + } } } -test "Engine: loop step stops at max_iterations" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); +/// Apply 
ui_messages to run state's __ui_messages key using add_messages reducer. +fn applyUiMessagesToState(alloc: std.mem.Allocator, state_json: []const u8, response_json: []const u8) ![]const u8 { + var arena = std.heap.ArenaAllocator.init(alloc); + defer arena.deinit(); + const arena_alloc = arena.allocator(); - const wf = - \\{"steps":[{"id":"loop1","type":"loop","max_iterations":2,"exit_condition":"never_match","body":["t1"]},{"id":"t1","type":"task","prompt_template":"do work"}]} + const resp_parsed = json.parseFromSlice(json.Value, arena_alloc, response_json, .{}) catch return try alloc.dupe(u8, state_json); + if (resp_parsed.value != .object) return try alloc.dupe(u8, state_json); + const ui_msgs_val = resp_parsed.value.object.get("ui_messages") orelse return try alloc.dupe(u8, state_json); + if (ui_msgs_val != .array) return try alloc.dupe(u8, state_json); + + // Serialize the ui_messages array + const ui_msgs_json = serializeJsonValue(arena_alloc, ui_msgs_val) catch return try alloc.dupe(u8, state_json); + + // Build updates: {"__ui_messages": } + const updates = std.fmt.allocPrint(arena_alloc, "{{\"__ui_messages\":{s}}}", .{ui_msgs_json}) catch return try alloc.dupe(u8, state_json); + + // Build a temporary schema that uses add_messages for __ui_messages + const schema = + \\{"__ui_messages":{"type":"array","reducer":"add_messages"}} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_loop", "r1", "loop1", "loop", "ready", "{}", 1, null, null, null); - var engine = Engine.init(&store, allocator, 500); + return state_mod.applyUpdates(alloc, state_json, updates, schema) catch try alloc.dupe(u8, state_json); +} - // Tick 1: creates iteration 0 - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } +// ── Stream Messages ────────────────────────────────────────────────── - // 
Complete iteration 0 child - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_loop"); - try store.updateStepStatus(children[0].id, "completed", null, "{\"output\":\"result0\"}", null, 1); - } +/// Process "stream_messages" from worker response JSON. +/// For each message: broadcast as a "message" SSE event with step context. +fn processStreamMessages(hub: *sse_mod.SseHub, alloc: std.mem.Allocator, run_id: []const u8, step_id: []const u8, node_type: []const u8, response_json: []const u8) void { + const parsed = json.parseFromSlice(json.Value, alloc, response_json, .{}) catch return; + if (parsed.value != .object) return; + const stream_msgs_val = parsed.value.object.get("stream_messages") orelse return; + if (stream_msgs_val != .array) return; - // Tick 2: creates iteration 1 - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + for (stream_msgs_val.array.items) |msg| { + if (msg != .object) continue; - // Complete iteration 1 child - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_loop"); - for (children) |child| { - if (child.iteration_index == 1) { - try store.updateStepStatus(child.id, "completed", null, "{\"output\":\"result1\"}", null, 1); - } + // Build enriched message with step context + var event_obj = json.ObjectMap.init(alloc); + var it = msg.object.iterator(); + while (it.next()) |entry| { + event_obj.put(entry.key_ptr.*, entry.value_ptr.*) catch continue; } - } - - // Tick 3: max_iterations=2 reached (iterations 0,1), loop completes - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try 
engine.processRun(arena.allocator(), run_row); - } + event_obj.put("step_id", .{ .string = step_id }) catch {}; + event_obj.put("node_type", .{ .string = node_type }) catch {}; + const event_data = serializeJsonValue(alloc, .{ .object = event_obj }) catch continue; - // Loop should be completed - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const loop_step = (try store.getStep(arena.allocator(), "step_loop")).?; - try std.testing.expectEqualStrings("completed", loop_step.status); + hub.broadcast(run_id, .{ .event_type = "message", .data = event_data, .mode = .custom }); } } -test "Engine: loop step fails when child fails" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - const wf = - \\{"steps":[{"id":"loop1","type":"loop","max_iterations":3,"exit_condition":"done","body":["t1"]},{"id":"t1","type":"task","prompt_template":"do work"}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_loop", "r1", "loop1", "loop", "ready", "{}", 1, null, null, null); +// ── Mermaid Graph Export ───────────────────────────────────────────── - var engine = Engine.init(&store, allocator, 500); +/// Generate Mermaid diagram syntax from a workflow JSON definition. +/// Returns a Mermaid flowchart string. 
+pub fn generateMermaid(alloc: std.mem.Allocator, definition_json: []const u8) ![]const u8 { + var arena = std.heap.ArenaAllocator.init(alloc); + defer arena.deinit(); + const arena_alloc = arena.allocator(); - // Tick 1: creates iteration 0 - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + const parsed = try json.parseFromSlice(json.Value, arena_alloc, definition_json, .{}); + if (parsed.value != .object) return try alloc.dupe(u8, "graph TD\n"); - // Mark child as failed - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_loop"); - try store.updateStepStatus(children[0].id, "failed", null, null, "child error", 1); - } + const nodes_val = parsed.value.object.get("nodes") orelse return try alloc.dupe(u8, "graph TD\n"); + if (nodes_val != .object) return try alloc.dupe(u8, "graph TD\n"); - // Tick 2: loop should fail - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + const edges_val = parsed.value.object.get("edges") orelse return try alloc.dupe(u8, "graph TD\n"); + if (edges_val != .array) return try alloc.dupe(u8, "graph TD\n"); - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const loop_step = (try store.getStep(arena.allocator(), "step_loop")).?; - try std.testing.expectEqualStrings("failed", loop_step.status); - } -} + var buf: std.ArrayListUnmanaged(u8) = .empty; -test "Engine: loop step with multiple body steps chains them" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); + // Header + try buf.appendSlice(arena_alloc, "graph TD\n"); - const wf = - 
\\{"steps":[{"id":"loop1","type":"loop","max_iterations":1,"exit_condition":"done","body":["s1","s2"]},{"id":"s1","type":"task","prompt_template":"step1"},{"id":"s2","type":"task","prompt_template":"step2"}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_loop", "r1", "loop1", "loop", "ready", "{}", 1, null, null, null); + // __start__ and __end__ nodes + try buf.appendSlice(arena_alloc, " __start__((Start))\n"); - var engine = Engine.init(&store, allocator, 500); + // Node definitions + var nodes_it = nodes_val.object.iterator(); + while (nodes_it.next()) |entry| { + const name = entry.key_ptr.*; + const node = entry.value_ptr.*; - // Tick 1: creates iteration 0 with 2 body steps chained - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + const node_type_str = blk: { + if (node == .object) { + if (node.object.get("type")) |t| { + if (t == .string) break :blk t.string; + } + } + break :blk "task"; + }; - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_loop"); - try std.testing.expectEqual(@as(usize, 2), children.len); - - // First child (s1) should be "ready", second (s2) should be "pending" - // Children are ordered by item_index ASC - var ready_count: usize = 0; - var pending_count: usize = 0; - for (children) |child| { - if (std.mem.eql(u8, child.status, "ready")) ready_count += 1; - if (std.mem.eql(u8, child.status, "pending")) pending_count += 1; + // Choose Mermaid shape based on node type + if (std.mem.eql(u8, node_type_str, "route")) { + try buf.appendSlice(arena_alloc, " "); + try buf.appendSlice(arena_alloc, name); + try buf.appendSlice(arena_alloc, "{"); + try buf.appendSlice(arena_alloc, name); + try buf.appendSlice(arena_alloc, "\\nroute}\n"); + } else 
if (std.mem.eql(u8, node_type_str, "interrupt")) { + try buf.appendSlice(arena_alloc, " "); + try buf.appendSlice(arena_alloc, name); + try buf.appendSlice(arena_alloc, "[/"); + try buf.appendSlice(arena_alloc, name); + try buf.appendSlice(arena_alloc, "\\ninterrupt/]\n"); + } else if (std.mem.eql(u8, node_type_str, "send")) { + try buf.appendSlice(arena_alloc, " "); + try buf.appendSlice(arena_alloc, name); + try buf.appendSlice(arena_alloc, "[["); + try buf.appendSlice(arena_alloc, name); + try buf.appendSlice(arena_alloc, "\\nsend]]\n"); + } else if (std.mem.eql(u8, node_type_str, "transform")) { + try buf.appendSlice(arena_alloc, " "); + try buf.appendSlice(arena_alloc, name); + try buf.appendSlice(arena_alloc, "("); + try buf.appendSlice(arena_alloc, name); + try buf.appendSlice(arena_alloc, "\\ntransform)\n"); + } else if (std.mem.eql(u8, node_type_str, "subgraph")) { + try buf.appendSlice(arena_alloc, " "); + try buf.appendSlice(arena_alloc, name); + try buf.appendSlice(arena_alloc, "["); + try buf.appendSlice(arena_alloc, name); + try buf.appendSlice(arena_alloc, "\\nsubgraph]\n"); + } else { + // task, agent, and others: rectangle + try buf.appendSlice(arena_alloc, " "); + try buf.appendSlice(arena_alloc, name); + try buf.appendSlice(arena_alloc, "["); + try buf.appendSlice(arena_alloc, name); + try buf.appendSlice(arena_alloc, "\\n"); + try buf.appendSlice(arena_alloc, node_type_str); + try buf.appendSlice(arena_alloc, "]\n"); + } + } + + // __end__ node + try buf.appendSlice(arena_alloc, " __end__((End))\n"); + + // Edges + for (edges_val.array.items) |edge_item| { + if (edge_item != .array) continue; + if (edge_item.array.items.len < 2) continue; + + const source_raw = if (edge_item.array.items[0] == .string) edge_item.array.items[0].string else continue; + const target = if (edge_item.array.items[1] == .string) edge_item.array.items[1].string else continue; + + // Parse conditional edge "source:value" + if (std.mem.indexOfScalar(u8, source_raw, ':')) 
|colon_pos| { + const source = source_raw[0..colon_pos]; + const condition = source_raw[colon_pos + 1 ..]; + try buf.appendSlice(arena_alloc, " "); + try buf.appendSlice(arena_alloc, source); + try buf.appendSlice(arena_alloc, " -->|"); + try buf.appendSlice(arena_alloc, condition); + try buf.appendSlice(arena_alloc, "| "); + try buf.appendSlice(arena_alloc, target); + try buf.appendSlice(arena_alloc, "\n"); + } else { + try buf.appendSlice(arena_alloc, " "); + try buf.appendSlice(arena_alloc, source_raw); + try buf.appendSlice(arena_alloc, " --> "); + try buf.appendSlice(arena_alloc, target); + try buf.appendSlice(arena_alloc, "\n"); } - try std.testing.expectEqual(@as(usize, 1), ready_count); - try std.testing.expectEqual(@as(usize, 1), pending_count); } + + return try alloc.dupe(u8, buf.items); } -// ── Sub-workflow step tests ────────────────────────────────────────── +// ── Tests ───────────────────────────────────────────────────────────── -test "Engine: sub_workflow step creates child run" { +test "Engine: init and stop" { const allocator = std.testing.allocator; var store = try Store.init(allocator, ":memory:"); defer store.deinit(); - // Parent workflow has a sub_workflow step with inline workflow - const wf = - \\{"steps":[{"id":"sub1","type":"sub_workflow","workflow":{"steps":[{"id":"inner1","type":"task","prompt_template":"inner work"}]}}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_sub", "r1", "sub1", "sub_workflow", "ready", "{}", 1, null, null, null); - var engine = Engine.init(&store, allocator, 500); - - // Tick 1: creates child run and marks sub_workflow as running - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } - - // Verify sub_workflow step is "running" and has child_run_id - var child_run_id: []const u8 = undefined; - { - var arena = 
std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const sub_step = (try store.getStep(arena.allocator(), "step_sub")).?; - try std.testing.expectEqualStrings("running", sub_step.status); - try std.testing.expect(sub_step.child_run_id != null); - child_run_id = try allocator.dupe(u8, sub_step.child_run_id.?); - } - defer allocator.free(child_run_id); - - // Verify child run exists and has steps - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const child_run = (try store.getRun(arena.allocator(), child_run_id)).?; - try std.testing.expectEqualStrings("running", child_run.status); - - const child_steps = try store.getStepsByRun(arena.allocator(), child_run_id); - try std.testing.expectEqual(@as(usize, 1), child_steps.len); - try std.testing.expectEqualStrings("inner1", child_steps[0].def_step_id); - try std.testing.expectEqualStrings("ready", child_steps[0].status); - } + try std.testing.expect(engine.running.load(.acquire)); + engine.stop(); + try std.testing.expect(!engine.running.load(.acquire)); } -test "Engine: sub_workflow step completes when child run completes" { +test "Engine: tick with no active runs" { const allocator = std.testing.allocator; var store = try Store.init(allocator, ":memory:"); defer store.deinit(); - const wf = - \\{"steps":[{"id":"sub1","type":"sub_workflow","workflow":{"steps":[{"id":"inner1","type":"task","prompt_template":"inner work"}]}}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_sub", "r1", "sub1", "sub_workflow", "ready", "{}", 1, null, null, null); - var engine = Engine.init(&store, allocator, 500); - - // Tick 1: creates child run - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } - - // Get child run ID and manually complete its step + run - var child_run_id: []const u8 = 
undefined; - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const sub_step = (try store.getStep(arena.allocator(), "step_sub")).?; - child_run_id = try allocator.dupe(u8, sub_step.child_run_id.?); - } - defer allocator.free(child_run_id); - - // Complete the child run's step - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const child_steps = try store.getStepsByRun(arena.allocator(), child_run_id); - try store.updateStepStatus(child_steps[0].id, "completed", null, "{\"output\":\"inner result\"}", null, 1); - } - - // Mark child run as completed - try store.updateRunStatus(child_run_id, "completed", null); - - // Tick 2: sub_workflow should detect child run completed and complete itself - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } - - // Verify sub_workflow step completed with child's output - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const sub_step = (try store.getStep(arena.allocator(), "step_sub")).?; - try std.testing.expectEqualStrings("completed", sub_step.status); - try std.testing.expect(sub_step.output_json != null); - try std.testing.expect(std.mem.indexOf(u8, sub_step.output_json.?, "inner result") != null); - } + try engine.tick(); } -test "Engine: sub_workflow step fails when child run fails" { +test "engine: find ready nodes - simple chain" { const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + // Edges: __start__ -> a -> b -> __end__ const wf = - \\{"steps":[{"id":"sub1","type":"sub_workflow","workflow":{"steps":[{"id":"inner1","type":"task","prompt_template":"inner work"}]}}]} + 
\\{"nodes":{"a":{"type":"task"},"b":{"type":"task"}},"edges":[["__start__","a"],["a","b"],["b","__end__"]],"schema":{}} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_sub", "r1", "sub1", "sub_workflow", "ready", "{}", 1, null, null, null); - - var engine = Engine.init(&store, allocator, 500); - - // Tick 1: creates child run - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } - // Get child run ID - var child_run_id: []const u8 = undefined; + // Completed: [] -> ready: [a] { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const sub_step = (try store.getStep(arena.allocator(), "step_sub")).?; - child_run_id = try allocator.dupe(u8, sub_step.child_run_id.?); + var completed = std.StringHashMap(void).init(alloc); + var routes = std.StringHashMap([]const u8).init(alloc); + const ready = try findReadyNodes(alloc, wf, &completed, &routes); + try std.testing.expectEqual(@as(usize, 1), ready.len); + try std.testing.expectEqualStrings("a", ready[0]); } - defer allocator.free(child_run_id); - // Mark child run as failed - try store.updateRunStatus(child_run_id, "failed", "inner step failed"); - - // Tick 2: sub_workflow should detect child run failed + // Completed: [a] -> ready: [b] { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); + var completed = std.StringHashMap(void).init(alloc); + try completed.put("a", {}); + var routes = std.StringHashMap([]const u8).init(alloc); + const ready = try findReadyNodes(alloc, wf, &completed, &routes); + try std.testing.expectEqual(@as(usize, 1), ready.len); + try std.testing.expectEqualStrings("b", ready[0]); } - // Verify sub_workflow step failed + // Completed: 
[a, b] -> ready: [__end__] { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const sub_step = (try store.getStep(arena.allocator(), "step_sub")).?; - try std.testing.expectEqualStrings("failed", sub_step.status); - try std.testing.expect(sub_step.error_text != null); + var completed = std.StringHashMap(void).init(alloc); + try completed.put("a", {}); + try completed.put("b", {}); + var routes = std.StringHashMap([]const u8).init(alloc); + const ready = try findReadyNodes(alloc, wf, &completed, &routes); + try std.testing.expectEqual(@as(usize, 1), ready.len); + try std.testing.expectEqualStrings("__end__", ready[0]); } } -test "Engine: sub_workflow step fails without workflow" { +test "engine: find ready nodes - parallel" { const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + // Edges: __start__ -> a, __start__ -> b, a -> c, b -> c const wf = - \\{"steps":[{"id":"sub1","type":"sub_workflow"}]} + \\{"nodes":{"a":{"type":"task"},"b":{"type":"task"},"c":{"type":"task"}},"edges":[["__start__","a"],["__start__","b"],["a","c"],["b","c"]],"schema":{}} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_sub", "r1", "sub1", "sub_workflow", "ready", "{}", 1, null, null, null); - var engine = Engine.init(&store, allocator, 500); + // Completed: [] -> ready: [a, b] + { + var completed = std.StringHashMap(void).init(alloc); + var routes = std.StringHashMap([]const u8).init(alloc); + const ready = try findReadyNodes(alloc, wf, &completed, &routes); + try std.testing.expectEqual(@as(usize, 2), ready.len); + // Both a and b should be ready (order may vary) + var has_a = false; + var has_b = false; + for (ready) |name| { + if (std.mem.eql(u8, name, "a")) has_a = true; + if (std.mem.eql(u8, name, "b")) has_b = true; + } + try 
std.testing.expect(has_a); + try std.testing.expect(has_b); + } + // Completed: [a] -> ready: [] (c needs both a and b) { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); + var completed = std.StringHashMap(void).init(alloc); + try completed.put("a", {}); + var routes = std.StringHashMap([]const u8).init(alloc); + const ready = try findReadyNodes(alloc, wf, &completed, &routes); + // b is already in completed? No. So b should be ready + // Wait - b is from __start__ and __start__ is always completed + // b should be ready since its only inbound is __start__ + // But if we only put "a" as completed, b's inbound __start__ is always satisfied + // So b should be ready. And c should NOT be ready since b is not completed. + var has_c = false; + for (ready) |name| { + if (std.mem.eql(u8, name, "c")) has_c = true; + } + try std.testing.expect(!has_c); } + // Completed: [a, b] -> ready: [c] { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const sub_step = (try store.getStep(arena.allocator(), "step_sub")).?; - try std.testing.expectEqualStrings("failed", sub_step.status); + var completed = std.StringHashMap(void).init(alloc); + try completed.put("a", {}); + try completed.put("b", {}); + var routes = std.StringHashMap([]const u8).init(alloc); + const ready = try findReadyNodes(alloc, wf, &completed, &routes); + try std.testing.expectEqual(@as(usize, 1), ready.len); + try std.testing.expectEqualStrings("c", ready[0]); } } -test "Engine: loop step fails without body" { +test "engine: find ready nodes - route edges" { const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + // Edges: __start__ -> r, r:yes -> a, r:no -> b 
const wf = - \\{"steps":[{"id":"loop1","type":"loop","max_iterations":3,"exit_condition":"done"}]} + \\{"nodes":{"r":{"type":"route"},"a":{"type":"task"},"b":{"type":"task"}},"edges":[["__start__","r"],["r:yes","a"],["r:no","b"]],"schema":{}} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_loop", "r1", "loop1", "loop", "ready", "{}", 1, null, null, null); - var engine = Engine.init(&store, allocator, 500); + // Completed: [r] with route result "yes" -> ready: [a] + { + var completed = std.StringHashMap(void).init(alloc); + try completed.put("r", {}); + var routes = std.StringHashMap([]const u8).init(alloc); + try routes.put("r", "yes"); + const ready = try findReadyNodes(alloc, wf, &completed, &routes); + try std.testing.expectEqual(@as(usize, 1), ready.len); + try std.testing.expectEqualStrings("a", ready[0]); + } + // Completed: [r] with route result "no" -> ready: [b] { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); + var completed = std.StringHashMap(void).init(alloc); + try completed.put("r", {}); + var routes = std.StringHashMap([]const u8).init(alloc); + try routes.put("r", "no"); + const ready = try findReadyNodes(alloc, wf, &completed, &routes); + try std.testing.expectEqual(@as(usize, 1), ready.len); + try std.testing.expectEqualStrings("b", ready[0]); } + // Completed: [r] with route result "yes" -> b should NOT be ready { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const loop_step = (try store.getStep(arena.allocator(), "step_loop")).?; - try std.testing.expectEqualStrings("failed", loop_step.status); + var completed = std.StringHashMap(void).init(alloc); + try completed.put("r", {}); + var routes = std.StringHashMap([]const u8).init(alloc); + try routes.put("r", "yes"); + const ready = try findReadyNodes(alloc, wf, 
&completed, &routes); + for (ready) |name| { + try std.testing.expect(!std.mem.eql(u8, name, "b")); + } } } -// ── Debate step tests ──────────────────────────────────────────────── - -test "Engine: debate step creates participant children" { +test "engine: processRun completes simple workflow" { const allocator = std.testing.allocator; var store = try Store.init(allocator, ":memory:"); defer store.deinit(); + // Create a workflow with just a transform node const wf = - \\{"steps":[{"id":"review","type":"debate","count":2,"worker_tags":["reviewer"],"judge_tags":["senior"],"prompt_template":"Review this code","judge_template":"Pick the best:\n{{debate_responses}}"}]} + \\{"nodes":{"t1":{"type":"transform","updates":"{\"result\":\"done\"}"}},"edges":[["__start__","t1"],["t1","__end__"]],"schema":{"result":{"type":"string","reducer":"last_value"}}} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_debate", "r1", "review", "debate", "ready", "{}", 1, null, null, null); + + try store.createRunWithState("r1", null, wf, "{}", "{}"); + try store.updateRunStatus("r1", "running", null); var engine = Engine.init(&store, allocator, 500); @@ -4305,30 +2939,26 @@ test "Engine: debate step creates participant children" { const run_row = (try store.getRun(arena.allocator(), "r1")).?; try engine.processRun(arena.allocator(), run_row); - // Debate step should be "running" - const debate_step = (try store.getStep(arena.allocator(), "step_debate")).?; - try std.testing.expectEqualStrings("running", debate_step.status); + const updated_run = (try store.getRun(arena.allocator(), "r1")).?; + try std.testing.expectEqualStrings("completed", updated_run.status); - // Should have 2 participant children - const children = try store.getChildSteps(arena.allocator(), "step_debate"); - try std.testing.expectEqual(@as(usize, 2), children.len); - - for (children) |child| { - try std.testing.expectEqualStrings("ready", child.status); - try 
std.testing.expectEqualStrings("task", child.type); + // Verify state was updated + if (updated_run.state_json) |sj| { + try std.testing.expect(std.mem.indexOf(u8, sj, "done") != null); } } -test "Engine: debate step fails without count" { +test "engine: interrupt node stops run" { const allocator = std.testing.allocator; var store = try Store.init(allocator, ":memory:"); defer store.deinit(); const wf = - \\{"steps":[{"id":"review","type":"debate","prompt_template":"Review this"}]} + \\{"nodes":{"i1":{"type":"interrupt"}},"edges":[["__start__","i1"],["i1","__end__"]],"schema":{}} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_debate", "r1", "review", "debate", "ready", "{}", 1, null, null, null); + + try store.createRunWithState("r1", null, wf, "{}", "{}"); + try store.updateRunStatus("r1", "running", null); var engine = Engine.init(&store, allocator, 500); @@ -4338,290 +2968,394 @@ test "Engine: debate step fails without count" { const run_row = (try store.getRun(arena.allocator(), "r1")).?; try engine.processRun(arena.allocator(), run_row); - const step = (try store.getStep(arena.allocator(), "step_debate")).?; - try std.testing.expectEqualStrings("failed", step.status); + const updated_run = (try store.getRun(arena.allocator(), "r1")).?; + try std.testing.expectEqualStrings("interrupted", updated_run.status); } -test "Engine: debate step fails without prompt_template" { +test "engine: route node with conditional edges" { const allocator = std.testing.allocator; var store = try Store.init(allocator, ":memory:"); defer store.deinit(); + // Workflow: start -> route -> (yes: t_yes, no: t_no) -> end const wf = - \\{"steps":[{"id":"review","type":"debate","count":2}]} + 
\\{"nodes":{"r":{"type":"route","input":"state.decision"},"t_yes":{"type":"transform","updates":"{\"path\":\"yes\"}"},"t_no":{"type":"transform","updates":"{\"path\":\"no\"}"}},"edges":[["__start__","r"],["r:yes","t_yes"],["r:no","t_no"],["t_yes","__end__"],["t_no","__end__"]],"schema":{"decision":{"type":"string","reducer":"last_value"},"path":{"type":"string","reducer":"last_value"}}} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_debate", "r1", "review", "debate", "ready", "{}", 1, null, null, null); + + const init_state = + \\{"decision":"yes"} + ; + + try store.createRunWithState("r1", null, wf, "{}", init_state); + try store.updateRunStatus("r1", "running", null); var engine = Engine.init(&store, allocator, 500); var arena = std.heap.ArenaAllocator.init(allocator); defer arena.deinit(); + // First tick: route node executes and completes const run_row = (try store.getRun(arena.allocator(), "r1")).?; try engine.processRun(arena.allocator(), run_row); - const step = (try store.getStep(arena.allocator(), "step_debate")).?; - try std.testing.expectEqualStrings("failed", step.status); + // May need a second tick to process t_yes and __end__ + const run_row2 = (try store.getRun(arena.allocator(), "r1")).?; + if (std.mem.eql(u8, run_row2.status, "running")) { + try engine.processRun(arena.allocator(), run_row2); + } + + const updated_run = (try store.getRun(arena.allocator(), "r1")).?; + try std.testing.expectEqualStrings("completed", updated_run.status); + + // Verify the "yes" path was taken + if (updated_run.state_json) |sj| { + try std.testing.expect(std.mem.indexOf(u8, sj, "yes") != null); + } } -test "Engine: debate step creates judge after participants complete" { +test "engine: route node falls back to declared default route" { const allocator = std.testing.allocator; var store = try Store.init(allocator, ":memory:"); defer store.deinit(); const wf = - 
\\{"steps":[{"id":"review","type":"debate","count":2,"worker_tags":["reviewer"],"judge_tags":["senior"],"prompt_template":"Review this code","judge_template":"Pick the best:\n{{debate_responses}}"}]} + \\{"nodes":{"r":{"type":"route","input":"state.decision","routes":{"yes":"t_yes","fallback":"t_fallback"},"default":"fallback"},"t_yes":{"type":"transform","updates":"{\"path\":\"yes\"}"},"t_fallback":{"type":"transform","updates":"{\"path\":\"fallback\"}"}},"edges":[["__start__","r"],["r:yes","t_yes"],["r:fallback","t_fallback"],["t_yes","__end__"],["t_fallback","__end__"]],"schema":{"decision":{"type":"string","reducer":"last_value"},"path":{"type":"string","reducer":"last_value"}}} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_debate", "r1", "review", "debate", "ready", "{}", 1, null, null, null); + + try store.createRunWithState("r1", null, wf, "{}", "{\"decision\":\"unknown\"}"); + try store.updateRunStatus("r1", "running", null); var engine = Engine.init(&store, allocator, 500); - // Tick 1: creates participant children - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); - // Complete both participant children - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_debate"); - try std.testing.expectEqual(@as(usize, 2), children.len); - try store.updateStepStatus(children[0].id, "completed", null, "{\"output\":\"review A\"}", null, 1); - try store.updateStepStatus(children[1].id, "completed", null, "{\"output\":\"review B\"}", null, 1); - } + const run_row = (try store.getRun(arena.allocator(), "r1")).?; + try engine.processRun(arena.allocator(), run_row); - // Tick 2: should create judge child 
- { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); + const run_row2 = (try store.getRun(arena.allocator(), "r1")).?; + if (std.mem.eql(u8, run_row2.status, "running")) { + try engine.processRun(arena.allocator(), run_row2); } - // Should now have 3 children (2 participants + 1 judge) - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_debate"); - try std.testing.expectEqual(@as(usize, 3), children.len); - - // Find judge child - var found_judge = false; - for (children) |child| { - if (std.mem.indexOf(u8, child.def_step_id, "_judge") != null) { - found_judge = true; - try std.testing.expectEqualStrings("ready", child.status); - try std.testing.expectEqualStrings("task", child.type); - } - } - try std.testing.expect(found_judge); + const updated_run = (try store.getRun(arena.allocator(), "r1")).?; + try std.testing.expectEqualStrings("completed", updated_run.status); + if (updated_run.state_json) |sj| { + try std.testing.expect(std.mem.indexOf(u8, sj, "fallback") != null); } } -test "Engine: debate step completes when judge completes" { +test "wrapOutput creates valid JSON" { const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + const result = try wrapOutput(arena.allocator(), "hello world"); + try std.testing.expectEqualStrings("{\"output\":\"hello world\"}", result); +} + +test "wrapOutput escapes special characters" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + const result = try wrapOutput(arena.allocator(), "line1\nline2"); + try 
std.testing.expectEqualStrings("{\"output\":\"line1\\nline2\"}", result); +} + +test "serializeCompletedNodes" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var completed = std.StringHashMap(void).init(alloc); + try completed.put("a", {}); + try completed.put("b", {}); + + const result = try serializeCompletedNodes(alloc, &completed); + // Should be a JSON array containing "a" and "b" + try std.testing.expect(std.mem.indexOf(u8, result, "\"a\"") != null); + try std.testing.expect(std.mem.indexOf(u8, result, "\"b\"") != null); +} + +test "getNodeJson returns node definition" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); const wf = - \\{"steps":[{"id":"review","type":"debate","count":2,"prompt_template":"Review this","judge_template":"Pick best: {{debate_responses}}"}]} + \\{"nodes":{"a":{"type":"task","prompt_template":"hello"}},"edges":[]} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_debate", "r1", "review", "debate", "ready", "{}", 1, null, null, null); + const result = getNodeJson(arena.allocator(), wf, "a"); + try std.testing.expect(result != null); + try std.testing.expect(std.mem.indexOf(u8, result.?, "task") != null); +} - var engine = Engine.init(&store, allocator, 500); +test "getNodeJson returns null for missing node" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); - // Tick 1: creates participant children - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + const wf = + \\{"nodes":{"a":{"type":"task"}},"edges":[]} + ; + const result = getNodeJson(arena.allocator(), wf, "b"); + try 
std.testing.expect(result == null); +} - // Complete participants - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_debate"); - try store.updateStepStatus(children[0].id, "completed", null, "{\"output\":\"A\"}", null, 1); - try store.updateStepStatus(children[1].id, "completed", null, "{\"output\":\"B\"}", null, 1); - } +test "getNodeField extracts string field" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); - // Tick 2: creates judge child - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + const node = + \\{"type":"task","prompt_template":"hello {{state.name}}"} + ; + const result = getNodeField(arena.allocator(), node, "prompt_template"); + try std.testing.expect(result != null); + try std.testing.expectEqualStrings("hello {{state.name}}", result.?); +} - // Complete the judge child - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_debate"); - for (children) |child| { - if (std.mem.indexOf(u8, child.def_step_id, "_judge") != null) { - try store.updateStepStatus(child.id, "completed", null, "{\"output\":\"A is best\"}", null, 1); - } - } - } +test "extractStateUpdates from worker response" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); - // Tick 3: debate should be completed - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + const output = + \\{"state_updates":{"result":"done","count":5},"other":"ignored"} + 
; + const result = extractStateUpdates(arena.allocator(), output); + try std.testing.expect(result != null); + try std.testing.expect(std.mem.indexOf(u8, result.?, "done") != null); +} - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const debate_step = (try store.getStep(arena.allocator(), "step_debate")).?; - try std.testing.expectEqualStrings("completed", debate_step.status); - try std.testing.expect(debate_step.output_json != null); - try std.testing.expect(std.mem.indexOf(u8, debate_step.output_json.?, "A is best") != null); - } +test "extractStateUpdates returns null for plain text" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + const result = extractStateUpdates(arena.allocator(), "just plain text"); + try std.testing.expect(result == null); } -test "Engine: debate step completes without judge_template" { +test "buildTaskStateUpdates uses output_key for plain text output" { const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); - // No judge_template — should complete with collected responses when participants are done - const wf = - \\{"steps":[{"id":"review","type":"debate","count":2,"prompt_template":"Review this"}]} + const node = + \\{"type":"task","output_key":"plan"} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_debate", "r1", "review", "debate", "ready", "{}", 1, null, null, null); + const result = try buildTaskStateUpdates(arena.allocator(), node, "draft plan"); + try std.testing.expectEqualStrings("{\"plan\":\"draft plan\"}", result); +} - var engine = Engine.init(&store, allocator, 500); +test "buildTaskStateUpdates applies output_mapping from JSON output" { + const allocator = std.testing.allocator; + var arena = 
std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); - // Tick 1: creates participant children - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + const node = + \\{"type":"task","output_key":"review_result","output_mapping":{"grade":"grade","feedback":"details.feedback"}} + ; + const output = + \\{"grade":"approve","details":{"feedback":"looks good"}} + ; + const result = try buildTaskStateUpdates(arena.allocator(), node, output); + try std.testing.expect(std.mem.indexOf(u8, result, "\"review_result\":{\"grade\":\"approve\"") != null); + try std.testing.expect(std.mem.indexOf(u8, result, "\"grade\":\"approve\"") != null); + try std.testing.expect(std.mem.indexOf(u8, result, "\"feedback\":\"looks good\"") != null); +} - // Complete participants - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_debate"); - try store.updateStepStatus(children[0].id, "completed", null, "{\"output\":\"review 1\"}", null, 1); - try store.updateStepStatus(children[1].id, "completed", null, "{\"output\":\"review 2\"}", null, 1); - } +test "getSendItemsPath prefers canonical items_key" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); - // Tick 2: no judge_template, should complete with responses - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + const node = + \\{"type":"send","items_key":"state.files","items_from":"state.legacy"} + ; + const result = getSendItemsPath(arena.allocator(), node); + try std.testing.expect(result != null); + try std.testing.expectEqualStrings("state.files", result.?); +} 
- { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const debate_step = (try store.getStep(arena.allocator(), "step_debate")).?; - try std.testing.expectEqualStrings("completed", debate_step.status); - try std.testing.expect(debate_step.output_json != null); - } +test "getSendItemsPath accepts legacy items_from alias" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + const node = + \\{"type":"send","items_from":"state.files"} + ; + const result = getSendItemsPath(arena.allocator(), node); + try std.testing.expect(result != null); + try std.testing.expectEqualStrings("state.files", result.?); } -test "Engine: debate step fails when participant fails" { +test "extractGotoTargets: string target" { const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); - const wf = - \\{"steps":[{"id":"review","type":"debate","count":2,"prompt_template":"Review this","judge_template":"Pick: {{debate_responses}}"}]} + const output = + \\{"state_updates":{"x":1},"goto":"merge_step"} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_debate", "r1", "review", "debate", "ready", "{}", 1, null, null, null); + const targets = extractGotoTargets(arena.allocator(), output); + try std.testing.expect(targets != null); + try std.testing.expectEqual(@as(usize, 1), targets.?.len); + try std.testing.expectEqualStrings("merge_step", targets.?[0]); +} - var engine = Engine.init(&store, allocator, 500); +test "extractGotoTargets: array targets" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); - // Tick 1: creates participant children - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try 
store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + const output = + \\{"goto":["step_a","step_b"]} + ; + const targets = extractGotoTargets(arena.allocator(), output); + try std.testing.expect(targets != null); + try std.testing.expectEqual(@as(usize, 2), targets.?.len); + try std.testing.expectEqualStrings("step_a", targets.?[0]); + try std.testing.expectEqualStrings("step_b", targets.?[1]); +} - // Fail one participant - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_debate"); - try store.updateStepStatus(children[0].id, "completed", null, "{\"output\":\"review A\"}", null, 1); - try store.updateStepStatus(children[1].id, "failed", null, null, "worker error", 1); - } +test "extractGotoTargets: no goto field" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); - // Tick 2: debate should fail - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + const targets = extractGotoTargets(arena.allocator(), "{\"state_updates\":{}}"); + try std.testing.expect(targets == null); +} - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const debate_step = (try store.getStep(arena.allocator(), "step_debate")).?; - try std.testing.expectEqualStrings("failed", debate_step.status); - } +test "extractGotoTargets: not JSON" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + const targets = extractGotoTargets(arena.allocator(), "plain text"); + try std.testing.expect(targets == null); } -// ── Group chat step tests ──────────────────────────────────────────── +test "parseBreakpointList: valid list" { + const 
allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + const wf = + \\{"interrupt_before":["review","merge"],"interrupt_after":["generate"],"nodes":{},"edges":[]} + ; + const before = parseBreakpointList(arena.allocator(), wf, "interrupt_before"); + try std.testing.expectEqual(@as(usize, 2), before.len); + try std.testing.expectEqualStrings("review", before[0]); + try std.testing.expectEqualStrings("merge", before[1]); + + const after = parseBreakpointList(arena.allocator(), wf, "interrupt_after"); + try std.testing.expectEqual(@as(usize, 1), after.len); + try std.testing.expectEqualStrings("generate", after[0]); +} -test "Engine: group_chat step parses participants and starts" { +test "parseBreakpointList: missing field" { const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); const wf = - \\{"steps":[{"id":"discuss","type":"group_chat","participants":[{"tags":["architect"],"role":"Architect"},{"tags":["security"],"role":"Security"}],"max_rounds":3,"exit_condition":"CONSENSUS","prompt_template":"Discuss: topic","round_template":"Previous:\n{{chat_history}}\nYour role: {{role}}. 
Respond."}]} + \\{"nodes":{},"edges":[]} + ; + const result = parseBreakpointList(arena.allocator(), wf, "interrupt_before"); + try std.testing.expectEqual(@as(usize, 0), result.len); +} + +test "isInBreakpointList" { + const list = [_][]const u8{ "review", "merge" }; + try std.testing.expect(isInBreakpointList("review", &list)); + try std.testing.expect(isInBreakpointList("merge", &list)); + try std.testing.expect(!isInBreakpointList("build", &list)); +} + +test "getNodeFieldInt: valid integer" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + const node = + \\{"type":"agent","max_turns":10} + ; + const result = getNodeFieldInt(arena.allocator(), node, "max_turns"); + try std.testing.expect(result != null); + try std.testing.expectEqual(@as(i64, 10), result.?); +} + +test "getNodeFieldInt: missing field" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + const node = + \\{"type":"task"} + ; + const result = getNodeFieldInt(arena.allocator(), node, "max_turns"); + try std.testing.expect(result == null); +} + +test "getNodeFieldInt: string field returns null" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + const node = + \\{"type":"task","max_turns":"five"} + ; + const result = getNodeFieldInt(arena.allocator(), node, "max_turns"); + try std.testing.expect(result == null); +} + +test "buildSubgraphInput: maps values from parent state" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + const parent_state = + \\{"fix_result":"patched code","count":42} + ; + const mapping = + \\{"code":"state.fix_result"} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_gc", "r1", "discuss", 
"group_chat", "ready", "{}", 1, null, null, null); - var engine = Engine.init(&store, allocator, 500); + const result = try buildSubgraphInput(alloc, parent_state, mapping); + const parsed = try json.parseFromSlice(json.Value, alloc, result, .{}); + try std.testing.expect(parsed.value == .object); + const code = parsed.value.object.get("code") orelse return error.TestUnexpectedResult; + try std.testing.expectEqualStrings("patched code", code.string); +} +test "buildSubgraphInput: empty mapping" { + const allocator = std.testing.allocator; var arena = std.heap.ArenaAllocator.init(allocator); defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - - // group_chat step should be "running" - const gc_step = (try store.getStep(arena.allocator(), "step_gc")).?; - try std.testing.expectEqualStrings("running", gc_step.status); + const result = try buildSubgraphInput(arena.allocator(), "{\"x\":1}", "{}"); + try std.testing.expectEqualStrings("{}", result); } -test "Engine: group_chat step fails without participants" { +test "engine: breakpoint interrupt_before stops run" { const allocator = std.testing.allocator; var store = try Store.init(allocator, ":memory:"); defer store.deinit(); + // Workflow with interrupt_before on t1 const wf = - \\{"steps":[{"id":"discuss","type":"group_chat","prompt_template":"Discuss"}]} + \\{"interrupt_before":["t1"],"nodes":{"t1":{"type":"transform","updates":"{\"result\":\"done\"}"}},"edges":[["__start__","t1"],["t1","__end__"]],"schema":{"result":{"type":"string","reducer":"last_value"}}} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_gc", "r1", "discuss", "group_chat", "ready", "{}", 1, null, null, null); + + try store.createRunWithState("r1", null, wf, "{}", "{}"); + try store.updateRunStatus("r1", "running", null); var engine = Engine.init(&store, allocator, 500); @@ -4631,20 +3365,23 @@ test "Engine: 
group_chat step fails without participants" { const run_row = (try store.getRun(arena.allocator(), "r1")).?; try engine.processRun(arena.allocator(), run_row); - const step = (try store.getStep(arena.allocator(), "step_gc")).?; - try std.testing.expectEqualStrings("failed", step.status); + const updated_run = (try store.getRun(arena.allocator(), "r1")).?; + // Should be interrupted, not completed, because interrupt_before fires before t1 + try std.testing.expectEqualStrings("interrupted", updated_run.status); } -test "Engine: group_chat step fails without prompt_template" { +test "engine: breakpoint interrupt_after stops run after node" { const allocator = std.testing.allocator; var store = try Store.init(allocator, ":memory:"); defer store.deinit(); + // Workflow with interrupt_after on t1; there's a t2 after t1 const wf = - \\{"steps":[{"id":"discuss","type":"group_chat","participants":[{"tags":["a"],"role":"A"}]}]} + \\{"interrupt_after":["t1"],"nodes":{"t1":{"type":"transform","updates":"{\"x\":\"done\"}"},"t2":{"type":"transform","updates":"{\"y\":\"also\"}"}},"edges":[["__start__","t1"],["t1","t2"],["t2","__end__"]],"schema":{"x":{"type":"string","reducer":"last_value"},"y":{"type":"string","reducer":"last_value"}}} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_gc", "r1", "discuss", "group_chat", "ready", "{}", 1, null, null, null); + + try store.createRunWithState("r1", null, wf, "{}", "{}"); + try store.updateRunStatus("r1", "running", null); var engine = Engine.init(&store, allocator, 500); @@ -4654,130 +3391,105 @@ test "Engine: group_chat step fails without prompt_template" { const run_row = (try store.getRun(arena.allocator(), "r1")).?; try engine.processRun(arena.allocator(), run_row); - const step = (try store.getStep(arena.allocator(), "step_gc")).?; - try std.testing.expectEqualStrings("failed", step.status); + const updated_run = (try store.getRun(arena.allocator(), "r1")).?; + // t1 should have 
executed (state contains x), but run is interrupted + try std.testing.expectEqualStrings("interrupted", updated_run.status); + // Verify t1's state was saved + if (updated_run.state_json) |sj| { + try std.testing.expect(std.mem.indexOf(u8, sj, "done") != null); + } } -test "Engine: group_chat builds chat history across rounds" { +test "engine: configurable runs inject __config" { const allocator = std.testing.allocator; var store = try Store.init(allocator, ":memory:"); defer store.deinit(); - // Manually insert chat messages and test the poll logic + // Workflow with a transform that sets result const wf = - \\{"steps":[{"id":"discuss","type":"group_chat","participants":[{"tags":["a"],"role":"Architect"},{"tags":["b"],"role":"Security"}],"max_rounds":2,"exit_condition":"CONSENSUS","prompt_template":"Discuss topic","round_template":"Previous:\n{{chat_history}}\nYour role: {{role}}. Respond."}]} + \\{"nodes":{"t1":{"type":"transform","updates":"{\"result\":\"ok\"}"}},"edges":[["__start__","t1"],["t1","__end__"]],"schema":{"result":{"type":"string","reducer":"last_value"},"__config":{"type":"object","reducer":"last_value"}}} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_gc", "r1", "discuss", "group_chat", "running", "{}", 1, null, null, null); - // Insert round 1 messages (simulating what dispatch would produce) - try store.insertChatMessage("r1", "step_gc", 1, "Architect", null, "I suggest microservices"); - try store.insertChatMessage("r1", "step_gc", 1, "Security", null, "We need auth first"); + try store.createRunWithState("r1", null, wf, "{}", "{}"); + try store.setConfigJson("r1", "{\"model\":\"gpt-4\"}"); + try store.updateRunStatus("r1", "running", null); var engine = Engine.init(&store, allocator, 500); - // Poll: round 1 complete, no CONSENSUS, max_rounds=2, so it should try round 2 - // Since no workers, dispatch will fail silently. Then next poll round_count stays at 2 for round 1. 
- { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } - - // Step should still be running (no workers to dispatch round 2) - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const step = (try store.getStep(arena.allocator(), "step_gc")).?; - try std.testing.expectEqualStrings("running", step.status); - } - - // Simulate round 2 messages with CONSENSUS - try store.insertChatMessage("r1", "step_gc", 2, "Architect", null, "CONSENSUS reached"); - try store.insertChatMessage("r1", "step_gc", 2, "Security", null, "Agreed, CONSENSUS"); + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); - // Poll: round 2 complete with CONSENSUS, should complete - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + const run_row = (try store.getRun(arena.allocator(), "r1")).?; + try engine.processRun(arena.allocator(), run_row); - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const step = (try store.getStep(arena.allocator(), "step_gc")).?; - try std.testing.expectEqualStrings("completed", step.status); - try std.testing.expect(step.output_json != null); + const updated_run = (try store.getRun(arena.allocator(), "r1")).?; + try std.testing.expectEqualStrings("completed", updated_run.status); + // Verify __config was injected into state + if (updated_run.state_json) |sj| { + try std.testing.expect(std.mem.indexOf(u8, sj, "__config") != null); + try std.testing.expect(std.mem.indexOf(u8, sj, "gpt-4") != null); } } -test "Engine: group_chat completes at max_rounds" { +test "engine: transform store_updates uses trusted tracker settings" { const allocator = std.testing.allocator; var store = try 
Store.init(allocator, ":memory:"); defer store.deinit(); + test_store_write_base_url = ""; + test_store_write_api_token = null; + test_store_write_namespace = ""; + test_store_write_key = ""; + test_store_write_value_json = ""; + const wf = - \\{"steps":[{"id":"discuss","type":"group_chat","participants":[{"tags":["a"],"role":"A"},{"tags":["b"],"role":"B"}],"max_rounds":1,"exit_condition":"NEVER_MATCH","prompt_template":"Discuss","round_template":"{{chat_history}} {{role}}"}]} + \\{"nodes":{"save":{"type":"transform","updates":"{\"review_result\":{\"grade\":\"approved\"}}","store_updates":{"namespace":"project_context","key":"latest_review","value":"state.review_result"}}},"edges":[["__start__","save"],["save","__end__"]],"schema":{"review_result":{"type":"object","reducer":"last_value"},"__config":{"type":"object","reducer":"last_value"}}} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_gc", "r1", "discuss", "group_chat", "running", "{}", 1, null, null, null); - // Insert round 1 messages (no exit condition match) - try store.insertChatMessage("r1", "step_gc", 1, "A", null, "hello"); - try store.insertChatMessage("r1", "step_gc", 1, "B", null, "world"); + try store.createRunWithState("r1", null, wf, "{}", "{}"); + try store.updateRunStatus("r1", "running", null); var engine = Engine.init(&store, allocator, 500); + engine.store_writer = mockStoreWriter; + engine.setTrustedTrackerAccess("http://tickets.test", "secret-token"); - // Poll: round 1 complete, no exit match, max_rounds=1, should complete - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } - - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const step = (try store.getStep(arena.allocator(), "step_gc")).?; - try std.testing.expectEqualStrings("completed", step.status); - } -} - 
-test "buildChatTranscript formats messages" { - const allocator = std.testing.allocator; var arena = std.heap.ArenaAllocator.init(allocator); defer arena.deinit(); - const messages = [_]types.ChatMessageRow{ - .{ .id = 1, .run_id = "r1", .step_id = "s1", .round = 1, .role = "Architect", .worker_id = null, .message = "hello", .ts_ms = 1000 }, - .{ .id = 2, .run_id = "r1", .step_id = "s1", .round = 1, .role = "Security", .worker_id = null, .message = "world", .ts_ms = 1001 }, - }; + const run_row = (try store.getRun(arena.allocator(), "r1")).?; + try engine.processRun(arena.allocator(), run_row); - const transcript = try buildChatTranscript(arena.allocator(), &messages); - try std.testing.expect(std.mem.indexOf(u8, transcript, "Architect") != null); - try std.testing.expect(std.mem.indexOf(u8, transcript, "Security") != null); - try std.testing.expect(std.mem.indexOf(u8, transcript, "hello") != null); - try std.testing.expect(std.mem.indexOf(u8, transcript, "world") != null); + const updated_run = (try store.getRun(arena.allocator(), "r1")).?; + try std.testing.expectEqualStrings("completed", updated_run.status); + try std.testing.expectEqualStrings("http://tickets.test", test_store_write_base_url); + try std.testing.expect(test_store_write_api_token != null); + try std.testing.expectEqualStrings("secret-token", test_store_write_api_token.?); + try std.testing.expectEqualStrings("project_context", test_store_write_namespace); + try std.testing.expectEqualStrings("latest_review", test_store_write_key); + try std.testing.expectEqualStrings("{\"grade\":\"approved\"}", test_store_write_value_json); } -// ── Saga step tests ────────────────────────────────────────────────── - -test "Engine: saga step creates first body child and initializes state" { +test "engine: workflow cannot override trusted tracker settings" { const allocator = std.testing.allocator; var store = try Store.init(allocator, ":memory:"); defer store.deinit(); + test_store_write_base_url = ""; + 
test_store_write_api_token = null; + test_store_write_namespace = ""; + test_store_write_key = ""; + test_store_write_value_json = ""; + const wf = - \\{"steps":[{"id":"deploy_saga","type":"saga","body":["provision","deploy","verify"],"compensations":{"provision":"deprovision","deploy":"rollback_deploy"}},{"id":"provision","type":"task","prompt_template":"provision"},{"id":"deploy","type":"task","prompt_template":"deploy"},{"id":"verify","type":"task","prompt_template":"verify"},{"id":"deprovision","type":"task","prompt_template":"deprovision"},{"id":"rollback_deploy","type":"task","prompt_template":"rollback"}]} + \\{"tracker_url":"http://evil.test","tracker_api_token":"evil-token","nodes":{"save":{"type":"transform","updates":"{\"review_result\":{\"grade\":\"approved\"}}","store_updates":{"namespace":"project_context","key":"latest_review","value":"state.review_result"}}},"edges":[["__start__","save"],["save","__end__"]],"schema":{"review_result":{"type":"object","reducer":"last_value"}}} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_saga", "r1", "deploy_saga", "saga", "ready", "{}", 1, null, null, null); + + try store.createRunWithState("r1", null, wf, "{}", "{}"); + try store.updateRunStatus("r1", "running", null); var engine = Engine.init(&store, allocator, 500); + engine.store_writer = mockStoreWriter; + engine.setTrustedTrackerAccess("http://tickets.test", "secret-token"); var arena = std.heap.ArenaAllocator.init(allocator); defer arena.deinit(); @@ -4785,285 +3497,114 @@ test "Engine: saga step creates first body child and initializes state" { const run_row = (try store.getRun(arena.allocator(), "r1")).?; try engine.processRun(arena.allocator(), run_row); - // Saga step should be "running" - const saga_step = (try store.getStep(arena.allocator(), "step_saga")).?; - try std.testing.expectEqualStrings("running", saga_step.status); - - // Should have created 1 child step (first body step) - const children = try 
store.getChildSteps(arena.allocator(), "step_saga"); - try std.testing.expectEqual(@as(usize, 1), children.len); - try std.testing.expectEqualStrings("provision", children[0].def_step_id); - try std.testing.expectEqualStrings("ready", children[0].status); - - // Should have saga_state entries - const saga_states = try store.getSagaStates(arena.allocator(), "r1", "step_saga"); - try std.testing.expectEqual(@as(usize, 3), saga_states.len); - try std.testing.expectEqualStrings("pending", saga_states[0].status); - try std.testing.expectEqualStrings("pending", saga_states[1].status); - try std.testing.expectEqualStrings("pending", saga_states[2].status); + try std.testing.expectEqualStrings("http://tickets.test", test_store_write_base_url); + try std.testing.expect(test_store_write_api_token != null); + try std.testing.expectEqualStrings("secret-token", test_store_write_api_token.?); } -test "Engine: saga step executes body sequentially and completes" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - const wf = - \\{"steps":[{"id":"saga1","type":"saga","body":["s1","s2"],"compensations":{"s1":"c1"}},{"id":"s1","type":"task","prompt_template":"step1"},{"id":"s2","type":"task","prompt_template":"step2"},{"id":"c1","type":"task","prompt_template":"comp1"}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_saga", "r1", "saga1", "saga", "ready", "{}", 1, null, null, null); - - var engine = Engine.init(&store, allocator, 500); - - // Tick 1: creates first body child (s1) - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } - - // Complete first body child (s1) - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), 
"step_saga"); - try std.testing.expectEqual(@as(usize, 1), children.len); - try store.updateStepStatus(children[0].id, "completed", null, "{\"output\":\"provisioned\"}", null, 1); - } - - // Tick 2: detects s1 completed, creates s2 - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } +test "encodePathSegment percent-encodes reserved characters" { + const encoded = try encodePathSegment(std.testing.allocator, "task/alpha beta"); + defer std.testing.allocator.free(encoded); - // Should now have 2 children - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_saga"); - try std.testing.expectEqual(@as(usize, 2), children.len); - } + try std.testing.expectEqualStrings("task%2Falpha%20beta", encoded); +} - // Complete second body child (s2) - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_saga"); - for (children) |child| { - if (std.mem.eql(u8, child.def_step_id, "s2")) { - try store.updateStepStatus(child.id, "completed", null, "{\"output\":\"deployed\"}", null, 1); - } - } - } +test "getWorkflowVersion: extracts version" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); - // Tick 3: detects s2 completed, all body steps done, saga completes - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + try std.testing.expectEqual(@as(i64, 2), getWorkflowVersion(arena.allocator(), "{\"version\":2,\"nodes\":{}}")); + try std.testing.expectEqual(@as(i64, 1), getWorkflowVersion(arena.allocator(), "{\"nodes\":{}}")); 
+ try std.testing.expectEqual(@as(i64, 1), getWorkflowVersion(arena.allocator(), "invalid")); +} - // Tick 4: saga polls — should now detect all completed - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } +test "getCheckpointWorkflowVersion: extracts from metadata" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); - // Saga should be completed - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const saga_step = (try store.getStep(arena.allocator(), "step_saga")).?; - try std.testing.expectEqualStrings("completed", saga_step.status); - try std.testing.expect(saga_step.output_json != null); - } + try std.testing.expectEqual(@as(i64, 3), getCheckpointWorkflowVersion(arena.allocator(), "{\"workflow_version\":3}")); + try std.testing.expectEqual(@as(i64, 1), getCheckpointWorkflowVersion(arena.allocator(), "{\"route_results\":{}}")); + try std.testing.expectEqual(@as(i64, 1), getCheckpointWorkflowVersion(arena.allocator(), null)); } -test "Engine: saga step runs compensation in reverse on failure" { +test "migrateCompletedNodes: filters removed nodes" { const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + const alloc = arena.allocator(); + var completed = std.StringHashMap(void).init(alloc); + try completed.put("analyze", {}); + try completed.put("old_node", {}); + try completed.put("__start__", {}); const wf = - 
\\{"steps":[{"id":"saga1","type":"saga","body":["s1","s2"],"compensations":{"s1":"c1","s2":"c2"}},{"id":"s1","type":"task","prompt_template":"step1"},{"id":"s2","type":"task","prompt_template":"step2"},{"id":"c1","type":"task","prompt_template":"comp1"},{"id":"c2","type":"task","prompt_template":"comp2"}]} + \\{"nodes":{"analyze":{"type":"task"},"new_node":{"type":"task"}},"edges":[]} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_saga", "r1", "saga1", "saga", "ready", "{}", 1, null, null, null); - - var engine = Engine.init(&store, allocator, 500); - // Tick 1: creates first body child (s1) - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } - - // Complete first body child (s1) - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_saga"); - try store.updateStepStatus(children[0].id, "completed", null, "{\"output\":\"provisioned\"}", null, 1); - } - - // Tick 2: creates s2 - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } - - // Fail second body child (s2) - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_saga"); - for (children) |child| { - if (std.mem.eql(u8, child.def_step_id, "s2")) { - try store.updateStepStatus(child.id, "failed", null, null, "deploy failed", 1); - } - } - } - - // Tick 3: detects s2 failed, starts compensation (s1 was completed, so compensate s1) - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try 
engine.processRun(arena.allocator(), run_row); - } - - // Tick 4: compensation child creation may happen here - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } - - // Should have created compensation child for s1 - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_saga"); - var found_comp = false; - for (children) |child| { - if (std.mem.eql(u8, child.def_step_id, "c1")) { - found_comp = true; - } - } - try std.testing.expect(found_comp); - } + const migrated = migrateCompletedNodes(alloc, &completed, wf); + try std.testing.expect(migrated); + try std.testing.expect(completed.get("analyze") != null); + try std.testing.expect(completed.get("__start__") != null); + try std.testing.expect(completed.get("old_node") == null); +} - // Complete the compensation child - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_saga"); - for (children) |child| { - if (std.mem.eql(u8, child.def_step_id, "c1")) { - try store.updateStepStatus(child.id, "completed", null, "{\"output\":\"deprovisioned\"}", null, 1); - } - } - } +test "migrateCompletedNodes: no changes needed" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); - // Tick 5: compensation done - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + const alloc = arena.allocator(); + var completed = std.StringHashMap(void).init(alloc); + try completed.put("analyze", {}); - // Tick 6: saga should finalize as failed - { - var arena = 
std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + const wf = + \\{"nodes":{"analyze":{"type":"task"}},"edges":[]} + ; - // Saga should be failed with compensation output - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const saga_step = (try store.getStep(arena.allocator(), "step_saga")).?; - try std.testing.expectEqualStrings("failed", saga_step.status); - try std.testing.expect(saga_step.output_json != null); - // Output should contain failed_at and compensated - try std.testing.expect(std.mem.indexOf(u8, saga_step.output_json.?, "failed_at") != null); - try std.testing.expect(std.mem.indexOf(u8, saga_step.output_json.?, "compensated") != null); - } + const migrated = migrateCompletedNodes(alloc, &completed, wf); + try std.testing.expect(!migrated); } -test "Engine: saga step fails immediately with no completed steps to compensate" { +test "serializeRouteResultsWithVersion: includes version" { const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - const wf = - \\{"steps":[{"id":"saga1","type":"saga","body":["s1"],"compensations":{"s1":"c1"}},{"id":"s1","type":"task","prompt_template":"step1"},{"id":"c1","type":"task","prompt_template":"comp1"}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_saga", "r1", "saga1", "saga", "ready", "{}", 1, null, null, null); + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); - var engine = Engine.init(&store, allocator, 500); + const alloc = arena.allocator(); + var route_results = std.StringHashMap([]const u8).init(alloc); - // Tick 1: creates first body child (s1) - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try 
engine.processRun(arena.allocator(), run_row); - } + const result = try serializeRouteResultsWithVersion(alloc, &route_results, 5); + try std.testing.expect(result != null); + try std.testing.expect(std.mem.indexOf(u8, result.?, "workflow_version") != null); + try std.testing.expect(std.mem.indexOf(u8, result.?, "5") != null); +} - // Fail the first body child - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_saga"); - try store.updateStepStatus(children[0].id, "failed", null, null, "provision failed", 1); - } +test "serializeRouteResultsWithVersion: null version, empty routes" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); - // Tick 2: detects s1 failed, no completed steps, saga fails immediately - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + const alloc = arena.allocator(); + var route_results = std.StringHashMap([]const u8).init(alloc); - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const saga_step = (try store.getStep(arena.allocator(), "step_saga")).?; - try std.testing.expectEqualStrings("failed", saga_step.status); - try std.testing.expect(saga_step.output_json != null); - try std.testing.expect(std.mem.indexOf(u8, saga_step.output_json.?, "compensated\":[]") != null); - } + const result = try serializeRouteResultsWithVersion(alloc, &route_results, null); + try std.testing.expect(result == null); } -test "Engine: saga step fails without body" { +test "engine: workflow version stored in checkpoint metadata" { const allocator = std.testing.allocator; var store = try Store.init(allocator, ":memory:"); defer store.deinit(); const wf = - \\{"steps":[{"id":"saga1","type":"saga"}]} + 
\\{"version":2,"nodes":{"t1":{"type":"transform","updates":"{\"result\":\"done\"}"}},"edges":[["__start__","t1"],["t1","__end__"]],"schema":{"result":{"type":"string","reducer":"last_value"}}} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_saga", "r1", "saga1", "saga", "ready", "{}", 1, null, null, null); + + try store.createRunWithState("r1", null, wf, "{}", "{}"); + try store.updateRunStatus("r1", "running", null); var engine = Engine.init(&store, allocator, 500); @@ -5073,213 +3614,175 @@ test "Engine: saga step fails without body" { const run_row = (try store.getRun(arena.allocator(), "r1")).?; try engine.processRun(arena.allocator(), run_row); - const saga_step = (try store.getStep(arena.allocator(), "step_saga")).?; - try std.testing.expectEqualStrings("failed", saga_step.status); + // Check that checkpoint has workflow_version in metadata + const latest_cp = (try store.getLatestCheckpoint(arena.allocator(), "r1")).?; + try std.testing.expect(latest_cp.metadata_json != null); + try std.testing.expect(std.mem.indexOf(u8, latest_cp.metadata_json.?, "workflow_version") != null); + try std.testing.expect(std.mem.indexOf(u8, latest_cp.metadata_json.?, "2") != null); } -// ── Graph cycle tests ──────────────────────────────────────────────── +test "OrchestratorEvent: eventKindString returns correct strings" { + try std.testing.expectEqualStrings("run.started", OrchestratorEvent.eventKindString(.run_started)); + try std.testing.expectEqualStrings("run.completed", OrchestratorEvent.eventKindString(.run_completed)); + try std.testing.expectEqualStrings("run.failed", OrchestratorEvent.eventKindString(.run_failed)); + try std.testing.expectEqualStrings("run.interrupted", OrchestratorEvent.eventKindString(.run_interrupted)); + try std.testing.expectEqualStrings("run.cancelled", OrchestratorEvent.eventKindString(.run_cancelled)); + try std.testing.expectEqualStrings("step.started", 
OrchestratorEvent.eventKindString(.step_started)); + try std.testing.expectEqualStrings("step.completed", OrchestratorEvent.eventKindString(.step_completed)); + try std.testing.expectEqualStrings("step.failed", OrchestratorEvent.eventKindString(.step_failed)); + try std.testing.expectEqualStrings("step.retrying", OrchestratorEvent.eventKindString(.step_retrying)); + try std.testing.expectEqualStrings("checkpoint.created", OrchestratorEvent.eventKindString(.checkpoint_created)); + try std.testing.expectEqualStrings("state.injected", OrchestratorEvent.eventKindString(.state_injected)); +} -test "Engine: condition routes back to earlier step creates new instances" { +test "OrchestratorEvent: toJson serializes correctly" { const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - // Workflow: compute -> check -> (if true_target=compute, false_target=done) - const wf = - \\{"steps":[{"id":"compute","type":"task","prompt_template":"compute","depends_on":[]},{"id":"check","type":"condition","expression":"retry","true_target":"compute","false_target":"done","depends_on":["compute"]},{"id":"done","type":"task","prompt_template":"done","depends_on":["check"]}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); - // Step "compute" completed - try store.insertStep("step_compute", "r1", "compute", "task", "completed", "{}", 1, null, null, null); - try store.updateStepStatus("step_compute", "completed", null, "{\"output\":\"retry this\"}", null, 1); + const ev = OrchestratorEvent{ + .event_type = .run_started, + .run_id = "run-123", + .step_id = null, + .node_name = "analyze", + .timestamp_ms = 1700000000000, + .metadata_json = null, + }; - // Step "check" is ready, depends on compute - try store.insertStep("step_check", "r1", "check", "condition", "ready", "{}", 1, null, null, null); - try 
store.insertStepDep("step_check", "step_compute"); + const json_str = ev.toJson(arena.allocator()); + try std.testing.expect(json_str != null); + try std.testing.expect(std.mem.indexOf(u8, json_str.?, "run.started") != null); + try std.testing.expect(std.mem.indexOf(u8, json_str.?, "run-123") != null); + try std.testing.expect(std.mem.indexOf(u8, json_str.?, "analyze") != null); +} - // Step "done" is pending - try store.insertStep("step_done", "r1", "done", "task", "pending", "{}", 1, null, null, null); - try store.insertStepDep("step_done", "step_check"); +test "engine: validateConfig returns false with no workers" { + const allocator = std.testing.allocator; + var store = try Store.init(allocator, ":memory:"); + defer store.deinit(); var engine = Engine.init(&store, allocator, 500); - - // Tick 1: condition evaluates to true, target "compute" is already completed - // Should detect cycle and create new step instances - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } - - // Verify: condition step should be completed with cycle_back output - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const check_step = (try store.getStep(arena.allocator(), "step_check")).?; - try std.testing.expectEqualStrings("completed", check_step.status); - try std.testing.expect(check_step.output_json != null); - try std.testing.expect(std.mem.indexOf(u8, check_step.output_json.?, "cycle_back") != null); - } - - // Verify: new step instances were created (total steps > 3) - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const all_steps = try store.getStepsByRun(arena.allocator(), "r1"); - // Original: compute, check, done = 3 - // New: compute(iter1), check(iter1) = 2 more - try std.testing.expect(all_steps.len > 3); - - // Find new compute instance with 
iteration_index > 0 - var found_new_compute = false; - for (all_steps) |s| { - if (std.mem.eql(u8, s.def_step_id, "compute") and s.iteration_index > 0) { - found_new_compute = true; - try std.testing.expectEqualStrings("ready", s.status); - } - } - try std.testing.expect(found_new_compute); - } - - // Verify cycle_state was updated - { - const cycle_state = try store.getCycleState("r1", "cycle_check"); - try std.testing.expect(cycle_state != null); - try std.testing.expectEqual(@as(i64, 1), cycle_state.?.iteration_count); - } + try std.testing.expect(!engine.validateConfig()); } -test "Engine: graph cycle respects max_cycle_iterations" { +test "engine: validateConfig returns true with registered workers" { const allocator = std.testing.allocator; var store = try Store.init(allocator, ":memory:"); defer store.deinit(); - // Workflow with max_cycle_iterations=1 + try store.insertWorker("w1", "http://localhost:9000", "", "webhook", null, "[]", 5, "config"); + var engine = Engine.init(&store, allocator, 500); + try std.testing.expect(engine.validateConfig()); +} + +test "generateMermaid: simple chain" { + const allocator = std.testing.allocator; const wf = - \\{"steps":[{"id":"compute","type":"task","prompt_template":"compute"},{"id":"check","type":"condition","expression":"retry","true_target":"compute","false_target":"done","max_cycle_iterations":1,"depends_on":["compute"]},{"id":"done","type":"task","prompt_template":"done","depends_on":["check"]}]} + \\{"nodes":{"analyze":{"type":"task"},"review":{"type":"task"}},"edges":[["__start__","analyze"],["analyze","review"],["review","__end__"]]} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); + const result = try generateMermaid(allocator, wf); + defer allocator.free(result); - // Pre-set cycle state to max - try store.upsertCycleState("r1", "cycle_check", 1, 1); - - // compute completed - try store.insertStep("step_compute", "r1", "compute", "task", "completed", "{}", 1, null, null, null); - try 
store.updateStepStatus("step_compute", "completed", null, "{\"output\":\"retry\"}", null, 1); - - // check is ready - try store.insertStep("step_check", "r1", "check", "condition", "ready", "{}", 1, null, null, null); - try store.insertStepDep("step_check", "step_compute"); - - // done is pending - try store.insertStep("step_done", "r1", "done", "task", "pending", "{}", 1, null, null, null); - try store.insertStepDep("step_done", "step_check"); - - var engine = Engine.init(&store, allocator, 500); - - // Tick: condition should fail because cycle limit exceeded - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + try std.testing.expect(std.mem.indexOf(u8, result, "graph TD") != null); + try std.testing.expect(std.mem.indexOf(u8, result, "__start__((Start))") != null); + try std.testing.expect(std.mem.indexOf(u8, result, "__end__((End))") != null); + try std.testing.expect(std.mem.indexOf(u8, result, "analyze[analyze") != null); + try std.testing.expect(std.mem.indexOf(u8, result, "__start__ --> analyze") != null); + try std.testing.expect(std.mem.indexOf(u8, result, "review --> __end__") != null); +} - // Check step should be failed - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const check_step = (try store.getStep(arena.allocator(), "step_check")).?; - try std.testing.expectEqualStrings("failed", check_step.status); - try std.testing.expect(check_step.error_text != null); - try std.testing.expect(std.mem.indexOf(u8, check_step.error_text.?, "exceeded") != null); - } +test "generateMermaid: route node with conditional edges" { + const allocator = std.testing.allocator; + const wf = + 
\\{"nodes":{"decide":{"type":"route"},"approve":{"type":"task"},"reject":{"type":"task"}},"edges":[["__start__","decide"],["decide:yes","approve"],["decide:no","reject"],["approve","__end__"],["reject","__end__"]]} + ; + const result = try generateMermaid(allocator, wf); + defer allocator.free(result); - // Run should be failed - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run = (try store.getRun(arena.allocator(), "r1")).?; - try std.testing.expectEqualStrings("failed", run.status); - } + try std.testing.expect(std.mem.indexOf(u8, result, "decide{decide") != null); + try std.testing.expect(std.mem.indexOf(u8, result, "decide -->|yes| approve") != null); + try std.testing.expect(std.mem.indexOf(u8, result, "decide -->|no| reject") != null); } -// ── Worker handoff tests ───────────────────────────────────────────── - -test "extractHandoffTarget parses handoff_to from output" { +test "generateMermaid: node type shapes" { const allocator = std.testing.allocator; - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - const output = - \\{"output":"cannot handle","handoff_to":{"tags":["security_expert"],"message":"needs security review"}} + const wf = + \\{"nodes":{"t":{"type":"transform"},"i":{"type":"interrupt"},"s":{"type":"send"},"sg":{"type":"subgraph"}},"edges":[["__start__","t"],["t","__end__"]]} ; - const target = extractHandoffTarget(arena.allocator(), output); - try std.testing.expect(target != null); - try std.testing.expectEqual(@as(usize, 1), target.?.tags.len); - try std.testing.expectEqualStrings("security_expert", target.?.tags[0]); - try std.testing.expect(target.?.message != null); - try std.testing.expectEqualStrings("needs security review", target.?.message.?); + const result = try generateMermaid(allocator, wf); + defer allocator.free(result); + + // transform uses rounded parens + try std.testing.expect(std.mem.indexOf(u8, result, "t(t\\ntransform)") != null); + // interrupt 
uses parallelogram + try std.testing.expect(std.mem.indexOf(u8, result, "i[/i\\ninterrupt/]") != null); + // send uses double brackets + try std.testing.expect(std.mem.indexOf(u8, result, "s[[s\\nsend]]") != null); + // subgraph uses rectangle + try std.testing.expect(std.mem.indexOf(u8, result, "sg[sg\\nsubgraph]") != null); } -test "extractHandoffTarget returns null for normal output" { +test "processUiMessages: broadcasts events" { const allocator = std.testing.allocator; var arena = std.heap.ArenaAllocator.init(allocator); defer arena.deinit(); + const alloc = arena.allocator(); - const output = - \\{"output":"all good, no handoff needed"} + var hub = sse_mod.SseHub.init(alloc); + defer hub.deinit(); + + const queue = hub.getOrCreateQueue("run1"); + + const response = + \\{"response":"ok","ui_messages":[{"id":"p1","name":"ProgressBar","props":{"progress":75}},{"id":"old","remove":true}]} ; - const target = extractHandoffTarget(arena.allocator(), output); - try std.testing.expect(target == null); + processUiMessages(&hub, alloc, "run1", "step1", response); + + const snapshot = queue.snapshotSince(alloc, 0); + defer queue.freeSnapshot(alloc, snapshot); + try std.testing.expectEqual(@as(usize, 2), snapshot.events.len); + try std.testing.expectEqualStrings("ui_message", snapshot.events[0].event_type); + try std.testing.expectEqualStrings("ui_message_delete", snapshot.events[1].event_type); + // First event should contain step_id + try std.testing.expect(std.mem.indexOf(u8, snapshot.events[0].data, "step1") != null); } -test "extractHandoffTarget returns null for non-JSON output" { +test "processStreamMessages: broadcasts message events" { const allocator = std.testing.allocator; var arena = std.heap.ArenaAllocator.init(allocator); defer arena.deinit(); + const alloc = arena.allocator(); - const target = extractHandoffTarget(arena.allocator(), "plain text output"); - try std.testing.expect(target == null); -} + var hub = sse_mod.SseHub.init(alloc); + defer 
hub.deinit(); -test "extractHandoffTarget handles handoff without message" { - const allocator = std.testing.allocator; - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); + const queue = hub.getOrCreateQueue("run1"); - const output = - \\{"output":"redirect","handoff_to":{"tags":["expert"]}} + const response = + \\{"response":"done","stream_messages":[{"role":"assistant","content":"Starting..."},{"role":"tool","content":"Found 3 issues","tool":"lint"}]} ; - const target = extractHandoffTarget(arena.allocator(), output); - try std.testing.expect(target != null); - try std.testing.expectEqual(@as(usize, 1), target.?.tags.len); - try std.testing.expectEqualStrings("expert", target.?.tags[0]); - try std.testing.expect(target.?.message == null); + processStreamMessages(&hub, alloc, "run1", "step1", "task", response); + + const snapshot = queue.snapshotSince(alloc, 0); + defer queue.freeSnapshot(alloc, snapshot); + try std.testing.expectEqual(@as(usize, 2), snapshot.events.len); + try std.testing.expectEqualStrings("message", snapshot.events[0].event_type); + try std.testing.expectEqualStrings("message", snapshot.events[1].event_type); + // Should contain step context + try std.testing.expect(std.mem.indexOf(u8, snapshot.events[0].data, "step1") != null); + try std.testing.expect(std.mem.indexOf(u8, snapshot.events[0].data, "task") != null); + try std.testing.expect(std.mem.indexOf(u8, snapshot.events[1].data, "tool") != null); } -test "Engine: task step stays ready when no workers available (handoff path)" { +test "applyUiMessagesToState: creates __ui_messages" { const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - const wf = - \\{"steps":[{"id":"t1","type":"task","prompt_template":"do work"}]} + const state = "{}"; + const response = + \\{"response":"ok","ui_messages":[{"id":"p1","name":"ProgressBar"}]} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try 
store.insertStep("step_t1", "r1", "t1", "task", "ready", "{}", 1, null, null, null); - - var engine = Engine.init(&store, allocator, 500); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); + const result = try applyUiMessagesToState(allocator, state, response); + defer allocator.free(result); - // No workers available, step should remain "ready" - const step = (try store.getStep(arena.allocator(), "step_t1")).?; - try std.testing.expectEqualStrings("ready", step.status); + try std.testing.expect(std.mem.indexOf(u8, result, "__ui_messages") != null); + try std.testing.expect(std.mem.indexOf(u8, result, "ProgressBar") != null); } diff --git a/src/main.zig b/src/main.zig index 437a610..45590f0 100644 --- a/src/main.zig +++ b/src/main.zig @@ -12,6 +12,7 @@ const redis_client = @import("redis_client.zig"); const mqtt_client = @import("mqtt_client.zig"); const tracker_mod = @import("tracker.zig"); const workflow_loader = @import("workflow_loader.zig"); +const sse_mod = @import("sse.zig"); const c = @cImport({ @cInclude("signal.h"); }); @@ -148,6 +149,9 @@ pub fn main() !void { var metrics = metrics_mod.Metrics{}; var drain_mode = std.atomic.Value(bool).init(false); + var sse_hub = sse_mod.SseHub.init(allocator); + defer sse_hub.deinit(); + var response_queue = async_dispatch.ResponseQueue.init(allocator); defer response_queue.deinit(); @@ -240,6 +244,14 @@ pub fn main() !void { // Start DAG engine on a background thread const poll_ms: u64 = cfg.engine.poll_interval_ms; + // Hot reload watcher for workflow definitions + var wf_watcher: ?workflow_loader.WorkflowWatcher = null; + if (cfg.tracker) |tracker_cfg| { + if (tracker_cfg.workflows_dir.len > 0) { + wf_watcher = workflow_loader.WorkflowWatcher.init(allocator, tracker_cfg.workflows_dir, &store); + } + } + var engine = engine_mod.Engine.init(&store, allocator, poll_ms); 
engine.configure(.{ .health_check_interval_ms = @as(i64, @intCast(cfg.engine.health_check_interval_ms)), @@ -250,7 +262,13 @@ pub fn main() !void { .retry_jitter_ms = @as(i64, @intCast(cfg.engine.retry_jitter_ms)), .retry_max_elapsed_ms = @as(i64, @intCast(cfg.engine.retry_max_elapsed_ms)), }, &metrics); + if (cfg.tracker) |tracker_cfg| { + engine.setTrustedTrackerAccess(tracker_cfg.url, tracker_cfg.api_token); + } engine.response_queue = &response_queue; + if (wf_watcher != null) { + engine.workflow_watcher = &wf_watcher.?; + } const engine_thread = try std.Thread.spawn(.{}, engine_mod.Engine.run, .{&engine}); // Spawn listener threads for async protocols @@ -337,6 +355,9 @@ pub fn main() !void { if (tracker_instance) |*ti| { ti.deinit(); } + if (wf_watcher) |*ww| { + ww.deinit(); + } } while (true) { @@ -387,6 +408,8 @@ pub fn main() !void { .strategies = &strategy_map, .tracker_state = if (tracker_instance) |*ti| &ti.state else null, .tracker_cfg = if (cfg.tracker) |*tc| tc else null, + .sse_hub = &sse_hub, + .rate_limits = &engine.rate_limits, }; const response = api.handleRequest(&ctx, request.method, request.target, request.body); @@ -638,4 +661,6 @@ comptime { _ = @import("subprocess.zig"); _ = @import("tracker_client.zig"); _ = @import("tracker.zig"); + _ = @import("state.zig"); + _ = @import("sse.zig"); } diff --git a/src/migrations/004_orchestration.sql b/src/migrations/004_orchestration.sql new file mode 100644 index 0000000..ce69d40 --- /dev/null +++ b/src/migrations/004_orchestration.sql @@ -0,0 +1,97 @@ +-- Note: step_deps table is kept for legacy POST /runs endpoint backward compatibility. +-- cycle_state, chat_messages, saga_state tables are legacy (unused by current engine). 
+ +-- Saved workflow definitions +CREATE TABLE IF NOT EXISTS workflows ( + id TEXT PRIMARY KEY, + name TEXT NOT NULL, + definition_json TEXT NOT NULL, + version INTEGER DEFAULT 1, + created_at_ms INTEGER NOT NULL, + updated_at_ms INTEGER NOT NULL +); + +-- State checkpoints (snapshots after each step) +CREATE TABLE IF NOT EXISTS checkpoints ( + id TEXT PRIMARY KEY, + run_id TEXT NOT NULL REFERENCES runs(id), + step_id TEXT NOT NULL, + parent_id TEXT REFERENCES checkpoints(id), + state_json TEXT NOT NULL, + completed_nodes_json TEXT NOT NULL, + version INTEGER NOT NULL, + metadata_json TEXT, + created_at_ms INTEGER NOT NULL +); +CREATE INDEX IF NOT EXISTS idx_checkpoints_run ON checkpoints(run_id, version); +CREATE INDEX IF NOT EXISTS idx_checkpoints_parent ON checkpoints(parent_id); + +-- Agent intermediate events (from nullclaw callback) +CREATE TABLE IF NOT EXISTS agent_events ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + run_id TEXT NOT NULL REFERENCES runs(id), + step_id TEXT NOT NULL, + iteration INTEGER NOT NULL, + tool TEXT, + args_json TEXT, + result_text TEXT, + status TEXT NOT NULL, + created_at_ms INTEGER NOT NULL +); +CREATE INDEX IF NOT EXISTS idx_agent_events_run_step ON agent_events(run_id, step_id); + +-- Pending state injections (thread-safe queue for POST /runs/{id}/state) +CREATE TABLE IF NOT EXISTS pending_state_injections ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + run_id TEXT NOT NULL REFERENCES runs(id), + updates_json TEXT NOT NULL, + apply_after_step TEXT, + created_at_ms INTEGER NOT NULL +); + +-- Extend runs table +ALTER TABLE runs ADD COLUMN state_json TEXT; +ALTER TABLE runs ADD COLUMN workflow_id TEXT REFERENCES workflows(id); +ALTER TABLE runs ADD COLUMN forked_from_run_id TEXT REFERENCES runs(id); +ALTER TABLE runs ADD COLUMN forked_from_checkpoint_id TEXT REFERENCES checkpoints(id); +ALTER TABLE runs ADD COLUMN checkpoint_count INTEGER DEFAULT 0; + +-- Extend steps table +ALTER TABLE steps ADD COLUMN state_before_json TEXT; +ALTER 
TABLE steps ADD COLUMN state_after_json TEXT; +ALTER TABLE steps ADD COLUMN state_updates_json TEXT; +-- NOTE: parent_step_id already exists from 001_init.sql — do NOT add it again + +-- Subgraph support: parent run linkage and per-run config +ALTER TABLE runs ADD COLUMN parent_run_id TEXT REFERENCES runs(id); +ALTER TABLE runs ADD COLUMN config_json TEXT; + +-- Node-level cache (Gap 3) +CREATE TABLE IF NOT EXISTS node_cache ( + cache_key TEXT PRIMARY KEY, + node_name TEXT NOT NULL, + result_json TEXT NOT NULL, + created_at_ms INTEGER NOT NULL, + ttl_ms INTEGER +); + +-- Pending writes from parallel node execution (Gap 4) +CREATE TABLE IF NOT EXISTS pending_writes ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + run_id TEXT NOT NULL, + step_id TEXT NOT NULL, + channel TEXT NOT NULL, + value_json TEXT NOT NULL, + created_at_ms INTEGER NOT NULL +); +CREATE INDEX IF NOT EXISTS idx_pending_writes_run ON pending_writes(run_id); + +-- Token accounting columns on runs +ALTER TABLE runs ADD COLUMN total_input_tokens INTEGER DEFAULT 0; +ALTER TABLE runs ADD COLUMN total_output_tokens INTEGER DEFAULT 0; +ALTER TABLE runs ADD COLUMN total_tokens INTEGER DEFAULT 0; + +-- Token accounting columns on steps +ALTER TABLE steps ADD COLUMN input_tokens INTEGER DEFAULT 0; +ALTER TABLE steps ADD COLUMN output_tokens INTEGER DEFAULT 0; +ALTER TABLE steps ADD COLUMN total_tokens INTEGER DEFAULT 0; diff --git a/src/sse.zig b/src/sse.zig new file mode 100644 index 0000000..0c64344 --- /dev/null +++ b/src/sse.zig @@ -0,0 +1,402 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; + +pub const StreamMode = enum { + values, // Full state after each step + updates, // Only node name + updates + tasks, // Task start/finish with metadata + debug, // Everything with step number + timestamp + custom, // User-defined via node output + + pub fn toString(self: StreamMode) []const u8 { + return @tagName(self); + } + + pub fn fromString(s: []const u8) ?StreamMode { + inline for 
(@typeInfo(StreamMode).@"enum".fields) |f| { + if (std.mem.eql(u8, s, f.name)) return @enumFromInt(f.value); + } + return null; + } +}; + +pub const SseEvent = struct { + seq: u64 = 0, + event_type: []const u8, // "state_update", "step_started", etc. + data: []const u8, // JSON string + mode: StreamMode = .updates, // default mode +}; + +pub const EventSnapshot = struct { + events: []SseEvent, + latest_seq: u64, + oldest_seq: u64, + gap_detected: bool, +}; + +/// Per-run event queue. Thread-safe via mutex. +pub const RunEventQueue = struct { + events: std.ArrayListUnmanaged(SseEvent), + alloc: Allocator, + mutex: std.Thread.Mutex, + closed: std.atomic.Value(bool), + next_seq: u64, + + const max_retained_events: usize = 2048; + + fn freeEvent(self: *RunEventQueue, event: SseEvent) void { + self.alloc.free(event.event_type); + self.alloc.free(event.data); + } + + pub fn init(alloc: Allocator) RunEventQueue { + return .{ + .events = .empty, + .alloc = alloc, + .mutex = .{}, + .closed = std.atomic.Value(bool).init(false), + .next_seq = 1, + }; + } + + pub fn deinit(self: *RunEventQueue) void { + for (self.events.items) |event| { + self.freeEvent(event); + } + self.events.deinit(self.alloc); + } + + /// Push an event to the queue. Thread-safe. 
+ pub fn push(self: *RunEventQueue, event: SseEvent) void { + self.mutex.lock(); + defer self.mutex.unlock(); + + const event_type = self.alloc.dupe(u8, event.event_type) catch return; + const data = self.alloc.dupe(u8, event.data) catch { + self.alloc.free(event_type); + return; + }; + + self.events.append(self.alloc, .{ + .seq = self.next_seq, + .event_type = event_type, + .data = data, + .mode = event.mode, + }) catch { + self.alloc.free(event_type); + self.alloc.free(data); + return; + }; + self.next_seq += 1; + + while (self.events.items.len > max_retained_events) { + const dropped = self.events.orderedRemove(0); + self.freeEvent(dropped); + } + } + + pub fn snapshotSince(self: *RunEventQueue, alloc: Allocator, after_seq: u64) EventSnapshot { + self.mutex.lock(); + defer self.mutex.unlock(); + + const latest_seq = self.next_seq -| 1; + const oldest_seq = if (self.events.items.len > 0) self.events.items[0].seq else latest_seq; + const gap_detected = after_seq > 0 and self.events.items.len > 0 and after_seq < self.events.items[0].seq and self.events.items[0].seq - after_seq > 1; + + var snapshot_events: std.ArrayListUnmanaged(SseEvent) = .empty; + for (self.events.items) |event| { + if (event.seq <= after_seq) continue; + + const event_type = alloc.dupe(u8, event.event_type) catch continue; + const data = alloc.dupe(u8, event.data) catch { + alloc.free(event_type); + continue; + }; + + snapshot_events.append(alloc, .{ + .seq = event.seq, + .event_type = event_type, + .data = data, + .mode = event.mode, + }) catch { + alloc.free(event_type); + alloc.free(data); + }; + } + + const events = snapshot_events.toOwnedSlice(alloc) catch { + for (snapshot_events.items) |event| { + alloc.free(event.event_type); + alloc.free(event.data); + } + snapshot_events.deinit(alloc); + return .{ + .events = &.{}, + .latest_seq = latest_seq, + .oldest_seq = oldest_seq, + .gap_detected = gap_detected, + }; + }; + + return .{ + .events = events, + .latest_seq = latest_seq, + 
.oldest_seq = oldest_seq, + .gap_detected = gap_detected, + }; + } + + pub fn freeSnapshot(_: *RunEventQueue, alloc: Allocator, snapshot: EventSnapshot) void { + for (snapshot.events) |event| { + alloc.free(event.event_type); + alloc.free(event.data); + } + if (snapshot.events.len > 0) alloc.free(snapshot.events); + } + + /// Mark queue as closed (run completed/cancelled). + pub fn close(self: *RunEventQueue) void { + self.closed.store(true, .release); + } + + pub fn isClosed(self: *RunEventQueue) bool { + return self.closed.load(.acquire); + } +}; + +/// Central hub managing per-run event queues. +pub const SseHub = struct { + queues: std.StringHashMap(*RunEventQueue), + mutex: std.Thread.Mutex, + alloc: Allocator, + + pub fn init(alloc: Allocator) SseHub { + return .{ + .queues = std.StringHashMap(*RunEventQueue).init(alloc), + .mutex = .{}, + .alloc = alloc, + }; + } + + pub fn deinit(self: *SseHub) void { + var it = self.queues.iterator(); + while (it.next()) |entry| { + entry.value_ptr.*.deinit(); + self.alloc.destroy(entry.value_ptr.*); + self.alloc.free(entry.key_ptr.*); + } + self.queues.deinit(); + } + + /// Get or create queue for a run. + pub fn getOrCreateQueue(self: *SseHub, run_id: []const u8) *RunEventQueue { + self.mutex.lock(); + defer self.mutex.unlock(); + if (self.queues.get(run_id)) |q| return q; + const queue = self.alloc.create(RunEventQueue) catch @panic("OOM: failed to allocate RunEventQueue"); + queue.* = RunEventQueue.init(self.alloc); + const id_copy = self.alloc.dupe(u8, run_id) catch @panic("OOM: failed to duplicate run_id"); + self.queues.put(id_copy, queue) catch @panic("OOM: failed to insert queue into map"); + return queue; + } + + /// Broadcast event to a run's queue. Creates the queue on first write so + /// late subscribers can still read recent buffered events. 
+ pub fn broadcast(self: *SseHub, run_id: []const u8, event: SseEvent) void { + self.mutex.lock(); + defer self.mutex.unlock(); + const queue = if (self.queues.get(run_id)) |existing| + existing + else blk: { + const created = self.alloc.create(RunEventQueue) catch return; + created.* = RunEventQueue.init(self.alloc); + const id_copy = self.alloc.dupe(u8, run_id) catch { + self.alloc.destroy(created); + return; + }; + self.queues.put(id_copy, created) catch { + self.alloc.free(id_copy); + self.alloc.destroy(created); + return; + }; + break :blk created; + }; + queue.push(event); + } + + pub fn closeQueue(self: *SseHub, run_id: []const u8) void { + self.mutex.lock(); + defer self.mutex.unlock(); + if (self.queues.get(run_id)) |queue| { + queue.close(); + } + } + + /// Close and remove queue when run completes. + pub fn removeQueue(self: *SseHub, run_id: []const u8) void { + self.mutex.lock(); + defer self.mutex.unlock(); + if (self.queues.fetchRemove(run_id)) |entry| { + entry.value.close(); + entry.value.deinit(); + self.alloc.destroy(entry.value); + self.alloc.free(entry.key); + } + } +}; + +// ── Tests ───────────────────────────────────────────────────────────── + +test "sse hub snapshotSince supports multiple consumers" { + const alloc = std.testing.allocator; + var hub = SseHub.init(alloc); + defer hub.deinit(); + + const queue = hub.getOrCreateQueue("run1"); + queue.push(.{ .event_type = "step_started", .data = "{}" }); + queue.push(.{ .event_type = "step_completed", .data = "{}" }); + + const first = queue.snapshotSince(alloc, 0); + defer queue.freeSnapshot(alloc, first); + const second = queue.snapshotSince(alloc, 0); + defer queue.freeSnapshot(alloc, second); + + try std.testing.expectEqual(@as(usize, 2), first.events.len); + try std.testing.expectEqual(@as(usize, 2), second.events.len); + try std.testing.expectEqualStrings("step_started", first.events[0].event_type); + try std.testing.expectEqualStrings("step_started", second.events[0].event_type); +} + 
+test "sse hub queue owns event payloads beyond source arena lifetime" { + const alloc = std.testing.allocator; + var hub = SseHub.init(alloc); + defer hub.deinit(); + + const queue = hub.getOrCreateQueue("run1"); + + var arena = std.heap.ArenaAllocator.init(alloc); + const arena_alloc = arena.allocator(); + + const event_type = try arena_alloc.dupe(u8, "step.completed"); + const payload = try arena_alloc.dupe(u8, "{\"ok\":true}"); + queue.push(.{ .event_type = event_type, .data = payload }); + arena.deinit(); + + const snapshot = queue.snapshotSince(alloc, 0); + defer queue.freeSnapshot(alloc, snapshot); + + try std.testing.expectEqual(@as(usize, 1), snapshot.events.len); + try std.testing.expectEqualStrings("step.completed", snapshot.events[0].event_type); + try std.testing.expectEqualStrings("{\"ok\":true}", snapshot.events[0].data); +} + +test "sse hub broadcast creates queue for late subscribers" { + const alloc = std.testing.allocator; + var hub = SseHub.init(alloc); + defer hub.deinit(); + + hub.broadcast("run1", .{ .event_type = "test", .data = "{}" }); + + const queue = hub.getOrCreateQueue("run1"); + const snapshot = queue.snapshotSince(alloc, 0); + defer queue.freeSnapshot(alloc, snapshot); + + try std.testing.expectEqual(@as(usize, 1), snapshot.events.len); + try std.testing.expectEqualStrings("test", snapshot.events[0].event_type); +} + +test "sse hub remove queue" { + const alloc = std.testing.allocator; + var hub = SseHub.init(alloc); + defer hub.deinit(); + + _ = hub.getOrCreateQueue("run1"); + hub.removeQueue("run1"); + // Queue should be gone + try std.testing.expectEqual(@as(usize, 0), hub.queues.count()); +} + +test "sse hub closeQueue preserves buffered events" { + const alloc = std.testing.allocator; + var hub = SseHub.init(alloc); + defer hub.deinit(); + + hub.broadcast("run1", .{ .event_type = "values", .data = "{}" }); + hub.closeQueue("run1"); + + const queue = hub.getOrCreateQueue("run1"); + try std.testing.expect(queue.isClosed()); + + 
const snapshot = queue.snapshotSince(alloc, 0); + defer queue.freeSnapshot(alloc, snapshot); + try std.testing.expectEqual(@as(usize, 1), snapshot.events.len); +} + +test "sse queue close" { + const alloc = std.testing.allocator; + var queue = RunEventQueue.init(alloc); + defer queue.deinit(); + + try std.testing.expect(!queue.isClosed()); + queue.close(); + try std.testing.expect(queue.isClosed()); +} + +test "stream mode toString and fromString" { + try std.testing.expectEqualStrings("values", StreamMode.values.toString()); + try std.testing.expectEqualStrings("updates", StreamMode.updates.toString()); + try std.testing.expectEqualStrings("tasks", StreamMode.tasks.toString()); + try std.testing.expectEqualStrings("debug", StreamMode.debug.toString()); + try std.testing.expectEqualStrings("custom", StreamMode.custom.toString()); + + try std.testing.expectEqual(StreamMode.values, StreamMode.fromString("values").?); + try std.testing.expectEqual(StreamMode.debug, StreamMode.fromString("debug").?); + try std.testing.expect(StreamMode.fromString("invalid") == null); +} + +test "sse event default mode is updates" { + const ev = SseEvent{ .event_type = "test", .data = "{}" }; + try std.testing.expectEqual(StreamMode.updates, ev.mode); +} + +test "sse event with explicit mode" { + const ev = SseEvent{ .event_type = "values", .data = "{\"state\":{}}", .mode = .values }; + try std.testing.expectEqual(StreamMode.values, ev.mode); + try std.testing.expectEqualStrings("values", ev.event_type); +} + +test "sse hub broadcast with mode" { + const alloc = std.testing.allocator; + var hub = SseHub.init(alloc); + defer hub.deinit(); + + const queue = hub.getOrCreateQueue("run1"); + queue.push(.{ .event_type = "values", .data = "{\"full\":true}", .mode = .values }); + queue.push(.{ .event_type = "task_start", .data = "{}", .mode = .tasks }); + queue.push(.{ .event_type = "debug", .data = "{}", .mode = .debug }); + + const snapshot = queue.snapshotSince(alloc, 0); + defer 
queue.freeSnapshot(alloc, snapshot); + try std.testing.expectEqual(@as(usize, 3), snapshot.events.len); + try std.testing.expectEqual(StreamMode.values, snapshot.events[0].mode); + try std.testing.expectEqual(StreamMode.tasks, snapshot.events[1].mode); + try std.testing.expectEqual(StreamMode.debug, snapshot.events[2].mode); +} + +test "sse hub snapshotSince returns only events after cursor" { + const alloc = std.testing.allocator; + var hub = SseHub.init(alloc); + defer hub.deinit(); + + const queue = hub.getOrCreateQueue("run1"); + queue.push(.{ .event_type = "one", .data = "{}" }); + queue.push(.{ .event_type = "two", .data = "{}" }); + queue.push(.{ .event_type = "three", .data = "{}" }); + + const snapshot = queue.snapshotSince(alloc, 2); + defer queue.freeSnapshot(alloc, snapshot); + + try std.testing.expectEqual(@as(usize, 1), snapshot.events.len); + try std.testing.expectEqual(@as(u64, 3), snapshot.events[0].seq); + try std.testing.expectEqualStrings("three", snapshot.events[0].event_type); +} diff --git a/src/state.zig b/src/state.zig new file mode 100644 index 0000000..5c266a6 --- /dev/null +++ b/src/state.zig @@ -0,0 +1,1025 @@ +/// State management module for NullBoiler orchestration. +/// Implements reducers and state operations for the unified state model. +/// Every node in the orchestration graph reads state, returns partial updates, +/// and the engine applies reducers to compute the new state. +const std = @import("std"); +const types = @import("types.zig"); +const ReducerType = types.ReducerType; +const Allocator = std.mem.Allocator; +const json = std.json; + +// ── Helpers ─────────────────────────────────────────────────────────── + +/// Serialize a std.json.Value to an allocated JSON string. 
fn serializeValue(alloc: Allocator, value: json.Value) ![]const u8 {
    var out: std.io.Writer.Allocating = .init(alloc);
    var jw: json.Stringify = .{ .writer = &out.writer };
    try jw.write(value);
    return try out.toOwnedSlice();
}

/// Extract f64 from a json.Value (handles both .integer and .float).
fn jsonToFloat(val: json.Value) ?f64 {
    return switch (val) {
        .float => |f| f,
        .integer => |i| @as(f64, @floatFromInt(i)),
        else => null,
    };
}

/// Format an f64 as a string. Renders whole numbers without a decimal point.
/// Fix: @intFromFloat is safety-checked illegal behavior (trap/UB) when the
/// operand is non-finite or its truncation falls outside i64 — reachable here
/// because reducers like `add` can produce arbitrarily large floats. Such
/// values now fall through to plain float formatting instead of trapping.
fn formatFloat(alloc: Allocator, f: f64) ![]const u8 {
    // 2^63 as f64; every finite f in [-2^63, 2^63) truncates into i64 range.
    const i64_limit: f64 = 9223372036854775808.0;
    if (std.math.isFinite(f) and f >= -i64_limit and f < i64_limit) {
        const i: i64 = @intFromFloat(f);
        if (@as(f64, @floatFromInt(i)) == f) {
            return try std.fmt.allocPrint(alloc, "{d}", .{i});
        }
    }
    return try std.fmt.allocPrint(alloc, "{d}", .{f});
}

// ── Overwrite Bypass (Gap 5) ──────────────────────────────────────────

/// Check if a JSON value is wrapped in {"__overwrite": true, "value": ...}.
fn isOverwrite(value: json.Value) bool {
    if (value != .object) return false;
    const ow = value.object.get("__overwrite") orelse return false;
    if (ow != .bool) return false;
    return ow.bool;
}

/// Extract the "value" field from an overwrite wrapper.
/// Returns the unwrapped json.Value, or .null if "value" key is missing.
fn extractOverwriteValue(value: json.Value) json.Value {
    if (value != .object) return value;
    return value.object.get("value") orelse .null;
}

// ── Public API ────────────────────────────────────────────────────────

/// Apply a single reducer to merge old_value + update into new_value.
/// Returns newly allocated JSON string owned by the caller.
+pub fn applyReducer(alloc: Allocator, reducer: ReducerType, old_value_json: ?[]const u8, update_json: []const u8) ![]const u8 { + switch (reducer) { + .last_value => { + return try alloc.dupe(u8, update_json); + }, + .append => { + return try applyAppend(alloc, old_value_json, update_json); + }, + .merge => { + return try applyMerge(alloc, old_value_json, update_json); + }, + .add => { + return try applyAdd(alloc, old_value_json, update_json); + }, + .min => { + return try applyMin(alloc, old_value_json, update_json); + }, + .max => { + return try applyMax(alloc, old_value_json, update_json); + }, + .add_messages => { + return try applyAddMessages(alloc, old_value_json, update_json); + }, + } +} + +/// Apply partial state updates to full state using schema reducers. +/// For each key in updates_json: +/// 1. Look up reducer type from schema_json (format: {"key": {"type": "...", "reducer": "..."}}) +/// 2. Get old value from state_json (may be null/missing) +/// 3. Apply reducer(old_value, new_value) +/// 4. 
Write result to output state +pub fn applyUpdates(alloc: Allocator, state_json: []const u8, updates_json: []const u8, schema_json: []const u8) ![]const u8 { + var arena = std.heap.ArenaAllocator.init(alloc); + defer arena.deinit(); + const arena_alloc = arena.allocator(); + + const state_parsed = try json.parseFromSlice(json.Value, arena_alloc, state_json, .{}); + const state_obj = if (state_parsed.value == .object) state_parsed.value.object else json.ObjectMap.init(arena_alloc); + + const updates_parsed = try json.parseFromSlice(json.Value, arena_alloc, updates_json, .{}); + if (updates_parsed.value != .object) return try alloc.dupe(u8, state_json); + + const schema_parsed = try json.parseFromSlice(json.Value, arena_alloc, schema_json, .{}); + const schema_obj = if (schema_parsed.value == .object) schema_parsed.value.object else json.ObjectMap.init(arena_alloc); + + // Start with a copy of all existing state keys + var result_obj = json.ObjectMap.init(arena_alloc); + var state_it = state_obj.iterator(); + while (state_it.next()) |entry| { + try result_obj.put(entry.key_ptr.*, entry.value_ptr.*); + } + + // For each update key, apply the reducer (with overwrite bypass, Gap 5) + var updates_it = updates_parsed.value.object.iterator(); + while (updates_it.next()) |entry| { + const key = entry.key_ptr.*; + const update_value = entry.value_ptr.*; + + // Gap 5: Check for overwrite bypass + if (isOverwrite(update_value)) { + const raw_val = extractOverwriteValue(update_value); + try result_obj.put(key, raw_val); + continue; + } + + // Serialize the update value + const update_str = try serializeValue(arena_alloc, update_value); + + // Look up reducer from schema + const reducer_type = blk: { + if (schema_obj.get(key)) |schema_entry| { + if (schema_entry == .object) { + if (schema_entry.object.get("reducer")) |reducer_val| { + if (reducer_val == .string) { + break :blk ReducerType.fromString(reducer_val.string) orelse .last_value; + } + } + } + } + break :blk 
ReducerType.last_value; + }; + + // Get old value as JSON string (or null if missing) + const old_str: ?[]const u8 = blk: { + if (state_obj.get(key)) |old_val| { + break :blk try serializeValue(arena_alloc, old_val); + } + break :blk null; + }; + + // Apply the reducer (allocates into arena) + const new_str = try applyReducer(arena_alloc, reducer_type, old_str, update_str); + + // Parse the result back into a json.Value and put in result + const new_parsed = try json.parseFromSlice(json.Value, arena_alloc, new_str, .{}); + try result_obj.put(key, new_parsed.value); + } + + // Serialize the result into the caller's allocator + const result_str = try serializeValue(arena_alloc, json.Value{ .object = result_obj }); + return try alloc.dupe(u8, result_str); +} + +/// Initialize state from input JSON and schema defaults. +/// For each key in schema: +/// - if key exists in input -> use input value +/// - else -> use type default: "" for string, [] for array, 0 for number, false for boolean, {} for object, null otherwise +pub fn initState(alloc: Allocator, input_json: []const u8, schema_json: []const u8) ![]const u8 { + var arena = std.heap.ArenaAllocator.init(alloc); + defer arena.deinit(); + const arena_alloc = arena.allocator(); + + const input_parsed = try json.parseFromSlice(json.Value, arena_alloc, input_json, .{}); + const input_obj = if (input_parsed.value == .object) input_parsed.value.object else json.ObjectMap.init(arena_alloc); + + const schema_parsed = try json.parseFromSlice(json.Value, arena_alloc, schema_json, .{}); + if (schema_parsed.value != .object) return try alloc.dupe(u8, input_json); + + var result_obj = json.ObjectMap.init(arena_alloc); + + var schema_it = schema_parsed.value.object.iterator(); + while (schema_it.next()) |entry| { + const key = entry.key_ptr.*; + const schema_entry = entry.value_ptr.*; + + if (input_obj.get(key)) |input_val| { + try result_obj.put(key, input_val); + } else { + const type_str = blk: { + if (schema_entry == .object) 
{ + if (schema_entry.object.get("type")) |type_val| { + if (type_val == .string) { + break :blk type_val.string; + } + } + } + break :blk ""; + }; + + const default_val: json.Value = if (std.mem.eql(u8, type_str, "string")) + .{ .string = "" } + else if (std.mem.eql(u8, type_str, "array")) + .{ .array = json.Array.init(arena_alloc) } + else if (std.mem.eql(u8, type_str, "number")) + .{ .integer = 0 } + else if (std.mem.eql(u8, type_str, "boolean")) + .{ .bool = false } + else if (std.mem.eql(u8, type_str, "object")) + .{ .object = json.ObjectMap.init(arena_alloc) } + else + .null; + + try result_obj.put(key, default_val); + } + } + + const result_str = try serializeValue(arena_alloc, json.Value{ .object = result_obj }); + return try alloc.dupe(u8, result_str); +} + +/// Extract a value from state JSON by dotted path. +/// Supports: +/// - "state.messages" -> strips "state." prefix, returns value at key "messages" +/// - "state.plan.files" -> nested object access +/// - "state.messages[-1]" -> last element of array +pub fn getStateValue(alloc: Allocator, state_json: []const u8, path: []const u8) !?[]const u8 { + var arena = std.heap.ArenaAllocator.init(alloc); + defer arena.deinit(); + const arena_alloc = arena.allocator(); + + // Strip "state." prefix if present + const effective_path = if (std.mem.startsWith(u8, path, "state.")) + path["state.".len..] + else + path; + + const parsed = try json.parseFromSlice(json.Value, arena_alloc, state_json, .{}); + var current = parsed.value; + + // Split by "." 
and walk the path + var segments = std.mem.splitScalar(u8, effective_path, '.'); + while (segments.next()) |segment| { + // Check for array index like "messages[-1]" + if (std.mem.indexOfScalar(u8, segment, '[')) |bracket_pos| { + const key = segment[0..bracket_pos]; + const index_str = segment[bracket_pos..]; + + // Navigate to the key first + if (current != .object) return null; + current = current.object.get(key) orelse return null; + + // Parse the array index + if (std.mem.eql(u8, index_str, "[-1]")) { + if (current != .array) return null; + if (current.array.items.len == 0) return null; + current = current.array.items[current.array.items.len - 1]; + } else { + // Parse positive index: [N] + if (index_str.len < 3) return null; + const num_str = index_str[1 .. index_str.len - 1]; + const idx = std.fmt.parseInt(usize, num_str, 10) catch return null; + if (current != .array) return null; + if (idx >= current.array.items.len) return null; + current = current.array.items[idx]; + } + } else { + if (current != .object) return null; + current = current.object.get(segment) orelse return null; + } + } + + const result_str = try serializeValue(arena_alloc, current); + return try alloc.dupe(u8, result_str); +} + +/// Convert JSON value to string for route matching. 
/// - true/false -> "true"/"false"
/// - numbers -> decimal string representation
/// - "quoted string" -> strip quotes, return inner string
/// - anything else -> return as-is
pub fn stringifyForRoute(alloc: Allocator, value_json: []const u8) ![]const u8 {
    var arena = std.heap.ArenaAllocator.init(alloc);
    defer arena.deinit();
    const aa = arena.allocator();

    const parsed = try json.parseFromSlice(json.Value, aa, value_json, .{});

    return switch (parsed.value) {
        .bool => |b| try alloc.dupe(u8, if (b) "true" else "false"),
        .integer => |i| try std.fmt.allocPrint(alloc, "{d}", .{i}),
        .float => |f| blk: {
            const rendered = try formatFloat(aa, f);
            break :blk try alloc.dupe(u8, rendered);
        },
        .string => |s| try alloc.dupe(u8, s),
        // Arrays, objects, and null route on their raw JSON text.
        else => try alloc.dupe(u8, value_json),
    };
}

// ── Reducer implementations ───────────────────────────────────────────

/// append: if old is null/empty -> wrap update in array [update].
/// If old is array -> parse, append update (element or array elements), serialize.
fn applyAppend(alloc: Allocator, old_json: ?[]const u8, update_json: []const u8) ![]const u8 {
    var arena = std.heap.ArenaAllocator.init(alloc);
    defer arena.deinit();
    const aa = arena.allocator();

    const update_parsed = try json.parseFromSlice(json.Value, aa, update_json, .{});
    const update_val = update_parsed.value;

    var combined = json.Array.init(aa);

    const have_old = old_json != null and old_json.?.len > 0;
    if (!have_old) {
        // No previous value: result is the one-element array [update].
        try combined.append(update_val);
    } else {
        const old_parsed = try json.parseFromSlice(json.Value, aa, old_json.?, .{});
        if (old_parsed.value == .array) {
            // Existing array: keep its elements, then splice an array update
            // element-by-element, or append a scalar update as one element.
            for (old_parsed.value.array.items) |elem| try combined.append(elem);
            if (update_val == .array) {
                for (update_val.array.items) |elem| try combined.append(elem);
            } else {
                try combined.append(update_val);
            }
        } else {
            // Old scalar/object: promote to [old, update].
            // NOTE(review): in this branch an array-valued update is kept as a
            // single nested element, unlike the array-old branch which splices —
            // behavior preserved as-is; confirm the asymmetry is intended.
            try combined.append(old_parsed.value);
            try combined.append(update_val);
        }
    }

    const out = try serializeValue(aa, json.Value{ .array = combined });
    return try alloc.dupe(u8, out);
}

/// merge: deep merge two JSON objects. Update keys override old keys.
/// Nested objects are recursively merged.
fn applyMerge(alloc: Allocator, old_json: ?[]const u8, update_json: []const u8) ![]const u8 {
    var arena = std.heap.ArenaAllocator.init(alloc);
    defer arena.deinit();
    const aa = arena.allocator();

    const update_parsed = try json.parseFromSlice(json.Value, aa, update_json, .{});

    // Any case where an object-on-object merge is impossible degrades to
    // last-value semantics: the update wins wholesale.
    if (update_parsed.value != .object or old_json == null or old_json.?.len == 0) {
        return try alloc.dupe(u8, update_json);
    }

    const old_parsed = try json.parseFromSlice(json.Value, aa, old_json.?, .{});
    if (old_parsed.value != .object) {
        return try alloc.dupe(u8, update_json);
    }

    const merged = try deepMerge(aa, old_parsed.value, update_parsed.value);
    const out = try serializeValue(aa, merged);
    return try alloc.dupe(u8, out);
}

/// Recursively deep-merge two JSON objects. Overlay keys win on conflicts,
/// except object-vs-object conflicts, which merge recursively.
fn deepMerge(alloc: Allocator, base: json.Value, overlay: json.Value) !json.Value {
    if (base != .object or overlay != .object) {
        return overlay;
    }

    var merged = json.ObjectMap.init(alloc);

    // Seed with every base entry.
    var base_it = base.object.iterator();
    while (base_it.next()) |kv| {
        try merged.put(kv.key_ptr.*, kv.value_ptr.*);
    }

    // Layer the overlay on top.
    var overlay_it = overlay.object.iterator();
    while (overlay_it.next()) |kv| {
        const key = kv.key_ptr.*;
        const val = kv.value_ptr.*;
        const prior = merged.get(key);
        if (prior != null and prior.? == .object and val == .object) {
            try merged.put(key, try deepMerge(alloc, prior.?, val));
        } else {
            try merged.put(key, val);
        }
    }

    return json.Value{ .object = merged };
}

/// add: parse both as numbers (f64), add, return string. If old is null, treat as 0.
fn applyAdd(alloc: Allocator, old_json: ?[]const u8, update_json: []const u8) ![]const u8 {
    var arena = std.heap.ArenaAllocator.init(alloc);
    defer arena.deinit();
    const aa = arena.allocator();

    const update_parsed = try json.parseFromSlice(json.Value, aa, update_json, .{});
    const addend = jsonToFloat(update_parsed.value) orelse return error.InvalidNumber;

    // A missing, empty, unparsable, or non-numeric old value contributes 0.
    var base: f64 = 0;
    if (old_json) |old| {
        if (old.len > 0) {
            if (json.parseFromSlice(json.Value, aa, old, .{})) |old_parsed| {
                base = jsonToFloat(old_parsed.value) orelse 0;
            } else |_| {}
        }
    }

    return try formatFloat(alloc, base + addend);
}

/// min: parse both as numbers, return the smaller. If old is null, return update.
fn applyMin(alloc: Allocator, old_json: ?[]const u8, update_json: []const u8) ![]const u8 {
    var arena = std.heap.ArenaAllocator.init(alloc);
    defer arena.deinit();
    const aa = arena.allocator();

    const update_parsed = try json.parseFromSlice(json.Value, aa, update_json, .{});
    const candidate = jsonToFloat(update_parsed.value) orelse return error.InvalidNumber;

    // Resolve the previous numeric value; any failure means the update stands alone.
    const prior: f64 = resolve: {
        const old = old_json orelse break :resolve candidate;
        if (old.len == 0) break :resolve candidate;
        const old_parsed = json.parseFromSlice(json.Value, aa, old, .{}) catch break :resolve candidate;
        break :resolve jsonToFloat(old_parsed.value) orelse candidate;
    };

    return try formatFloat(alloc, @min(prior, candidate));
}

/// max: parse both as numbers, return the larger. If old is null, return update.
+fn applyMax(alloc: Allocator, old_json: ?[]const u8, update_json: []const u8) ![]const u8 { + var arena = std.heap.ArenaAllocator.init(alloc); + defer arena.deinit(); + const arena_alloc = arena.allocator(); + + const update_parsed = try json.parseFromSlice(json.Value, arena_alloc, update_json, .{}); + const update_val = jsonToFloat(update_parsed.value) orelse return error.InvalidNumber; + + const old = old_json orelse return try formatFloat(alloc, update_val); + if (old.len == 0) return try formatFloat(alloc, update_val); + + const old_parsed = json.parseFromSlice(json.Value, arena_alloc, old, .{}) catch + return try formatFloat(alloc, update_val); + const old_val = jsonToFloat(old_parsed.value) orelse return try formatFloat(alloc, update_val); + + return try formatFloat(alloc, @max(old_val, update_val)); +} + +/// add_messages: merge message arrays by "id" field. +/// - If old is null → wrap update in array +/// - If update msg has "remove": true → remove matching id from old +/// - If update msg "id" matches existing → replace in-place +/// - If update msg "id" doesn't match → append +/// - If update msg has no "id" → generate one and append +fn applyAddMessages(alloc: Allocator, old_json: ?[]const u8, update_json: []const u8) ![]const u8 { + var arena = std.heap.ArenaAllocator.init(alloc); + defer arena.deinit(); + const arena_alloc = arena.allocator(); + + // Parse update: single object or array of objects + const update_parsed = try json.parseFromSlice(json.Value, arena_alloc, update_json, .{}); + var update_msgs = json.Array.init(arena_alloc); + if (update_parsed.value == .array) { + for (update_parsed.value.array.items) |item| { + try update_msgs.append(item); + } + } else if (update_parsed.value == .object) { + try update_msgs.append(update_parsed.value); + } else { + return try alloc.dupe(u8, update_json); + } + + // Parse old array or start empty + var result_msgs = json.Array.init(arena_alloc); + if (old_json) |old| { + if (old.len > 0) { + const 
old_parsed = try json.parseFromSlice(json.Value, arena_alloc, old, .{}); + if (old_parsed.value == .array) { + for (old_parsed.value.array.items) |item| { + try result_msgs.append(item); + } + } + } + } + + // Process each update message + for (update_msgs.items) |msg| { + if (msg != .object) continue; + + const msg_id: ?[]const u8 = blk: { + if (msg.object.get("id")) |id_val| { + if (id_val == .string) break :blk id_val.string; + } + break :blk null; + }; + + // Check for remove flag + const is_remove = blk: { + if (msg.object.get("remove")) |rm_val| { + if (rm_val == .bool) break :blk rm_val.bool; + } + break :blk false; + }; + + if (is_remove) { + if (msg_id) |id| { + // Filter out the message with matching id + var filtered = json.Array.init(arena_alloc); + for (result_msgs.items) |existing| { + if (existing == .object) { + if (existing.object.get("id")) |eid| { + if (eid == .string and std.mem.eql(u8, eid.string, id)) { + continue; // skip — removing this message + } + } + } + try filtered.append(existing); + } + result_msgs = filtered; + } + continue; + } + + if (msg_id) |id| { + // Try to find and replace existing message with same id + var replaced = false; + for (result_msgs.items, 0..) 
|existing, i| { + if (existing == .object) { + if (existing.object.get("id")) |eid| { + if (eid == .string and std.mem.eql(u8, eid.string, id)) { + result_msgs.items[i] = msg; + replaced = true; + break; + } + } + } + } + if (!replaced) { + try result_msgs.append(msg); + } + } else { + // No id — generate one and append + var msg_copy = json.ObjectMap.init(arena_alloc); + var it = msg.object.iterator(); + while (it.next()) |entry| { + try msg_copy.put(entry.key_ptr.*, entry.value_ptr.*); + } + const gen_id = try std.fmt.allocPrint(arena_alloc, "msg_{d}", .{result_msgs.items.len}); + try msg_copy.put("id", json.Value{ .string = gen_id }); + try result_msgs.append(json.Value{ .object = msg_copy }); + } + } + + const result = try serializeValue(arena_alloc, json.Value{ .array = result_msgs }); + return try alloc.dupe(u8, result); +} + +// ── Ephemeral State Keys ────────────────────────────────────────────── + +/// Strip ephemeral keys from state before checkpoint persistence. +/// Parses the schema for keys with `"ephemeral": true` and removes +/// those keys from the state JSON. Returns a new JSON string. 
+pub fn stripEphemeralKeys(alloc: Allocator, state_json: []const u8, schema_json: []const u8) ![]const u8 { + var arena = std.heap.ArenaAllocator.init(alloc); + defer arena.deinit(); + const arena_alloc = arena.allocator(); + + // Parse schema to find ephemeral keys + const schema_parsed = try json.parseFromSlice(json.Value, arena_alloc, schema_json, .{}); + if (schema_parsed.value != .object) return try alloc.dupe(u8, state_json); + + var ephemeral_keys = std.StringHashMap(void).init(arena_alloc); + var schema_it = schema_parsed.value.object.iterator(); + while (schema_it.next()) |entry| { + const schema_entry = entry.value_ptr.*; + if (schema_entry == .object) { + if (schema_entry.object.get("ephemeral")) |eph_val| { + if (eph_val == .bool and eph_val.bool) { + try ephemeral_keys.put(entry.key_ptr.*, {}); + } + } + } + } + + if (ephemeral_keys.count() == 0) return try alloc.dupe(u8, state_json); + + // Parse state and remove ephemeral keys + const state_parsed = try json.parseFromSlice(json.Value, arena_alloc, state_json, .{}); + if (state_parsed.value != .object) return try alloc.dupe(u8, state_json); + + var result_obj = json.ObjectMap.init(arena_alloc); + var state_it = state_parsed.value.object.iterator(); + while (state_it.next()) |entry| { + if (ephemeral_keys.get(entry.key_ptr.*) == null) { + try result_obj.put(entry.key_ptr.*, entry.value_ptr.*); + } + } + + const result_str = try serializeValue(arena_alloc, json.Value{ .object = result_obj }); + return try alloc.dupe(u8, result_str); +} + +// ── Tests ───────────────────────────────────────────────────────────── + +fn parseTestJson(alloc: Allocator, json_str: []const u8) !json.Parsed(json.Value) { + return try json.parseFromSlice(json.Value, alloc, json_str, .{}); +} + +test "last_value reducer" { + const alloc = std.testing.allocator; + const result = try applyReducer(alloc, .last_value, "\"old\"", "\"new\""); + defer alloc.free(result); + try std.testing.expectEqualStrings("\"new\"", result); +} + 
+test "add reducer" { + const alloc = std.testing.allocator; + const result = try applyReducer(alloc, .add, "10", "5"); + defer alloc.free(result); + try std.testing.expectEqualStrings("15", result); +} + +test "add reducer with null old" { + const alloc = std.testing.allocator; + const result = try applyReducer(alloc, .add, null, "7"); + defer alloc.free(result); + try std.testing.expectEqualStrings("7", result); +} + +test "append reducer" { + const alloc = std.testing.allocator; + const result = try applyReducer(alloc, .append, "[1,2]", "3"); + defer alloc.free(result); + try std.testing.expectEqualStrings("[1,2,3]", result); +} + +test "append reducer with null old" { + const alloc = std.testing.allocator; + const result = try applyReducer(alloc, .append, null, "\"hello\""); + defer alloc.free(result); + try std.testing.expectEqualStrings("[\"hello\"]", result); +} + +test "merge reducer - flat objects" { + const alloc = std.testing.allocator; + const result = try applyReducer(alloc, .merge, "{\"a\":1,\"b\":2}", "{\"b\":3,\"c\":4}"); + defer alloc.free(result); + // Parse result to check keys since JSON object key order is not guaranteed + const parsed = try parseTestJson(alloc, result); + defer parsed.deinit(); + try std.testing.expect(parsed.value == .object); + + const a = parsed.value.object.get("a") orelse return error.TestUnexpectedResult; + try std.testing.expectEqual(@as(i64, 1), a.integer); + + const b = parsed.value.object.get("b") orelse return error.TestUnexpectedResult; + try std.testing.expectEqual(@as(i64, 3), b.integer); + + const c = parsed.value.object.get("c") orelse return error.TestUnexpectedResult; + try std.testing.expectEqual(@as(i64, 4), c.integer); +} + +test "merge reducer - null old" { + const alloc = std.testing.allocator; + const result = try applyReducer(alloc, .merge, null, "{\"x\":1}"); + defer alloc.free(result); + try std.testing.expectEqualStrings("{\"x\":1}", result); +} + +test "min reducer" { + const alloc = 
std.testing.allocator; + const result = try applyReducer(alloc, .min, "10", "3"); + defer alloc.free(result); + try std.testing.expectEqualStrings("3", result); +} + +test "max reducer" { + const alloc = std.testing.allocator; + const result = try applyReducer(alloc, .max, "10", "3"); + defer alloc.free(result); + try std.testing.expectEqualStrings("10", result); +} + +test "applyUpdates with mixed reducers" { + const alloc = std.testing.allocator; + const state = + \\{"count":5,"messages":["hello"],"config":{"a":1}} + ; + const updates = + \\{"count":3,"messages":"world","config":{"b":2}} + ; + const schema = + \\{"count":{"type":"number","reducer":"add"},"messages":{"type":"array","reducer":"append"},"config":{"type":"object","reducer":"merge"}} + ; + + const result = try applyUpdates(alloc, state, updates, schema); + defer alloc.free(result); + + const parsed = try parseTestJson(alloc, result); + defer parsed.deinit(); + try std.testing.expect(parsed.value == .object); + + // count: 5 + 3 = 8 + const count = parsed.value.object.get("count") orelse return error.TestUnexpectedResult; + try std.testing.expectEqual(@as(i64, 8), count.integer); + + // messages: ["hello"] + "world" = ["hello","world"] + const messages = parsed.value.object.get("messages") orelse return error.TestUnexpectedResult; + try std.testing.expect(messages == .array); + try std.testing.expectEqual(@as(usize, 2), messages.array.items.len); + + // config: merge {a:1} + {b:2} = {a:1, b:2} + const config = parsed.value.object.get("config") orelse return error.TestUnexpectedResult; + try std.testing.expect(config == .object); + try std.testing.expect(config.object.get("a") != null); + try std.testing.expect(config.object.get("b") != null); +} + +test "initState with defaults" { + const alloc = std.testing.allocator; + const input = + \\{"prompt":"hi"} + ; + const schema = + 
\\{"prompt":{"type":"string","reducer":"last_value"},"messages":{"type":"array","reducer":"append"},"count":{"type":"number","reducer":"add"},"done":{"type":"boolean","reducer":"last_value"},"meta":{"type":"object","reducer":"merge"}} + ; + + const result = try initState(alloc, input, schema); + defer alloc.free(result); + + const parsed = try parseTestJson(alloc, result); + defer parsed.deinit(); + try std.testing.expect(parsed.value == .object); + + // prompt should be from input + const prompt = parsed.value.object.get("prompt") orelse return error.TestUnexpectedResult; + try std.testing.expectEqualStrings("hi", prompt.string); + + // messages should default to [] + const messages = parsed.value.object.get("messages") orelse return error.TestUnexpectedResult; + try std.testing.expect(messages == .array); + try std.testing.expectEqual(@as(usize, 0), messages.array.items.len); + + // count should default to 0 + const count = parsed.value.object.get("count") orelse return error.TestUnexpectedResult; + try std.testing.expectEqual(@as(i64, 0), count.integer); + + // done should default to false + const done = parsed.value.object.get("done") orelse return error.TestUnexpectedResult; + try std.testing.expectEqual(false, done.bool); + + // meta should default to {} + const meta = parsed.value.object.get("meta") orelse return error.TestUnexpectedResult; + try std.testing.expect(meta == .object); + try std.testing.expectEqual(@as(usize, 0), meta.object.count()); +} + +test "getStateValue simple key" { + const alloc = std.testing.allocator; + const state = + \\{"prompt":"hello","count":42} + ; + const result = try getStateValue(alloc, state, "state.prompt"); + defer if (result) |r| alloc.free(r); + try std.testing.expect(result != null); + try std.testing.expectEqualStrings("\"hello\"", result.?); +} + +test "getStateValue nested" { + const alloc = std.testing.allocator; + const state = + \\{"plan":{"files":["a.zig","b.zig"]}} + ; + const result = try getStateValue(alloc, 
state, "state.plan.files"); + defer if (result) |r| alloc.free(r); + try std.testing.expect(result != null); + try std.testing.expectEqualStrings("[\"a.zig\",\"b.zig\"]", result.?); +} + +test "getStateValue array last element" { + const alloc = std.testing.allocator; + const state = + \\{"messages":["first","second","third"]} + ; + const result = try getStateValue(alloc, state, "state.messages[-1]"); + defer if (result) |r| alloc.free(r); + try std.testing.expect(result != null); + try std.testing.expectEqualStrings("\"third\"", result.?); +} + +test "stringifyForRoute boolean" { + const alloc = std.testing.allocator; + const result_true = try stringifyForRoute(alloc, "true"); + defer alloc.free(result_true); + try std.testing.expectEqualStrings("true", result_true); + + const result_false = try stringifyForRoute(alloc, "false"); + defer alloc.free(result_false); + try std.testing.expectEqualStrings("false", result_false); +} + +test "stringifyForRoute number" { + const alloc = std.testing.allocator; + const result = try stringifyForRoute(alloc, "42"); + defer alloc.free(result); + try std.testing.expectEqualStrings("42", result); +} + +test "stringifyForRoute string" { + const alloc = std.testing.allocator; + const result = try stringifyForRoute(alloc, "\"hello world\""); + defer alloc.free(result); + try std.testing.expectEqualStrings("hello world", result); +} + +test "add_messages reducer - append new" { + const alloc = std.testing.allocator; + const result = try applyReducer(alloc, .add_messages, + \\[{"id":"1","text":"hello"}] + , + \\{"id":"2","text":"world"} + ); + defer alloc.free(result); + // Parse and verify: should be array with 2 messages + const parsed = try parseTestJson(alloc, result); + defer parsed.deinit(); + try std.testing.expect(parsed.value == .array); + try std.testing.expectEqual(@as(usize, 2), parsed.value.array.items.len); + // First message id=1 + const m0 = parsed.value.array.items[0]; + try std.testing.expect(m0 == .object); + const 
id0 = m0.object.get("id") orelse return error.TestUnexpectedResult; + try std.testing.expectEqualStrings("1", id0.string); + // Second message id=2 + const m1 = parsed.value.array.items[1]; + try std.testing.expect(m1 == .object); + const id1 = m1.object.get("id") orelse return error.TestUnexpectedResult; + try std.testing.expectEqualStrings("2", id1.string); + const text1 = m1.object.get("text") orelse return error.TestUnexpectedResult; + try std.testing.expectEqualStrings("world", text1.string); +} + +test "add_messages reducer - replace by id" { + const alloc = std.testing.allocator; + const result = try applyReducer(alloc, .add_messages, + \\[{"id":"1","text":"old"}] + , + \\{"id":"1","text":"new"} + ); + defer alloc.free(result); + const parsed = try parseTestJson(alloc, result); + defer parsed.deinit(); + try std.testing.expect(parsed.value == .array); + try std.testing.expectEqual(@as(usize, 1), parsed.value.array.items.len); + const m0 = parsed.value.array.items[0]; + const text = m0.object.get("text") orelse return error.TestUnexpectedResult; + try std.testing.expectEqualStrings("new", text.string); +} + +test "add_messages reducer - remove by id" { + const alloc = std.testing.allocator; + const result = try applyReducer(alloc, .add_messages, + \\[{"id":"1","text":"hello"},{"id":"2","text":"world"}] + , + \\{"id":"1","remove":true} + ); + defer alloc.free(result); + const parsed = try parseTestJson(alloc, result); + defer parsed.deinit(); + try std.testing.expect(parsed.value == .array); + try std.testing.expectEqual(@as(usize, 1), parsed.value.array.items.len); + const m0 = parsed.value.array.items[0]; + const id0 = m0.object.get("id") orelse return error.TestUnexpectedResult; + try std.testing.expectEqualStrings("2", id0.string); +} + +test "add_messages reducer - null old" { + const alloc = std.testing.allocator; + const result = try applyReducer(alloc, .add_messages, null, + \\{"id":"1","text":"first"} + ); + defer alloc.free(result); + const parsed = 
try parseTestJson(alloc, result); + defer parsed.deinit(); + try std.testing.expect(parsed.value == .array); + try std.testing.expectEqual(@as(usize, 1), parsed.value.array.items.len); + const m0 = parsed.value.array.items[0]; + const id0 = m0.object.get("id") orelse return error.TestUnexpectedResult; + try std.testing.expectEqualStrings("1", id0.string); + const text0 = m0.object.get("text") orelse return error.TestUnexpectedResult; + try std.testing.expectEqualStrings("first", text0.string); +} + +test "overwrite bypasses reducer" { + const alloc = std.testing.allocator; + // count has "add" reducer, but __overwrite should bypass it + const state = + \\{"count":10} + ; + const updates = + \\{"count":{"__overwrite":true,"value":42}} + ; + const schema = + \\{"count":{"type":"number","reducer":"add"}} + ; + + const result = try applyUpdates(alloc, state, updates, schema); + defer alloc.free(result); + + const parsed = try parseTestJson(alloc, result); + defer parsed.deinit(); + try std.testing.expect(parsed.value == .object); + const count = parsed.value.object.get("count") orelse return error.TestUnexpectedResult; + // Should be 42 (overwritten), not 52 (10 + 42 via add reducer) + try std.testing.expectEqual(@as(i64, 42), count.integer); +} + +test "overwrite with array value" { + const alloc = std.testing.allocator; + const state = + \\{"items":[1,2,3]} + ; + const updates = + \\{"items":{"__overwrite":true,"value":[99]}} + ; + const schema = + \\{"items":{"type":"array","reducer":"append"}} + ; + + const result = try applyUpdates(alloc, state, updates, schema); + defer alloc.free(result); + + const parsed = try parseTestJson(alloc, result); + defer parsed.deinit(); + const items = parsed.value.object.get("items") orelse return error.TestUnexpectedResult; + try std.testing.expect(items == .array); + // Should be [99] (overwritten), not [1,2,3,99] (appended) + try std.testing.expectEqual(@as(usize, 1), items.array.items.len); + try std.testing.expectEqual(@as(i64, 
99), items.array.items[0].integer); +} + +test "stripEphemeralKeys removes ephemeral keys" { + const alloc = std.testing.allocator; + const state = + \\{"messages":["hello"],"temp_data":"scratch","count":5} + ; + const schema = + \\{"messages":{"type":"array","reducer":"append"},"temp_data":{"type":"string","reducer":"last_value","ephemeral":true},"count":{"type":"number","reducer":"add"}} + ; + + const result = try stripEphemeralKeys(alloc, state, schema); + defer alloc.free(result); + + const parsed = try parseTestJson(alloc, result); + defer parsed.deinit(); + try std.testing.expect(parsed.value == .object); + // temp_data should be stripped + try std.testing.expect(parsed.value.object.get("temp_data") == null); + // messages and count should remain + try std.testing.expect(parsed.value.object.get("messages") != null); + try std.testing.expect(parsed.value.object.get("count") != null); +} + +test "stripEphemeralKeys no-op when no ephemeral keys" { + const alloc = std.testing.allocator; + const state = + \\{"messages":["hello"],"count":5} + ; + const schema = + \\{"messages":{"type":"array","reducer":"append"},"count":{"type":"number","reducer":"add"}} + ; + + const result = try stripEphemeralKeys(alloc, state, schema); + defer alloc.free(result); + + const parsed = try parseTestJson(alloc, result); + defer parsed.deinit(); + try std.testing.expect(parsed.value == .object); + try std.testing.expect(parsed.value.object.get("messages") != null); + try std.testing.expect(parsed.value.object.get("count") != null); +} + +test "stripEphemeralKeys with empty state" { + const alloc = std.testing.allocator; + const result = try stripEphemeralKeys(alloc, "{}", "{}"); + defer alloc.free(result); + try std.testing.expectEqualStrings("{}", result); +} diff --git a/src/store.zig b/src/store.zig index 4eeb2dc..ebf0ca5 100644 --- a/src/store.zig +++ b/src/store.zig @@ -140,6 +140,17 @@ pub const Store = struct { } return error.MigrationFailed; } + + // Migration 004 — 
orchestration schema (workflows, checkpoints, agent_events) + const sql_004 = @embedFile("migrations/004_orchestration.sql"); + prc = c.sqlite3_exec(self.db, sql_004.ptr, null, null, &err_msg); + if (prc != c.SQLITE_OK) { + if (err_msg) |msg| { + log.err("migration 004 failed (rc={d}): {s}", .{ prc, std.mem.span(msg) }); + c.sqlite3_free(msg); + } + return error.MigrationFailed; + } } pub fn beginTransaction(self: *Self) !void { @@ -392,7 +403,7 @@ pub const Store = struct { } pub fn getRun(self: *Self, allocator: std.mem.Allocator, id: []const u8) !?types.RunRow { - const sql = "SELECT id, idempotency_key, status, workflow_json, input_json, callbacks_json, error_text, created_at_ms, updated_at_ms, started_at_ms, ended_at_ms FROM runs WHERE id = ?"; + const sql = "SELECT id, idempotency_key, status, workflow_id, workflow_json, input_json, callbacks_json, error_text, created_at_ms, updated_at_ms, started_at_ms, ended_at_ms, state_json, config_json, parent_run_id FROM runs WHERE id = ?"; var stmt: ?*c.sqlite3_stmt = null; if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { return error.SqlitePrepareFailed; @@ -407,19 +418,23 @@ pub const Store = struct { .id = try allocStr(allocator, stmt, 0), .idempotency_key = try allocStrOpt(allocator, stmt, 1), .status = try allocStr(allocator, stmt, 2), - .workflow_json = try allocStr(allocator, stmt, 3), - .input_json = try allocStr(allocator, stmt, 4), - .callbacks_json = try allocStr(allocator, stmt, 5), - .error_text = try allocStrOpt(allocator, stmt, 6), - .created_at_ms = colInt(stmt, 7), - .updated_at_ms = colInt(stmt, 8), - .started_at_ms = colIntOpt(stmt, 9), - .ended_at_ms = colIntOpt(stmt, 10), + .workflow_id = try allocStrOpt(allocator, stmt, 3), + .workflow_json = try allocStr(allocator, stmt, 4), + .input_json = try allocStr(allocator, stmt, 5), + .callbacks_json = try allocStr(allocator, stmt, 6), + .error_text = try allocStrOpt(allocator, stmt, 7), + .created_at_ms = colInt(stmt, 8), + 
.updated_at_ms = colInt(stmt, 9), + .started_at_ms = colIntOpt(stmt, 10), + .ended_at_ms = colIntOpt(stmt, 11), + .state_json = try allocStrOpt(allocator, stmt, 12), + .config_json = try allocStrOpt(allocator, stmt, 13), + .parent_run_id = try allocStrOpt(allocator, stmt, 14), }; } pub fn getRunByIdempotencyKey(self: *Self, allocator: std.mem.Allocator, key: []const u8) !?types.RunRow { - const sql = "SELECT id, idempotency_key, status, workflow_json, input_json, callbacks_json, error_text, created_at_ms, updated_at_ms, started_at_ms, ended_at_ms FROM runs WHERE idempotency_key = ? ORDER BY created_at_ms DESC LIMIT 1"; + const sql = "SELECT id, idempotency_key, status, workflow_id, workflow_json, input_json, callbacks_json, error_text, created_at_ms, updated_at_ms, started_at_ms, ended_at_ms, state_json, config_json, parent_run_id FROM runs WHERE idempotency_key = ? ORDER BY created_at_ms DESC LIMIT 1"; var stmt: ?*c.sqlite3_stmt = null; if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { return error.SqlitePrepareFailed; @@ -433,35 +448,45 @@ pub const Store = struct { .id = try allocStr(allocator, stmt, 0), .idempotency_key = try allocStrOpt(allocator, stmt, 1), .status = try allocStr(allocator, stmt, 2), - .workflow_json = try allocStr(allocator, stmt, 3), - .input_json = try allocStr(allocator, stmt, 4), - .callbacks_json = try allocStr(allocator, stmt, 5), - .error_text = try allocStrOpt(allocator, stmt, 6), - .created_at_ms = colInt(stmt, 7), - .updated_at_ms = colInt(stmt, 8), - .started_at_ms = colIntOpt(stmt, 9), - .ended_at_ms = colIntOpt(stmt, 10), + .workflow_id = try allocStrOpt(allocator, stmt, 3), + .workflow_json = try allocStr(allocator, stmt, 4), + .input_json = try allocStr(allocator, stmt, 5), + .callbacks_json = try allocStr(allocator, stmt, 6), + .error_text = try allocStrOpt(allocator, stmt, 7), + .created_at_ms = colInt(stmt, 8), + .updated_at_ms = colInt(stmt, 9), + .started_at_ms = colIntOpt(stmt, 10), + .ended_at_ms 
= colIntOpt(stmt, 11), + .state_json = try allocStrOpt(allocator, stmt, 12), + .config_json = try allocStrOpt(allocator, stmt, 13), + .parent_run_id = try allocStrOpt(allocator, stmt, 14), }; } - pub fn listRuns(self: *Self, allocator: std.mem.Allocator, status_filter: ?[]const u8, limit: i64, offset: i64) ![]types.RunRow { + pub fn listRuns(self: *Self, allocator: std.mem.Allocator, status_filter: ?[]const u8, workflow_id_filter: ?[]const u8, limit: i64, offset: i64) ![]types.RunRow { var stmt: ?*c.sqlite3_stmt = null; - if (status_filter != null) { - const sql = "SELECT id, idempotency_key, status, workflow_json, input_json, callbacks_json, error_text, created_at_ms, updated_at_ms, started_at_ms, ended_at_ms FROM runs WHERE status = ? ORDER BY created_at_ms DESC LIMIT ? OFFSET ?"; - if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { - return error.SqlitePrepareFailed; - } - _ = c.sqlite3_bind_text(stmt, 1, status_filter.?.ptr, @intCast(status_filter.?.len), SQLITE_STATIC); - _ = c.sqlite3_bind_int64(stmt, 2, limit); - _ = c.sqlite3_bind_int64(stmt, 3, offset); + const sql = + "SELECT id, idempotency_key, status, workflow_id, workflow_json, input_json, callbacks_json, error_text, created_at_ms, updated_at_ms, started_at_ms, ended_at_ms, state_json, config_json, parent_run_id " ++ + "FROM runs WHERE (? IS NULL OR status = ?) AND (? IS NULL OR workflow_id = ?) ORDER BY created_at_ms DESC LIMIT ? OFFSET ?"; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + if (status_filter) |status| { + _ = c.sqlite3_bind_text(stmt, 1, status.ptr, @intCast(status.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 2, status.ptr, @intCast(status.len), SQLITE_STATIC); } else { - const sql = "SELECT id, idempotency_key, status, workflow_json, input_json, callbacks_json, error_text, created_at_ms, updated_at_ms, started_at_ms, ended_at_ms FROM runs ORDER BY created_at_ms DESC LIMIT ? 
OFFSET ?"; - if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { - return error.SqlitePrepareFailed; - } - _ = c.sqlite3_bind_int64(stmt, 1, limit); - _ = c.sqlite3_bind_int64(stmt, 2, offset); + _ = c.sqlite3_bind_null(stmt, 1); + _ = c.sqlite3_bind_null(stmt, 2); + } + if (workflow_id_filter) |workflow_id| { + _ = c.sqlite3_bind_text(stmt, 3, workflow_id.ptr, @intCast(workflow_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 4, workflow_id.ptr, @intCast(workflow_id.len), SQLITE_STATIC); + } else { + _ = c.sqlite3_bind_null(stmt, 3); + _ = c.sqlite3_bind_null(stmt, 4); } + _ = c.sqlite3_bind_int64(stmt, 5, limit); + _ = c.sqlite3_bind_int64(stmt, 6, offset); defer _ = c.sqlite3_finalize(stmt); var list: std.ArrayListUnmanaged(types.RunRow) = .empty; @@ -470,14 +495,18 @@ pub const Store = struct { .id = try allocStr(allocator, stmt, 0), .idempotency_key = try allocStrOpt(allocator, stmt, 1), .status = try allocStr(allocator, stmt, 2), - .workflow_json = try allocStr(allocator, stmt, 3), - .input_json = try allocStr(allocator, stmt, 4), - .callbacks_json = try allocStr(allocator, stmt, 5), - .error_text = try allocStrOpt(allocator, stmt, 6), - .created_at_ms = colInt(stmt, 7), - .updated_at_ms = colInt(stmt, 8), - .started_at_ms = colIntOpt(stmt, 9), - .ended_at_ms = colIntOpt(stmt, 10), + .workflow_id = try allocStrOpt(allocator, stmt, 3), + .workflow_json = try allocStr(allocator, stmt, 4), + .input_json = try allocStr(allocator, stmt, 5), + .callbacks_json = try allocStr(allocator, stmt, 6), + .error_text = try allocStrOpt(allocator, stmt, 7), + .created_at_ms = colInt(stmt, 8), + .updated_at_ms = colInt(stmt, 9), + .started_at_ms = colIntOpt(stmt, 10), + .ended_at_ms = colIntOpt(stmt, 11), + .state_json = try allocStrOpt(allocator, stmt, 12), + .config_json = try allocStrOpt(allocator, stmt, 13), + .parent_run_id = try allocStrOpt(allocator, stmt, 14), }); } return list.toOwnedSlice(allocator); @@ -502,7 +531,7 @@ pub const Store = 
struct { } pub fn getActiveRuns(self: *Self, allocator: std.mem.Allocator) ![]types.RunRow { - const sql = "SELECT id, idempotency_key, status, workflow_json, input_json, callbacks_json, error_text, created_at_ms, updated_at_ms, started_at_ms, ended_at_ms FROM runs WHERE status IN ('running', 'paused') ORDER BY created_at_ms DESC"; + const sql = "SELECT id, idempotency_key, status, workflow_id, workflow_json, input_json, callbacks_json, error_text, created_at_ms, updated_at_ms, started_at_ms, ended_at_ms, state_json, config_json, parent_run_id FROM runs WHERE status = 'running' ORDER BY created_at_ms DESC"; var stmt: ?*c.sqlite3_stmt = null; if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { return error.SqlitePrepareFailed; @@ -515,14 +544,18 @@ pub const Store = struct { .id = try allocStr(allocator, stmt, 0), .idempotency_key = try allocStrOpt(allocator, stmt, 1), .status = try allocStr(allocator, stmt, 2), - .workflow_json = try allocStr(allocator, stmt, 3), - .input_json = try allocStr(allocator, stmt, 4), - .callbacks_json = try allocStr(allocator, stmt, 5), - .error_text = try allocStrOpt(allocator, stmt, 6), - .created_at_ms = colInt(stmt, 7), - .updated_at_ms = colInt(stmt, 8), - .started_at_ms = colIntOpt(stmt, 9), - .ended_at_ms = colIntOpt(stmt, 10), + .workflow_id = try allocStrOpt(allocator, stmt, 3), + .workflow_json = try allocStr(allocator, stmt, 4), + .input_json = try allocStr(allocator, stmt, 5), + .callbacks_json = try allocStr(allocator, stmt, 6), + .error_text = try allocStrOpt(allocator, stmt, 7), + .created_at_ms = colInt(stmt, 8), + .updated_at_ms = colInt(stmt, 9), + .started_at_ms = colIntOpt(stmt, 10), + .ended_at_ms = colIntOpt(stmt, 11), + .state_json = try allocStrOpt(allocator, stmt, 12), + .config_json = try allocStrOpt(allocator, stmt, 13), + .parent_run_id = try allocStrOpt(allocator, stmt, 14), }); } return list.toOwnedSlice(allocator); @@ -693,13 +726,10 @@ pub const Store = struct { } } - pub fn 
getReadySteps(self: *Self, allocator: std.mem.Allocator, run_id: []const u8) ![]types.StepRow { - const sql = - "SELECT s.id, s.run_id, s.def_step_id, s.type, s.status, s.worker_id, s.input_json, s.output_json, s.error_text, s.attempt, s.max_attempts, s.timeout_ms, s.next_attempt_at_ms, s.parent_step_id, s.item_index, s.created_at_ms, s.updated_at_ms, s.started_at_ms, s.ended_at_ms, s.child_run_id, s.iteration_index " ++ - "FROM steps s WHERE s.run_id = ? AND s.status = 'ready' " ++ - "AND NOT EXISTS (" ++ - "SELECT 1 FROM step_deps d JOIN steps dep ON dep.id = d.depends_on " ++ - "WHERE d.step_id = s.id AND dep.status NOT IN ('completed', 'skipped'))"; + /// Get a retrying step for a given run and node name (def_step_id). + /// Returns the step if it exists with status='ready' and next_attempt_at_ms set. + pub fn getRetryingStepForNode(self: *Self, allocator: std.mem.Allocator, run_id: []const u8, node_name: []const u8) !?types.StepRow { + const sql = "SELECT id, run_id, def_step_id, type, status, worker_id, input_json, output_json, error_text, attempt, max_attempts, timeout_ms, next_attempt_at_ms, parent_step_id, item_index, created_at_ms, updated_at_ms, started_at_ms, ended_at_ms, child_run_id, iteration_index FROM steps WHERE run_id = ? AND def_step_id = ? 
AND status = 'ready' AND next_attempt_at_ms IS NOT NULL ORDER BY created_at_ms DESC LIMIT 1"; var stmt: ?*c.sqlite3_stmt = null; if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { return error.SqlitePrepareFailed; @@ -707,12 +737,11 @@ pub const Store = struct { defer _ = c.sqlite3_finalize(stmt); _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 2, node_name.ptr, @intCast(node_name.len), SQLITE_STATIC); - var list: std.ArrayListUnmanaged(types.StepRow) = .empty; - while (c.sqlite3_step(stmt) == c.SQLITE_ROW) { - try list.append(allocator, try readStepRow(allocator, stmt)); - } - return list.toOwnedSlice(allocator); + if (c.sqlite3_step(stmt) != c.SQLITE_ROW) return null; + + return try readStepRow(allocator, stmt); } pub fn countStepsByStatus(self: *Self, run_id: []const u8, status: []const u8) !i64 { @@ -747,55 +776,55 @@ pub const Store = struct { return list.toOwnedSlice(allocator); } - /// Get the IDs of steps that a given step depends on. - pub fn getStepDeps(self: *Self, allocator: std.mem.Allocator, step_id: []const u8) ![][]const u8 { - const sql = "SELECT depends_on FROM step_deps WHERE step_id = ?"; + /// Delete steps for a run that were created after a given timestamp. + /// Used during replay to remove steps that will be re-executed. + pub fn deleteStepsAfterTimestamp(self: *Self, run_id: []const u8, after_ms: i64) !void { + const sql = "DELETE FROM steps WHERE run_id = ? 
AND created_at_ms > ?"; var stmt: ?*c.sqlite3_stmt = null; if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { return error.SqlitePrepareFailed; } defer _ = c.sqlite3_finalize(stmt); - _ = c.sqlite3_bind_text(stmt, 1, step_id.ptr, @intCast(step_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_int64(stmt, 2, after_ms); - var list: std.ArrayListUnmanaged([]const u8) = .empty; - while (c.sqlite3_step(stmt) == c.SQLITE_ROW) { - try list.append(allocator, try allocStr(allocator, stmt, 0)); + if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; } - return list.toOwnedSlice(allocator); } - /// Count how many running tasks a worker currently has. - pub fn countRunningStepsByWorker(self: *Self, worker_id: []const u8) !i64 { - const sql = "SELECT COUNT(*) FROM steps WHERE worker_id = ? AND status = 'running'"; + /// Delete checkpoints for a run with version greater than a given version. + /// Used during replay to remove checkpoints that will be superseded. + pub fn deleteCheckpointsAfterVersion(self: *Self, run_id: []const u8, after_version: i64) !void { + const sql = "DELETE FROM checkpoints WHERE run_id = ? AND version > ?"; var stmt: ?*c.sqlite3_stmt = null; if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { return error.SqlitePrepareFailed; } defer _ = c.sqlite3_finalize(stmt); - _ = c.sqlite3_bind_text(stmt, 1, worker_id.ptr, @intCast(worker_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_int64(stmt, 2, after_version); - if (c.sqlite3_step(stmt) != c.SQLITE_ROW) return 0; - return colInt(stmt, 0); + if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; + } } - /// Set started_at_ms for a step (used by wait steps to track timer start). 
- pub fn setStepStartedAt(self: *Self, step_id: []const u8, ts_ms: i64) !void { - const sql = "UPDATE steps SET started_at_ms = ?, updated_at_ms = ? WHERE id = ?"; + /// Count how many running tasks a worker currently has. + pub fn countRunningStepsByWorker(self: *Self, worker_id: []const u8) !i64 { + const sql = "SELECT COUNT(*) FROM steps WHERE worker_id = ? AND status = 'running'"; var stmt: ?*c.sqlite3_stmt = null; if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { return error.SqlitePrepareFailed; } defer _ = c.sqlite3_finalize(stmt); - _ = c.sqlite3_bind_int64(stmt, 1, ts_ms); - _ = c.sqlite3_bind_int64(stmt, 2, ids.nowMs()); - _ = c.sqlite3_bind_text(stmt, 3, step_id.ptr, @intCast(step_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 1, worker_id.ptr, @intCast(worker_id.len), SQLITE_STATIC); - if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { - return error.SqliteStepFailed; - } + if (c.sqlite3_step(stmt) != c.SQLITE_ROW) return 0; + return colInt(stmt, 0); } fn readStepRow(allocator: std.mem.Allocator, stmt: ?*c.sqlite3_stmt) !types.StepRow { @@ -1084,134 +1113,268 @@ pub const Store = struct { }; } - // ── Cycle State CRUD ───────────────────────────────────────────── + // ── Sub-workflow Helper ────────────────────────────────────────── - pub fn getCycleState(self: *Self, run_id: []const u8, cycle_key: []const u8) !?struct { iteration_count: i64, max_iterations: i64 } { - const sql = "SELECT iteration_count, max_iterations FROM cycle_state WHERE run_id = ? AND cycle_key = ?"; + pub fn updateStepInputJson(self: *Self, step_id: []const u8, input_json: []const u8) !void { + const sql = "UPDATE steps SET input_json = ? 
WHERE id = ?"; var stmt: ?*c.sqlite3_stmt = null; if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { return error.SqlitePrepareFailed; } defer _ = c.sqlite3_finalize(stmt); - _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); - _ = c.sqlite3_bind_text(stmt, 2, cycle_key.ptr, @intCast(cycle_key.len), SQLITE_STATIC); - - if (c.sqlite3_step(stmt) != c.SQLITE_ROW) return null; + _ = c.sqlite3_bind_text(stmt, 1, input_json.ptr, @intCast(input_json.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 2, step_id.ptr, @intCast(step_id.len), SQLITE_STATIC); - return .{ - .iteration_count = colInt(stmt, 0), - .max_iterations = colInt(stmt, 1), - }; + if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; + } } - pub fn upsertCycleState(self: *Self, run_id: []const u8, cycle_key: []const u8, iteration_count: i64, max_iterations: i64) !void { - const sql = "INSERT OR REPLACE INTO cycle_state (run_id, cycle_key, iteration_count, max_iterations) VALUES (?, ?, ?, ?)"; + pub fn updateStepChildRunId(self: *Self, step_id: []const u8, child_run_id: []const u8) !void { + const sql = "UPDATE steps SET child_run_id = ? 
WHERE id = ?"; var stmt: ?*c.sqlite3_stmt = null; if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { return error.SqlitePrepareFailed; } defer _ = c.sqlite3_finalize(stmt); - _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); - _ = c.sqlite3_bind_text(stmt, 2, cycle_key.ptr, @intCast(cycle_key.len), SQLITE_STATIC); - _ = c.sqlite3_bind_int64(stmt, 3, iteration_count); - _ = c.sqlite3_bind_int64(stmt, 4, max_iterations); + _ = c.sqlite3_bind_text(stmt, 1, child_run_id.ptr, @intCast(child_run_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 2, step_id.ptr, @intCast(step_id.len), SQLITE_STATIC); if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { return error.SqliteStepFailed; } } - // ── Chat Message CRUD ──────────────────────────────────────────── + // ── Workflow CRUD ───────────────────────────────────────────────── + + pub fn createWorkflow(self: *Self, id: []const u8, name: []const u8, definition_json: []const u8) !void { + return self.createWorkflowWithVersion(id, name, definition_json, 1); + } - pub fn insertChatMessage(self: *Self, run_id: []const u8, step_id: []const u8, round: i64, role: []const u8, worker_id: ?[]const u8, message: []const u8) !void { - const sql = "INSERT INTO chat_messages (run_id, step_id, round, role, worker_id, message, ts_ms) VALUES (?, ?, ?, ?, ?, ?, ?)"; + pub fn createWorkflowWithVersion(self: *Self, id: []const u8, name: []const u8, definition_json: []const u8, version: i64) !void { + const sql = "INSERT INTO workflows (id, name, definition_json, version, created_at_ms, updated_at_ms) VALUES (?, ?, ?, ?, ?, ?)"; var stmt: ?*c.sqlite3_stmt = null; if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { return error.SqlitePrepareFailed; } defer _ = c.sqlite3_finalize(stmt); - _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); - _ = c.sqlite3_bind_text(stmt, 2, step_id.ptr, @intCast(step_id.len), SQLITE_STATIC); - _ = 
c.sqlite3_bind_int64(stmt, 3, round); - _ = c.sqlite3_bind_text(stmt, 4, role.ptr, @intCast(role.len), SQLITE_STATIC); - bindTextOpt(stmt, 5, worker_id); - _ = c.sqlite3_bind_text(stmt, 6, message.ptr, @intCast(message.len), SQLITE_STATIC); - _ = c.sqlite3_bind_int64(stmt, 7, ids.nowMs()); + const now = ids.nowMs(); + _ = c.sqlite3_bind_text(stmt, 1, id.ptr, @intCast(id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 2, name.ptr, @intCast(name.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 3, definition_json.ptr, @intCast(definition_json.len), SQLITE_STATIC); + _ = c.sqlite3_bind_int64(stmt, 4, version); + _ = c.sqlite3_bind_int64(stmt, 5, now); + _ = c.sqlite3_bind_int64(stmt, 6, now); if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { return error.SqliteStepFailed; } } - pub fn getChatMessages(self: *Self, allocator: std.mem.Allocator, step_id: []const u8) ![]types.ChatMessageRow { - const sql = "SELECT id, run_id, step_id, round, role, worker_id, message, ts_ms FROM chat_messages WHERE step_id = ? 
ORDER BY round, id"; + pub fn getWorkflow(self: *Self, alloc: std.mem.Allocator, id: []const u8) !?types.WorkflowRow { + const sql = "SELECT id, name, definition_json, version, created_at_ms, updated_at_ms FROM workflows WHERE id = ?"; var stmt: ?*c.sqlite3_stmt = null; if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { return error.SqlitePrepareFailed; } defer _ = c.sqlite3_finalize(stmt); - _ = c.sqlite3_bind_text(stmt, 1, step_id.ptr, @intCast(step_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 1, id.ptr, @intCast(id.len), SQLITE_STATIC); + + if (c.sqlite3_step(stmt) != c.SQLITE_ROW) return null; + + return types.WorkflowRow{ + .id = try allocStr(alloc, stmt, 0), + .name = try allocStr(alloc, stmt, 1), + .definition_json = try allocStr(alloc, stmt, 2), + .version = colInt(stmt, 3), + .created_at_ms = colInt(stmt, 4), + .updated_at_ms = colInt(stmt, 5), + }; + } + + pub fn listWorkflows(self: *Self, alloc: std.mem.Allocator) ![]types.WorkflowRow { + const sql = "SELECT id, name, definition_json, version, created_at_ms, updated_at_ms FROM workflows ORDER BY created_at_ms DESC"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); - var list: std.ArrayListUnmanaged(types.ChatMessageRow) = .empty; + var list: std.ArrayListUnmanaged(types.WorkflowRow) = .empty; while (c.sqlite3_step(stmt) == c.SQLITE_ROW) { - try list.append(allocator, .{ - .id = colInt(stmt, 0), - .run_id = try allocStr(allocator, stmt, 1), - .step_id = try allocStr(allocator, stmt, 2), - .round = colInt(stmt, 3), - .role = try allocStr(allocator, stmt, 4), - .worker_id = try allocStrOpt(allocator, stmt, 5), - .message = try allocStr(allocator, stmt, 6), - .ts_ms = colInt(stmt, 7), + try list.append(alloc, .{ + .id = try allocStr(alloc, stmt, 0), + .name = try allocStr(alloc, stmt, 1), + .definition_json = try allocStr(alloc, stmt, 
2), + .version = colInt(stmt, 3), + .created_at_ms = colInt(stmt, 4), + .updated_at_ms = colInt(stmt, 5), }); } - return list.toOwnedSlice(allocator); + return list.toOwnedSlice(alloc); + } + + pub fn updateWorkflow(self: *Self, id: []const u8, name: []const u8, definition_json: []const u8) !void { + return self.updateWorkflowWithVersion(id, name, definition_json, null); } - // ── Saga State CRUD ────────────────────────────────────────────── + pub fn updateWorkflowWithVersion(self: *Self, id: []const u8, name: []const u8, definition_json: []const u8, version: ?i64) !void { + if (version) |v| { + const sql = "UPDATE workflows SET name = ?, definition_json = ?, version = ?, updated_at_ms = ? WHERE id = ?"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + _ = c.sqlite3_bind_text(stmt, 1, name.ptr, @intCast(name.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 2, definition_json.ptr, @intCast(definition_json.len), SQLITE_STATIC); + _ = c.sqlite3_bind_int64(stmt, 3, v); + _ = c.sqlite3_bind_int64(stmt, 4, ids.nowMs()); + _ = c.sqlite3_bind_text(stmt, 5, id.ptr, @intCast(id.len), SQLITE_STATIC); + + if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; + } + } else { + const sql = "UPDATE workflows SET name = ?, definition_json = ?, updated_at_ms = ? 
WHERE id = ?"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + _ = c.sqlite3_bind_text(stmt, 1, name.ptr, @intCast(name.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 2, definition_json.ptr, @intCast(definition_json.len), SQLITE_STATIC); + _ = c.sqlite3_bind_int64(stmt, 3, ids.nowMs()); + _ = c.sqlite3_bind_text(stmt, 4, id.ptr, @intCast(id.len), SQLITE_STATIC); + + if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; + } + } + } - pub fn insertSagaState(self: *Self, run_id: []const u8, saga_step_id: []const u8, body_step_id: []const u8, compensation_step_id: ?[]const u8) !void { - const sql = "INSERT INTO saga_state (run_id, saga_step_id, body_step_id, compensation_step_id, status) VALUES (?, ?, ?, ?, 'pending')"; + pub fn deleteWorkflow(self: *Self, id: []const u8) !void { + const sql = "DELETE FROM workflows WHERE id = ?"; var stmt: ?*c.sqlite3_stmt = null; if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { return error.SqlitePrepareFailed; } defer _ = c.sqlite3_finalize(stmt); - _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); - _ = c.sqlite3_bind_text(stmt, 2, saga_step_id.ptr, @intCast(saga_step_id.len), SQLITE_STATIC); - _ = c.sqlite3_bind_text(stmt, 3, body_step_id.ptr, @intCast(body_step_id.len), SQLITE_STATIC); - bindTextOpt(stmt, 4, compensation_step_id); + _ = c.sqlite3_bind_text(stmt, 1, id.ptr, @intCast(id.len), SQLITE_STATIC); if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { return error.SqliteStepFailed; } } - pub fn updateSagaState(self: *Self, run_id: []const u8, saga_step_id: []const u8, body_step_id: []const u8, status: []const u8) !void { - const sql = "UPDATE saga_state SET status = ? WHERE run_id = ? AND saga_step_id = ? 
AND body_step_id = ?"; + // ── Token Accounting ────────────────────────────────────────────── + + pub fn updateStepTokens(self: *Self, step_id: []const u8, input_tokens: i64, output_tokens: i64) !void { + const sql = "UPDATE steps SET input_tokens = ?, output_tokens = ?, total_tokens = ? WHERE id = ?"; var stmt: ?*c.sqlite3_stmt = null; if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { return error.SqlitePrepareFailed; } defer _ = c.sqlite3_finalize(stmt); - _ = c.sqlite3_bind_text(stmt, 1, status.ptr, @intCast(status.len), SQLITE_STATIC); + _ = c.sqlite3_bind_int64(stmt, 1, input_tokens); + _ = c.sqlite3_bind_int64(stmt, 2, output_tokens); + _ = c.sqlite3_bind_int64(stmt, 3, input_tokens + output_tokens); + _ = c.sqlite3_bind_text(stmt, 4, step_id.ptr, @intCast(step_id.len), SQLITE_STATIC); + + if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; + } + } + + pub fn updateRunTokens(self: *Self, run_id: []const u8, input_delta: i64, output_delta: i64) !void { + const sql = "UPDATE runs SET total_input_tokens = total_input_tokens + ?, total_output_tokens = total_output_tokens + ?, total_tokens = total_tokens + ? 
WHERE id = ?"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + _ = c.sqlite3_bind_int64(stmt, 1, input_delta); + _ = c.sqlite3_bind_int64(stmt, 2, output_delta); + _ = c.sqlite3_bind_int64(stmt, 3, input_delta + output_delta); + _ = c.sqlite3_bind_text(stmt, 4, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); + + if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; + } + } + + pub fn getRunTokens(self: *Self, run_id: []const u8) !struct { input: i64, output: i64, total: i64 } { + const sql = "SELECT COALESCE(total_input_tokens, 0), COALESCE(total_output_tokens, 0), COALESCE(total_tokens, 0) FROM runs WHERE id = ?"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); + + if (c.sqlite3_step(stmt) != c.SQLITE_ROW) { + return .{ .input = 0, .output = 0, .total = 0 }; + } + + return .{ + .input = colInt(stmt, 0), + .output = colInt(stmt, 1), + .total = colInt(stmt, 2), + }; + } + + // ── Checkpoint CRUD ─────────────────────────────────────────────── + + pub fn createCheckpoint(self: *Self, id: []const u8, run_id: []const u8, step_id: []const u8, parent_id: ?[]const u8, state_json: []const u8, completed_nodes_json: []const u8, version: i64, metadata_json: ?[]const u8) !void { + const sql = "INSERT INTO checkpoints (id, run_id, step_id, parent_id, state_json, completed_nodes_json, version, metadata_json, created_at_ms) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + _ = c.sqlite3_bind_text(stmt, 1, 
id.ptr, @intCast(id.len), SQLITE_STATIC); _ = c.sqlite3_bind_text(stmt, 2, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); - _ = c.sqlite3_bind_text(stmt, 3, saga_step_id.ptr, @intCast(saga_step_id.len), SQLITE_STATIC); - _ = c.sqlite3_bind_text(stmt, 4, body_step_id.ptr, @intCast(body_step_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 3, step_id.ptr, @intCast(step_id.len), SQLITE_STATIC); + bindTextOpt(stmt, 4, parent_id); + _ = c.sqlite3_bind_text(stmt, 5, state_json.ptr, @intCast(state_json.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 6, completed_nodes_json.ptr, @intCast(completed_nodes_json.len), SQLITE_STATIC); + _ = c.sqlite3_bind_int64(stmt, 7, version); + bindTextOpt(stmt, 8, metadata_json); + _ = c.sqlite3_bind_int64(stmt, 9, ids.nowMs()); if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { return error.SqliteStepFailed; } } - pub fn getSagaStates(self: *Self, allocator: std.mem.Allocator, run_id: []const u8, saga_step_id: []const u8) ![]types.SagaStateRow { - const sql = "SELECT run_id, saga_step_id, body_step_id, compensation_step_id, status FROM saga_state WHERE run_id = ? AND saga_step_id = ? 
ORDER BY rowid"; + pub fn getCheckpoint(self: *Self, alloc: std.mem.Allocator, id: []const u8) !?types.CheckpointRow { + const sql = "SELECT id, run_id, step_id, parent_id, state_json, completed_nodes_json, version, metadata_json, created_at_ms FROM checkpoints WHERE id = ?"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + _ = c.sqlite3_bind_text(stmt, 1, id.ptr, @intCast(id.len), SQLITE_STATIC); + + if (c.sqlite3_step(stmt) != c.SQLITE_ROW) return null; + + return try readCheckpointRow(alloc, stmt); + } + + pub fn listCheckpoints(self: *Self, alloc: std.mem.Allocator, run_id: []const u8) ![]types.CheckpointRow { + const sql = "SELECT id, run_id, step_id, parent_id, state_json, completed_nodes_json, version, metadata_json, created_at_ms FROM checkpoints WHERE run_id = ? ORDER BY version ASC"; var stmt: ?*c.sqlite3_stmt = null; if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { return error.SqlitePrepareFailed; @@ -1219,88 +1382,445 @@ pub const Store = struct { defer _ = c.sqlite3_finalize(stmt); _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); - _ = c.sqlite3_bind_text(stmt, 2, saga_step_id.ptr, @intCast(saga_step_id.len), SQLITE_STATIC); - var list: std.ArrayListUnmanaged(types.SagaStateRow) = .empty; + var list: std.ArrayListUnmanaged(types.CheckpointRow) = .empty; while (c.sqlite3_step(stmt) == c.SQLITE_ROW) { - try list.append(allocator, .{ - .run_id = try allocStr(allocator, stmt, 0), - .saga_step_id = try allocStr(allocator, stmt, 1), - .body_step_id = try allocStr(allocator, stmt, 2), - .compensation_step_id = try allocStrOpt(allocator, stmt, 3), - .status = try allocStr(allocator, stmt, 4), - }); + try list.append(alloc, try readCheckpointRow(alloc, stmt)); } - return list.toOwnedSlice(allocator); + return list.toOwnedSlice(alloc); } - // ── Sub-workflow 
Helper ────────────────────────────────────────── + pub fn getLatestCheckpoint(self: *Self, alloc: std.mem.Allocator, run_id: []const u8) !?types.CheckpointRow { + const sql = "SELECT id, run_id, step_id, parent_id, state_json, completed_nodes_json, version, metadata_json, created_at_ms FROM checkpoints WHERE run_id = ? ORDER BY version DESC LIMIT 1"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); - pub fn updateStepInputJson(self: *Self, step_id: []const u8, input_json: []const u8) !void { - const sql = "UPDATE steps SET input_json = ? WHERE id = ?"; + _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); + + if (c.sqlite3_step(stmt) != c.SQLITE_ROW) return null; + + return try readCheckpointRow(alloc, stmt); + } + + fn readCheckpointRow(alloc: std.mem.Allocator, stmt: ?*c.sqlite3_stmt) !types.CheckpointRow { + return .{ + .id = try allocStr(alloc, stmt, 0), + .run_id = try allocStr(alloc, stmt, 1), + .step_id = try allocStr(alloc, stmt, 2), + .parent_id = try allocStrOpt(alloc, stmt, 3), + .state_json = try allocStr(alloc, stmt, 4), + .completed_nodes_json = try allocStr(alloc, stmt, 5), + .version = colInt(stmt, 6), + .metadata_json = try allocStrOpt(alloc, stmt, 7), + .created_at_ms = colInt(stmt, 8), + }; + } + + // ── Agent Event CRUD ────────────────────────────────────────────── + + pub fn createAgentEvent(self: *Self, run_id: []const u8, step_id: []const u8, iteration: i64, tool: ?[]const u8, args_json: ?[]const u8, result_text: ?[]const u8, status: []const u8) !void { + const sql = "INSERT INTO agent_events (run_id, step_id, iteration, tool, args_json, result_text, status, created_at_ms) VALUES (?, ?, ?, ?, ?, ?, ?, ?)"; var stmt: ?*c.sqlite3_stmt = null; if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { return error.SqlitePrepareFailed; } defer _ = 
c.sqlite3_finalize(stmt); - _ = c.sqlite3_bind_text(stmt, 1, input_json.ptr, @intCast(input_json.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); _ = c.sqlite3_bind_text(stmt, 2, step_id.ptr, @intCast(step_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_int64(stmt, 3, iteration); + bindTextOpt(stmt, 4, tool); + bindTextOpt(stmt, 5, args_json); + bindTextOpt(stmt, 6, result_text); + _ = c.sqlite3_bind_text(stmt, 7, status.ptr, @intCast(status.len), SQLITE_STATIC); + _ = c.sqlite3_bind_int64(stmt, 8, ids.nowMs()); if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { return error.SqliteStepFailed; } } - pub fn updateStepChildRunId(self: *Self, step_id: []const u8, child_run_id: []const u8) !void { - const sql = "UPDATE steps SET child_run_id = ? WHERE id = ?"; + pub fn listAgentEvents(self: *Self, alloc: std.mem.Allocator, run_id: []const u8, step_id: []const u8) ![]types.AgentEventRow { + const sql = "SELECT id, run_id, step_id, iteration, tool, args_json, result_text, status, created_at_ms FROM agent_events WHERE run_id = ? AND step_id = ? 
ORDER BY id ASC"; var stmt: ?*c.sqlite3_stmt = null; if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { return error.SqlitePrepareFailed; } defer _ = c.sqlite3_finalize(stmt); - _ = c.sqlite3_bind_text(stmt, 1, child_run_id.ptr, @intCast(child_run_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); _ = c.sqlite3_bind_text(stmt, 2, step_id.ptr, @intCast(step_id.len), SQLITE_STATIC); + var list: std.ArrayListUnmanaged(types.AgentEventRow) = .empty; + while (c.sqlite3_step(stmt) == c.SQLITE_ROW) { + try list.append(alloc, .{ + .id = colInt(stmt, 0), + .run_id = try allocStr(alloc, stmt, 1), + .step_id = try allocStr(alloc, stmt, 2), + .iteration = colInt(stmt, 3), + .tool = try allocStrOpt(alloc, stmt, 4), + .args_json = try allocStrOpt(alloc, stmt, 5), + .result_text = try allocStrOpt(alloc, stmt, 6), + .status = try allocStr(alloc, stmt, 7), + .created_at_ms = colInt(stmt, 8), + }); + } + return list.toOwnedSlice(alloc); + } + + // ── Run State Management ────────────────────────────────────────── + + pub fn updateRunState(self: *Self, run_id: []const u8, state_json: []const u8) !void { + const sql = "UPDATE runs SET state_json = ?, updated_at_ms = ? 
WHERE id = ?"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + _ = c.sqlite3_bind_text(stmt, 1, state_json.ptr, @intCast(state_json.len), SQLITE_STATIC); + _ = c.sqlite3_bind_int64(stmt, 2, ids.nowMs()); + _ = c.sqlite3_bind_text(stmt, 3, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); + if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { return error.SqliteStepFailed; } } -}; -// ── Tests ───────────────────────────────────────────────────────────── + pub fn incrementCheckpointCount(self: *Self, run_id: []const u8) !void { + const sql = "UPDATE runs SET checkpoint_count = COALESCE(checkpoint_count, 0) + 1, updated_at_ms = ? WHERE id = ?"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); -test "Store: init and deinit" { - const allocator = std.testing.allocator; - var s = try Store.init(allocator, ":memory:"); - defer s.deinit(); -} + _ = c.sqlite3_bind_int64(stmt, 1, ids.nowMs()); + _ = c.sqlite3_bind_text(stmt, 2, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); -test "Store: insert and get worker" { - const allocator = std.testing.allocator; - var s = try Store.init(allocator, ":memory:"); - defer s.deinit(); - try s.insertWorker("w1", "http://localhost:3001", "tok", "webhook", null, "[\"coder\"]", 3, "config"); - const w = (try s.getWorker(allocator, "w1")).?; - defer allocator.free(w.id); - defer allocator.free(w.url); - defer allocator.free(w.token); - defer allocator.free(w.protocol); - if (w.model) |m| allocator.free(m); - defer allocator.free(w.tags_json); - defer allocator.free(w.source); - defer allocator.free(w.status); - try std.testing.expectEqualStrings("w1", w.id); - try std.testing.expectEqualStrings("http://localhost:3001", w.url); - try 
std.testing.expectEqualStrings("webhook", w.protocol); - try std.testing.expect(w.model == null); - try std.testing.expectEqual(@as(i64, 3), w.max_concurrent); -} + if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; + } + } -test "Store: insert and list workers" { - const allocator = std.testing.allocator; - var s = try Store.init(allocator, ":memory:"); + pub fn createRunWithState(self: *Self, id: []const u8, workflow_id: ?[]const u8, workflow_json: []const u8, input_json: []const u8, state_json: []const u8) !void { + return self.createRunWithStateAndStatus(id, workflow_id, workflow_json, input_json, state_json, "pending"); + } + + /// Create a run with explicit initial status. Use "running" to avoid the + /// race window between creating with "pending" and updating to "running". + pub fn createRunWithStateAndStatus(self: *Self, id: []const u8, workflow_id: ?[]const u8, workflow_json: []const u8, input_json: []const u8, state_json: []const u8, status: []const u8) !void { + const sql = "INSERT INTO runs (id, status, workflow_id, workflow_json, input_json, callbacks_json, state_json, created_at_ms, updated_at_ms) VALUES (?, ?, ?, ?, ?, '[]', ?, ?, ?)"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + const now = ids.nowMs(); + _ = c.sqlite3_bind_text(stmt, 1, id.ptr, @intCast(id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 2, status.ptr, @intCast(status.len), SQLITE_STATIC); + bindTextOpt(stmt, 3, workflow_id); + _ = c.sqlite3_bind_text(stmt, 4, workflow_json.ptr, @intCast(workflow_json.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 5, input_json.ptr, @intCast(input_json.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 6, state_json.ptr, @intCast(state_json.len), SQLITE_STATIC); + _ = c.sqlite3_bind_int64(stmt, 7, now); + _ = c.sqlite3_bind_int64(stmt, 8, now); + + if 
(c.sqlite3_step(stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; + } + } + + pub fn setParentRunId(self: *Self, run_id: []const u8, parent_run_id: []const u8) !void { + const sql = "UPDATE runs SET parent_run_id = ?, updated_at_ms = ? WHERE id = ?"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + _ = c.sqlite3_bind_text(stmt, 1, parent_run_id.ptr, @intCast(parent_run_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_int64(stmt, 2, ids.nowMs()); + _ = c.sqlite3_bind_text(stmt, 3, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); + + if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; + } + } + + pub fn setConfigJson(self: *Self, run_id: []const u8, config_json: []const u8) !void { + const sql = "UPDATE runs SET config_json = ?, updated_at_ms = ? WHERE id = ?"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + _ = c.sqlite3_bind_text(stmt, 1, config_json.ptr, @intCast(config_json.len), SQLITE_STATIC); + _ = c.sqlite3_bind_int64(stmt, 2, ids.nowMs()); + _ = c.sqlite3_bind_text(stmt, 3, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); + + if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; + } + } + + pub fn createForkedRun(self: *Self, id: []const u8, workflow_json: []const u8, state_json: []const u8, forked_from_run_id: []const u8, forked_from_checkpoint_id: []const u8) !void { + const sql = "INSERT INTO runs (id, status, workflow_json, input_json, callbacks_json, state_json, forked_from_run_id, forked_from_checkpoint_id, created_at_ms, updated_at_ms) VALUES (?, 'pending', ?, '{}', '[]', ?, ?, ?, ?, ?)"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + 
return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + const now = ids.nowMs(); + _ = c.sqlite3_bind_text(stmt, 1, id.ptr, @intCast(id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 2, workflow_json.ptr, @intCast(workflow_json.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 3, state_json.ptr, @intCast(state_json.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 4, forked_from_run_id.ptr, @intCast(forked_from_run_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 5, forked_from_checkpoint_id.ptr, @intCast(forked_from_checkpoint_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_int64(stmt, 6, now); + _ = c.sqlite3_bind_int64(stmt, 7, now); + + if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; + } + } + + // ── Pending State Injection CRUD ────────────────────────────────── + + pub fn createPendingInjection(self: *Self, run_id: []const u8, updates_json: []const u8, apply_after_step: ?[]const u8) !void { + const sql = "INSERT INTO pending_state_injections (run_id, updates_json, apply_after_step, created_at_ms) VALUES (?, ?, ?, ?)"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 2, updates_json.ptr, @intCast(updates_json.len), SQLITE_STATIC); + bindTextOpt(stmt, 3, apply_after_step); + _ = c.sqlite3_bind_int64(stmt, 4, ids.nowMs()); + + if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; + } + } + + pub fn consumePendingInjections(self: *Self, alloc: std.mem.Allocator, run_id: []const u8, completed_step: []const u8) ![]types.PendingInjectionRow { + // Select injections where apply_after_step matches the completed step or is NULL + const sql = "SELECT id, run_id, updates_json, apply_after_step, created_at_ms FROM 
pending_state_injections WHERE run_id = ? AND (apply_after_step IS NULL OR apply_after_step = ?) ORDER BY id ASC"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 2, completed_step.ptr, @intCast(completed_step.len), SQLITE_STATIC); + + var list: std.ArrayListUnmanaged(types.PendingInjectionRow) = .empty; + while (c.sqlite3_step(stmt) == c.SQLITE_ROW) { + try list.append(alloc, .{ + .id = colInt(stmt, 0), + .run_id = try allocStr(alloc, stmt, 1), + .updates_json = try allocStr(alloc, stmt, 2), + .apply_after_step = try allocStrOpt(alloc, stmt, 3), + .created_at_ms = colInt(stmt, 4), + }); + } + + const result = try list.toOwnedSlice(alloc); + + // Delete consumed injections + if (result.len > 0) { + const del_sql = "DELETE FROM pending_state_injections WHERE run_id = ? 
AND (apply_after_step IS NULL OR apply_after_step = ?)"; + var del_stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, del_sql, -1, &del_stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(del_stmt); + + _ = c.sqlite3_bind_text(del_stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(del_stmt, 2, completed_step.ptr, @intCast(completed_step.len), SQLITE_STATIC); + + if (c.sqlite3_step(del_stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; + } + } + + return result; + } + + pub fn discardPendingInjections(self: *Self, run_id: []const u8) !void { + const sql = "DELETE FROM pending_state_injections WHERE run_id = ?"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); + + if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; + } + } + + // ── Node Cache (Gap 3) ─────────────────────────────────────────── + + pub fn getCachedResult(self: *Self, alloc: std.mem.Allocator, cache_key: []const u8) !?[]const u8 { + const sql = "SELECT result_json, created_at_ms, ttl_ms FROM node_cache WHERE cache_key = ?"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + _ = c.sqlite3_bind_text(stmt, 1, cache_key.ptr, @intCast(cache_key.len), SQLITE_STATIC); + + if (c.sqlite3_step(stmt) != c.SQLITE_ROW) return null; + + const result_json = try allocStr(alloc, stmt, 0); + const created_at_ms = colInt(stmt, 1); + const ttl_ms = colIntOpt(stmt, 2); + + // Check expiration + if (ttl_ms) |ttl| { + const now_ms = ids.nowMs(); + if (now_ms - created_at_ms > ttl) { + // Expired — delete and return null + const 
del_sql = "DELETE FROM node_cache WHERE cache_key = ?"; + var del_stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, del_sql, -1, &del_stmt, null) == c.SQLITE_OK) { + _ = c.sqlite3_bind_text(del_stmt, 1, cache_key.ptr, @intCast(cache_key.len), SQLITE_STATIC); + _ = c.sqlite3_step(del_stmt); + _ = c.sqlite3_finalize(del_stmt); + } + alloc.free(result_json); + return null; + } + } + + return result_json; + } + + pub fn setCachedResult(self: *Self, cache_key: []const u8, node_name: []const u8, result_json: []const u8, ttl_ms: ?i64) !void { + const sql = "INSERT OR REPLACE INTO node_cache (cache_key, node_name, result_json, created_at_ms, ttl_ms) VALUES (?, ?, ?, ?, ?)"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + _ = c.sqlite3_bind_text(stmt, 1, cache_key.ptr, @intCast(cache_key.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 2, node_name.ptr, @intCast(node_name.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 3, result_json.ptr, @intCast(result_json.len), SQLITE_STATIC); + _ = c.sqlite3_bind_int64(stmt, 4, ids.nowMs()); + bindIntOpt(stmt, 5, ttl_ms); + + if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; + } + } + + // ── Pending Writes (Gap 4) ─────────────────────────────────────── + + pub fn savePendingWrite(self: *Self, run_id: []const u8, step_id: []const u8, channel: []const u8, value_json: []const u8) !void { + const sql = "INSERT INTO pending_writes (run_id, step_id, channel, value_json, created_at_ms) VALUES (?, ?, ?, ?, ?)"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 2, step_id.ptr, 
@intCast(step_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 3, channel.ptr, @intCast(channel.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 4, value_json.ptr, @intCast(value_json.len), SQLITE_STATIC); + _ = c.sqlite3_bind_int64(stmt, 5, ids.nowMs()); + + if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; + } + } + + pub fn getPendingWrites(self: *Self, alloc: std.mem.Allocator, run_id: []const u8) ![]types.PendingWriteRow { + const sql = "SELECT id, run_id, step_id, channel, value_json, created_at_ms FROM pending_writes WHERE run_id = ? ORDER BY id ASC"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); + + var list: std.ArrayListUnmanaged(types.PendingWriteRow) = .empty; + while (c.sqlite3_step(stmt) == c.SQLITE_ROW) { + try list.append(alloc, .{ + .id = colInt(stmt, 0), + .run_id = try allocStr(alloc, stmt, 1), + .step_id = try allocStr(alloc, stmt, 2), + .channel = try allocStr(alloc, stmt, 3), + .value_json = try allocStr(alloc, stmt, 4), + .created_at_ms = colInt(stmt, 5), + }); + } + return list.toOwnedSlice(alloc); + } + + pub fn clearPendingWrites(self: *Self, run_id: []const u8) !void { + const sql = "DELETE FROM pending_writes WHERE run_id = ?"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); + + if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; + } + } +}; + +// ── Tests ───────────────────────────────────────────────────────────── + +test "Store: init and deinit" { + const allocator = std.testing.allocator; + var s = try 
Store.init(allocator, ":memory:"); + defer s.deinit(); +} + +test "Store: insert and get worker" { + const allocator = std.testing.allocator; + var s = try Store.init(allocator, ":memory:"); + defer s.deinit(); + try s.insertWorker("w1", "http://localhost:3001", "tok", "webhook", null, "[\"coder\"]", 3, "config"); + const w = (try s.getWorker(allocator, "w1")).?; + defer allocator.free(w.id); + defer allocator.free(w.url); + defer allocator.free(w.token); + defer allocator.free(w.protocol); + if (w.model) |m| allocator.free(m); + defer allocator.free(w.tags_json); + defer allocator.free(w.source); + defer allocator.free(w.status); + try std.testing.expectEqualStrings("w1", w.id); + try std.testing.expectEqualStrings("http://localhost:3001", w.url); + try std.testing.expectEqualStrings("webhook", w.protocol); + try std.testing.expect(w.model == null); + try std.testing.expectEqual(@as(i64, 3), w.max_concurrent); +} + +test "Store: insert and list workers" { + const allocator = std.testing.allocator; + var s = try Store.init(allocator, ":memory:"); defer s.deinit(); try s.insertWorker("w1", "http://localhost:3001", "tok", "webhook", null, "[]", 1, "config"); try s.insertWorker("w2", "http://localhost:3002", "tok", "webhook", null, "[]", 2, "registered"); @@ -1386,6 +1906,7 @@ test "Store: insert and get run" { allocator.free(run.id); if (run.idempotency_key) |ik| allocator.free(ik); allocator.free(run.status); + if (run.workflow_id) |wid| allocator.free(wid); allocator.free(run.workflow_json); allocator.free(run.input_json); allocator.free(run.callbacks_json); @@ -1421,6 +1942,7 @@ test "Store: transaction commit persists inserted run" { allocator.free(run.id); if (run.idempotency_key) |ik| allocator.free(ik); allocator.free(run.status); + if (run.workflow_id) |wid| allocator.free(wid); allocator.free(run.workflow_json); allocator.free(run.input_json); allocator.free(run.callbacks_json); @@ -1435,34 +1957,67 @@ test "Store: list runs with filter" { try 
s.insertRun("r1", null, "running", "{}", "{}", "[]"); try s.insertRun("r2", null, "pending", "{}", "{}", "[]"); try s.insertRun("r3", null, "running", "{}", "{}", "[]"); + try s.createWorkflow("wf_filter", "Filter WF", "{\"nodes\":{}}"); + try s.createRunWithState("r4", "wf_filter", "{\"nodes\":{}}", "{}", "{}"); - const running = try s.listRuns(allocator, "running", 100, 0); + const running = try s.listRuns(allocator, "running", null, 100, 0); defer { for (running) |r| { allocator.free(r.id); if (r.idempotency_key) |ik| allocator.free(ik); allocator.free(r.status); + if (r.workflow_id) |wid| allocator.free(wid); allocator.free(r.workflow_json); allocator.free(r.input_json); allocator.free(r.callbacks_json); + if (r.error_text) |et| allocator.free(et); + if (r.state_json) |sj| allocator.free(sj); + if (r.config_json) |cj| allocator.free(cj); + if (r.parent_run_id) |pid| allocator.free(pid); } allocator.free(running); } try std.testing.expectEqual(@as(usize, 2), running.len); - const all = try s.listRuns(allocator, null, 100, 0); + const all = try s.listRuns(allocator, null, null, 100, 0); defer { for (all) |r| { allocator.free(r.id); if (r.idempotency_key) |ik| allocator.free(ik); allocator.free(r.status); + if (r.workflow_id) |wid| allocator.free(wid); allocator.free(r.workflow_json); allocator.free(r.input_json); allocator.free(r.callbacks_json); + if (r.error_text) |et| allocator.free(et); + if (r.state_json) |sj| allocator.free(sj); + if (r.config_json) |cj| allocator.free(cj); + if (r.parent_run_id) |pid| allocator.free(pid); } allocator.free(all); } - try std.testing.expectEqual(@as(usize, 3), all.len); + try std.testing.expectEqual(@as(usize, 4), all.len); + + const filtered = try s.listRuns(allocator, null, "wf_filter", 100, 0); + defer { + for (filtered) |r| { + allocator.free(r.id); + if (r.idempotency_key) |ik| allocator.free(ik); + allocator.free(r.status); + if (r.workflow_id) |wid| allocator.free(wid); + allocator.free(r.workflow_json); + 
allocator.free(r.input_json); + allocator.free(r.callbacks_json); + if (r.error_text) |et| allocator.free(et); + if (r.state_json) |sj| allocator.free(sj); + if (r.config_json) |cj| allocator.free(cj); + if (r.parent_run_id) |pid| allocator.free(pid); + } + allocator.free(filtered); + } + try std.testing.expectEqual(@as(usize, 1), filtered.len); + try std.testing.expectEqualStrings("r4", filtered[0].id); + try std.testing.expectEqualStrings("wf_filter", filtered[0].workflow_id.?); } test "Store: update run status" { @@ -1476,6 +2031,7 @@ test "Store: update run status" { allocator.free(run.id); if (run.idempotency_key) |ik| allocator.free(ik); allocator.free(run.status); + if (run.workflow_id) |wid| allocator.free(wid); allocator.free(run.workflow_json); allocator.free(run.input_json); allocator.free(run.callbacks_json); @@ -1491,8 +2047,7 @@ test "Store: get active runs" { defer s.deinit(); try s.insertRun("r1", null, "running", "{}", "{}", "[]"); try s.insertRun("r2", null, "pending", "{}", "{}", "[]"); - try s.insertRun("r3", null, "paused", "{}", "{}", "[]"); - try s.insertRun("r4", null, "completed", "{}", "{}", "[]"); + try s.insertRun("r3", null, "completed", "{}", "{}", "[]"); const active = try s.getActiveRuns(allocator); defer { @@ -1500,40 +2055,18 @@ test "Store: get active runs" { allocator.free(r.id); if (r.idempotency_key) |ik| allocator.free(ik); allocator.free(r.status); + if (r.workflow_id) |wid| allocator.free(wid); allocator.free(r.workflow_json); allocator.free(r.input_json); allocator.free(r.callbacks_json); + if (r.error_text) |et| allocator.free(et); + if (r.state_json) |sj| allocator.free(sj); + if (r.config_json) |cj| allocator.free(cj); + if (r.parent_run_id) |pid| allocator.free(pid); } allocator.free(active); } - try std.testing.expectEqual(@as(usize, 2), active.len); -} - -test "Store: step deps and ready steps" { - const allocator = std.testing.allocator; - var s = try Store.init(allocator, ":memory:"); - defer s.deinit(); - - try 
s.insertRun("r1", null, "running", "{}", "{}", "[]"); - try s.insertStep("s1", "r1", "step1", "task", "ready", "{}", 1, null, null, null); - try s.insertStep("s2", "r1", "step2", "task", "ready", "{}", 1, null, null, null); - try s.insertStepDep("s2", "s1"); - - // s1 should be ready (no unsatisfied deps), s2 should NOT (depends on s1 which is 'ready' not 'completed') - const ready = try s.getReadySteps(allocator, "r1"); - defer { - for (ready) |step| { - allocator.free(step.id); - allocator.free(step.run_id); - allocator.free(step.def_step_id); - allocator.free(step.type); - allocator.free(step.status); - allocator.free(step.input_json); - } - allocator.free(ready); - } - try std.testing.expectEqual(@as(usize, 1), ready.len); - try std.testing.expectEqualStrings("s1", ready[0].id); + try std.testing.expectEqual(@as(usize, 1), active.len); } test "Store: count steps by status" { @@ -1679,137 +2212,444 @@ test "Store: get nonexistent step returns null" { try std.testing.expect(step == null); } -test "cycle state: upsert and get" { +test "updateStepChildRunId: sets child_run_id on step" { const allocator = std.testing.allocator; var s = try Store.init(allocator, ":memory:"); defer s.deinit(); - // Insert a run first (cycle_state references runs(id)) + // Create a run and step try s.insertRun("r1", null, "running", "{}", "{}", "[]"); + try s.insertRun("child_r1", null, "running", "{}", "{}", "[]"); + try s.insertStep("s1", "r1", "sub_wf", "sub_workflow", "running", "{}", 1, null, null, null); + + // Update child_run_id + try s.updateStepChildRunId("s1", "child_r1"); + + // Get step and verify child_run_id is set + const step = (try s.getStep(allocator, "s1")).?; + defer { + allocator.free(step.id); + allocator.free(step.run_id); + allocator.free(step.def_step_id); + allocator.free(step.type); + allocator.free(step.status); + allocator.free(step.input_json); + if (step.child_run_id) |crid| allocator.free(crid); + } + try std.testing.expectEqualStrings("child_r1", 
step.child_run_id.?); +} + +test "workflow CRUD" { + const allocator = std.testing.allocator; + var s = try Store.init(allocator, ":memory:"); + defer s.deinit(); + + // Create + try s.createWorkflow("wf1", "My Workflow", "{\"steps\":[]}"); - // Upsert cycle state - try s.upsertCycleState("r1", "loop_A", 1, 10); + // Get + const wf = (try s.getWorkflow(allocator, "wf1")).?; + defer { + allocator.free(wf.id); + allocator.free(wf.name); + allocator.free(wf.definition_json); + } + try std.testing.expectEqualStrings("wf1", wf.id); + try std.testing.expectEqualStrings("My Workflow", wf.name); + try std.testing.expectEqualStrings("{\"steps\":[]}", wf.definition_json); + try std.testing.expect(wf.created_at_ms > 0); + try std.testing.expect(wf.updated_at_ms > 0); + + // Update + try s.updateWorkflow("wf1", "Updated Workflow", "{\"steps\":[{\"id\":\"s1\"}]}"); + const wf2 = (try s.getWorkflow(allocator, "wf1")).?; + defer { + allocator.free(wf2.id); + allocator.free(wf2.name); + allocator.free(wf2.definition_json); + } + try std.testing.expectEqualStrings("Updated Workflow", wf2.name); + try std.testing.expectEqualStrings("{\"steps\":[{\"id\":\"s1\"}]}", wf2.definition_json); - // Get and verify values - const cs = (try s.getCycleState("r1", "loop_A")).?; - try std.testing.expectEqual(@as(i64, 1), cs.iteration_count); - try std.testing.expectEqual(@as(i64, 10), cs.max_iterations); + // List + try s.createWorkflow("wf2", "Second Workflow", "{}"); + const workflows = try s.listWorkflows(allocator); + defer { + for (workflows) |w| { + allocator.free(w.id); + allocator.free(w.name); + allocator.free(w.definition_json); + } + allocator.free(workflows); + } + try std.testing.expectEqual(@as(usize, 2), workflows.len); - // Upsert again with new iteration_count - try s.upsertCycleState("r1", "loop_A", 5, 10); + // Delete + try s.deleteWorkflow("wf1"); + const deleted = try s.getWorkflow(allocator, "wf1"); + try std.testing.expect(deleted == null); - // Verify updated value - const 
cs2 = (try s.getCycleState("r1", "loop_A")).?; - try std.testing.expectEqual(@as(i64, 5), cs2.iteration_count); - try std.testing.expectEqual(@as(i64, 10), cs2.max_iterations); + // Remaining list + const remaining = try s.listWorkflows(allocator); + defer { + for (remaining) |w| { + allocator.free(w.id); + allocator.free(w.name); + allocator.free(w.definition_json); + } + allocator.free(remaining); + } + try std.testing.expectEqual(@as(usize, 1), remaining.len); + try std.testing.expectEqualStrings("wf2", remaining[0].id); } -test "cycle state: get returns null for nonexistent" { +test "checkpoint lifecycle" { const allocator = std.testing.allocator; var s = try Store.init(allocator, ":memory:"); defer s.deinit(); - const cs = try s.getCycleState("no_run", "no_key"); - try std.testing.expect(cs == null); + // Create a run + try s.insertRun("r1", null, "running", "{}", "{}", "[]"); + + // Create checkpoints with parent chain + try s.createCheckpoint("cp1", "r1", "step_a", null, "{\"x\":1}", "[\"step_a\"]", 1, null); + try s.createCheckpoint("cp2", "r1", "step_b", "cp1", "{\"x\":2}", "[\"step_a\",\"step_b\"]", 2, "{\"note\":\"test\"}"); + try s.createCheckpoint("cp3", "r1", "step_c", "cp2", "{\"x\":3}", "[\"step_a\",\"step_b\",\"step_c\"]", 3, null); + + // Get single checkpoint + const cp1 = (try s.getCheckpoint(allocator, "cp1")).?; + defer { + allocator.free(cp1.id); + allocator.free(cp1.run_id); + allocator.free(cp1.step_id); + if (cp1.parent_id) |pid| allocator.free(pid); + allocator.free(cp1.state_json); + allocator.free(cp1.completed_nodes_json); + if (cp1.metadata_json) |mj| allocator.free(mj); + } + try std.testing.expectEqualStrings("cp1", cp1.id); + try std.testing.expectEqualStrings("r1", cp1.run_id); + try std.testing.expectEqualStrings("step_a", cp1.step_id); + try std.testing.expect(cp1.parent_id == null); + try std.testing.expectEqualStrings("{\"x\":1}", cp1.state_json); + try std.testing.expectEqual(@as(i64, 1), cp1.version); + try 
std.testing.expect(cp1.metadata_json == null); + + // Get checkpoint with parent and metadata + const cp2 = (try s.getCheckpoint(allocator, "cp2")).?; + defer { + allocator.free(cp2.id); + allocator.free(cp2.run_id); + allocator.free(cp2.step_id); + if (cp2.parent_id) |pid| allocator.free(pid); + allocator.free(cp2.state_json); + allocator.free(cp2.completed_nodes_json); + if (cp2.metadata_json) |mj| allocator.free(mj); + } + try std.testing.expectEqualStrings("cp1", cp2.parent_id.?); + try std.testing.expectEqualStrings("{\"note\":\"test\"}", cp2.metadata_json.?); + + // List checkpoints (ordered by version ASC) + const cps = try s.listCheckpoints(allocator, "r1"); + defer { + for (cps) |cp| { + allocator.free(cp.id); + allocator.free(cp.run_id); + allocator.free(cp.step_id); + if (cp.parent_id) |pid| allocator.free(pid); + allocator.free(cp.state_json); + allocator.free(cp.completed_nodes_json); + if (cp.metadata_json) |mj| allocator.free(mj); + } + allocator.free(cps); + } + try std.testing.expectEqual(@as(usize, 3), cps.len); + try std.testing.expectEqualStrings("cp1", cps[0].id); + try std.testing.expectEqualStrings("cp3", cps[2].id); + + // Get latest checkpoint + const latest = (try s.getLatestCheckpoint(allocator, "r1")).?; + defer { + allocator.free(latest.id); + allocator.free(latest.run_id); + allocator.free(latest.step_id); + if (latest.parent_id) |pid| allocator.free(pid); + allocator.free(latest.state_json); + allocator.free(latest.completed_nodes_json); + if (latest.metadata_json) |mj| allocator.free(mj); + } + try std.testing.expectEqualStrings("cp3", latest.id); + try std.testing.expectEqual(@as(i64, 3), latest.version); + + // Get nonexistent checkpoint + const none = try s.getCheckpoint(allocator, "nonexistent"); + try std.testing.expect(none == null); + + // Get latest for run with no checkpoints + const no_latest = try s.getLatestCheckpoint(allocator, "no_run"); + try std.testing.expect(no_latest == null); } -test "chat messages: insert and get 
ordered by round" { +test "agent events" { const allocator = std.testing.allocator; var s = try Store.init(allocator, ":memory:"); defer s.deinit(); + // Create a run try s.insertRun("r1", null, "running", "{}", "{}", "[]"); - try s.insertStep("s1", "r1", "chat_step", "group_chat", "running", "{}", 1, null, null, null); - // Insert messages with different rounds (out of order) - try s.insertChatMessage("r1", "s1", 2, "assistant", "w1", "round 2 message"); - try s.insertChatMessage("r1", "s1", 1, "user", null, "round 1 message"); - try s.insertChatMessage("r1", "s1", 1, "assistant", "w1", "round 1 reply"); + // Create agent events + try s.createAgentEvent("r1", "step_a", 1, "read_file", "{\"path\":\"foo.txt\"}", "contents here", "completed"); + try s.createAgentEvent("r1", "step_a", 2, "write_file", "{\"path\":\"bar.txt\"}", null, "completed"); + try s.createAgentEvent("r1", "step_a", 3, null, null, null, "thinking"); + try s.createAgentEvent("r1", "step_b", 1, "search", "{}", "results", "completed"); - // Verify getChatMessages returns them ordered by round, id - const msgs = try s.getChatMessages(allocator, "s1"); + // List by run+step + const events_a = try s.listAgentEvents(allocator, "r1", "step_a"); + defer { + for (events_a) |ev| { + allocator.free(ev.run_id); + allocator.free(ev.step_id); + if (ev.tool) |t| allocator.free(t); + if (ev.args_json) |a| allocator.free(a); + if (ev.result_text) |r| allocator.free(r); + allocator.free(ev.status); + } + allocator.free(events_a); + } + try std.testing.expectEqual(@as(usize, 3), events_a.len); + try std.testing.expectEqualStrings("read_file", events_a[0].tool.?); + try std.testing.expectEqual(@as(i64, 1), events_a[0].iteration); + try std.testing.expectEqualStrings("contents here", events_a[0].result_text.?); + try std.testing.expect(events_a[2].tool == null); + try std.testing.expectEqualStrings("thinking", events_a[2].status); + + // List different step + const events_b = try s.listAgentEvents(allocator, "r1", 
"step_b"); defer { - for (msgs) |m| { - allocator.free(m.run_id); - allocator.free(m.step_id); - allocator.free(m.role); - if (m.worker_id) |wid| allocator.free(wid); - allocator.free(m.message); - } - allocator.free(msgs); - } - try std.testing.expectEqual(@as(usize, 3), msgs.len); - // First two should be round 1 (ordered by id within round) - try std.testing.expectEqual(@as(i64, 1), msgs[0].round); - try std.testing.expectEqual(@as(i64, 1), msgs[1].round); - try std.testing.expectEqual(@as(i64, 2), msgs[2].round); - try std.testing.expectEqualStrings("round 1 message", msgs[0].message); - try std.testing.expectEqualStrings("round 1 reply", msgs[1].message); - try std.testing.expectEqualStrings("round 2 message", msgs[2].message); + for (events_b) |ev| { + allocator.free(ev.run_id); + allocator.free(ev.step_id); + if (ev.tool) |t| allocator.free(t); + if (ev.args_json) |a| allocator.free(a); + if (ev.result_text) |r| allocator.free(r); + allocator.free(ev.status); + } + allocator.free(events_b); + } + try std.testing.expectEqual(@as(usize, 1), events_b.len); + try std.testing.expectEqualStrings("search", events_b[0].tool.?); + + // Empty list for nonexistent + const empty = try s.listAgentEvents(allocator, "r1", "nonexistent"); + defer allocator.free(empty); + try std.testing.expectEqual(@as(usize, 0), empty.len); } -test "saga state: insert, update status, and get" { +test "pending state injections" { const allocator = std.testing.allocator; var s = try Store.init(allocator, ":memory:"); defer s.deinit(); + // Create a run try s.insertRun("r1", null, "running", "{}", "{}", "[]"); - try s.insertStep("saga1", "r1", "saga_def", "saga", "running", "{}", 1, null, null, null); - try s.insertStep("body1", "r1", "body_def1", "task", "pending", "{}", 1, null, "saga1", null); - try s.insertStep("body2", "r1", "body_def2", "task", "pending", "{}", 1, null, "saga1", null); - try s.insertStep("comp1", "r1", "comp_def1", "task", "pending", "{}", 1, null, "saga1", null); - // 
Insert saga states for body steps - try s.insertSagaState("r1", "saga1", "body1", "comp1"); - try s.insertSagaState("r1", "saga1", "body2", null); + // Create pending injections + try s.createPendingInjection("r1", "{\"counter\":5}", "step_a"); + try s.createPendingInjection("r1", "{\"flag\":true}", "step_b"); + try s.createPendingInjection("r1", "{\"immediate\":1}", null); // apply immediately (NULL apply_after_step) - // Update one to 'completed' - try s.updateSagaState("r1", "saga1", "body1", "completed"); + // Consume by step_a -- should get the step_a injection and the NULL one + const consumed_a = try s.consumePendingInjections(allocator, "r1", "step_a"); + defer { + for (consumed_a) |inj| { + allocator.free(inj.run_id); + allocator.free(inj.updates_json); + if (inj.apply_after_step) |s_a| allocator.free(s_a); + } + allocator.free(consumed_a); + } + try std.testing.expectEqual(@as(usize, 2), consumed_a.len); + try std.testing.expectEqualStrings("{\"counter\":5}", consumed_a[0].updates_json); + try std.testing.expectEqualStrings("{\"immediate\":1}", consumed_a[1].updates_json); + + // Consuming again for step_a should return empty (already consumed) + const consumed_again = try s.consumePendingInjections(allocator, "r1", "step_a"); + defer allocator.free(consumed_again); + try std.testing.expectEqual(@as(usize, 0), consumed_again.len); - // Verify getSagaStates returns correct statuses - const states = try s.getSagaStates(allocator, "r1", "saga1"); + // step_b injection should still be pending + const consumed_b = try s.consumePendingInjections(allocator, "r1", "step_b"); defer { - for (states) |st| { - allocator.free(st.run_id); - allocator.free(st.saga_step_id); - allocator.free(st.body_step_id); - if (st.compensation_step_id) |cid| allocator.free(cid); - allocator.free(st.status); - } - allocator.free(states); - } - try std.testing.expectEqual(@as(usize, 2), states.len); - try std.testing.expectEqualStrings("body1", states[0].body_step_id); - try 
std.testing.expectEqualStrings("completed", states[0].status); - try std.testing.expectEqualStrings("comp1", states[0].compensation_step_id.?); - try std.testing.expectEqualStrings("body2", states[1].body_step_id); - try std.testing.expectEqualStrings("pending", states[1].status); - try std.testing.expect(states[1].compensation_step_id == null); + for (consumed_b) |inj| { + allocator.free(inj.run_id); + allocator.free(inj.updates_json); + if (inj.apply_after_step) |s_a| allocator.free(s_a); + } + allocator.free(consumed_b); + } + try std.testing.expectEqual(@as(usize, 1), consumed_b.len); + try std.testing.expectEqualStrings("{\"flag\":true}", consumed_b[0].updates_json); + + // Test discard + try s.createPendingInjection("r1", "{\"discard_me\":true}", "step_c"); + try s.discardPendingInjections("r1"); + const after_discard = try s.consumePendingInjections(allocator, "r1", "step_c"); + defer allocator.free(after_discard); + try std.testing.expectEqual(@as(usize, 0), after_discard.len); } -test "updateStepChildRunId: sets child_run_id on step" { +test "run state management" { const allocator = std.testing.allocator; var s = try Store.init(allocator, ":memory:"); defer s.deinit(); - // Create a run and step - try s.insertRun("r1", null, "running", "{}", "{}", "[]"); - try s.insertRun("child_r1", null, "running", "{}", "{}", "[]"); - try s.insertStep("s1", "r1", "sub_wf", "sub_workflow", "running", "{}", 1, null, null, null); + // Create run with state + try s.createRunWithState("r1", null, "{\"steps\":[]}", "{\"input\":1}", "{\"counter\":0}"); + const run = (try s.getRun(allocator, "r1")).?; + defer { + allocator.free(run.id); + if (run.idempotency_key) |ik| allocator.free(ik); + allocator.free(run.status); + if (run.workflow_id) |wid| allocator.free(wid); + allocator.free(run.workflow_json); + allocator.free(run.input_json); + allocator.free(run.callbacks_json); + if (run.error_text) |et| allocator.free(et); + if (run.state_json) |sj| allocator.free(sj); + } + try 
std.testing.expectEqualStrings("r1", run.id); + try std.testing.expectEqualStrings("pending", run.status); + try std.testing.expectEqualStrings("{\"steps\":[]}", run.workflow_json); - // Update child_run_id - try s.updateStepChildRunId("s1", "child_r1"); + // Create run with workflow_id + try s.createWorkflow("wf1", "Test WF", "{\"steps\":[]}"); + try s.createRunWithState("r2", "wf1", "{\"steps\":[]}", "{}", "{}"); + const run2 = (try s.getRun(allocator, "r2")).?; + defer { + allocator.free(run2.id); + if (run2.idempotency_key) |ik| allocator.free(ik); + allocator.free(run2.status); + if (run2.workflow_id) |wid| allocator.free(wid); + allocator.free(run2.workflow_json); + allocator.free(run2.input_json); + allocator.free(run2.callbacks_json); + if (run2.error_text) |et| allocator.free(et); + if (run2.state_json) |sj| allocator.free(sj); + } + try std.testing.expectEqualStrings("r2", run2.id); + try std.testing.expectEqualStrings("wf1", run2.workflow_id.?); + + // Update run state + try s.updateRunState("r1", "{\"counter\":42}"); + + // Increment checkpoint count + try s.incrementCheckpointCount("r1"); + try s.incrementCheckpointCount("r1"); + + // Create forked run + try s.createCheckpoint("cp1", "r1", "step_a", null, "{}", "[]", 1, null); + try s.createForkedRun("r3", "{\"steps\":[]}", "{\"counter\":42}", "r1", "cp1"); + const forked = (try s.getRun(allocator, "r3")).?; + defer { + allocator.free(forked.id); + if (forked.idempotency_key) |ik| allocator.free(ik); + allocator.free(forked.status); + if (forked.workflow_id) |wid| allocator.free(wid); + allocator.free(forked.workflow_json); + allocator.free(forked.input_json); + allocator.free(forked.callbacks_json); + if (forked.error_text) |et| allocator.free(et); + if (forked.state_json) |sj| allocator.free(sj); + } + try std.testing.expectEqualStrings("r3", forked.id); + try std.testing.expectEqualStrings("pending", forked.status); +} - // Get step and verify child_run_id is set - const step = (try 
s.getStep(allocator, "s1")).?; +test "token accounting: update step and run tokens" { + const allocator = std.testing.allocator; + var s = try Store.init(allocator, ":memory:"); + defer s.deinit(); + + try s.createRunWithState("r-tok", null, "{}", "{}", "{}"); + try s.updateRunStatus("r-tok", "running", null); + try s.insertStep("s-tok", "r-tok", "task1", "task", "completed", "{}", 1, null, null, null); + + // Update step tokens + try s.updateStepTokens("s-tok", 100, 200); + + // Update run tokens + try s.updateRunTokens("r-tok", 100, 200); + + // Verify run tokens + const tokens = try s.getRunTokens("r-tok"); + try std.testing.expectEqual(@as(i64, 100), tokens.input); + try std.testing.expectEqual(@as(i64, 200), tokens.output); + try std.testing.expectEqual(@as(i64, 300), tokens.total); + + // Accumulate more tokens + try s.updateRunTokens("r-tok", 50, 75); + const tokens2 = try s.getRunTokens("r-tok"); + try std.testing.expectEqual(@as(i64, 150), tokens2.input); + try std.testing.expectEqual(@as(i64, 275), tokens2.output); + try std.testing.expectEqual(@as(i64, 425), tokens2.total); +} + +test "workflow version CRUD" { + const allocator = std.testing.allocator; + var s = try Store.init(allocator, ":memory:"); + defer s.deinit(); + + // Create workflow with default version (1) + try s.createWorkflow("wf1", "Test Workflow", "{\"nodes\":{}}"); + const wf1 = (try s.getWorkflow(allocator, "wf1")).?; defer { - allocator.free(step.id); - allocator.free(step.run_id); - allocator.free(step.def_step_id); - allocator.free(step.type); - allocator.free(step.status); - allocator.free(step.input_json); - if (step.child_run_id) |crid| allocator.free(crid); + allocator.free(wf1.id); + allocator.free(wf1.name); + allocator.free(wf1.definition_json); } - try std.testing.expectEqualStrings("child_r1", step.child_run_id.?); + try std.testing.expectEqual(@as(i64, 1), wf1.version); + + // Create workflow with explicit version + try s.createWorkflowWithVersion("wf2", "Versioned 
Workflow", "{\"nodes\":{}}", 5); + const wf2 = (try s.getWorkflow(allocator, "wf2")).?; + defer { + allocator.free(wf2.id); + allocator.free(wf2.name); + allocator.free(wf2.definition_json); + } + try std.testing.expectEqual(@as(i64, 5), wf2.version); + + // Update workflow with new version + try s.updateWorkflowWithVersion("wf2", "Updated", "{\"nodes\":{\"a\":{}}}", 6); + const wf3 = (try s.getWorkflow(allocator, "wf2")).?; + defer { + allocator.free(wf3.id); + allocator.free(wf3.name); + allocator.free(wf3.definition_json); + } + try std.testing.expectEqual(@as(i64, 6), wf3.version); + try std.testing.expectEqualStrings("Updated", wf3.name); + + // Update without changing version + try s.updateWorkflow("wf1", "Still v1", "{\"nodes\":{\"b\":{}}}"); + const wf4 = (try s.getWorkflow(allocator, "wf1")).?; + defer { + allocator.free(wf4.id); + allocator.free(wf4.name); + allocator.free(wf4.definition_json); + } + try std.testing.expectEqual(@as(i64, 1), wf4.version); + + // List workflows should include version + const workflows = try s.listWorkflows(allocator); + defer { + for (workflows) |w| { + allocator.free(w.id); + allocator.free(w.name); + allocator.free(w.definition_json); + } + allocator.free(workflows); + } + try std.testing.expectEqual(@as(usize, 2), workflows.len); } diff --git a/src/templates.zig b/src/templates.zig index d928522..0332f02 100644 --- a/src/templates.zig +++ b/src/templates.zig @@ -1,38 +1,39 @@ /// Template engine for prompt rendering. /// Resolves `{{...}}` expressions against workflow context. 
/// -/// Supported expressions: +/// Legacy Context + render(): /// - `{{input.X}}` -- look up key X in the workflow input JSON /// - `{{input.X.Y}}` -- nested object lookups inside workflow input JSON -/// - `{{steps.ID.output}}` -- output of a single completed step -/// - `{{steps.ID.outputs}}` -- JSON array of outputs from map/fan_out child steps /// - `{{item}}` -- current item string for map iterations -/// - `{{task.X}}` -- look up field X in the NullTickets task JSON (supports nested paths like `task.metadata.repo_url`) +/// - `{{task.X}}` -- look up field X in the NullTickets task JSON +/// - `{{attempt}}` -- current retry attempt number +/// +/// State-based renderTemplate(): +/// - `{{state.X}}` -- look up key X in the unified state JSON +/// - `{{state.X.Y}}` -- nested paths with optional [-1] array indexing +/// - `{{input.X}}` -- look up key X in the workflow input JSON +/// - `{{item}}` -- current item string for send iterations +/// - `{{store.ns.key}}` -- fetch NullTickets store entry value /// /// Conditional blocks: /// - `{% if %}...{% endif %}` /// - `{% if %}...{% else %}...{% endif %}` /// Conditionals are processed before expression substitution. /// Truthiness: non-null, non-empty, not "false", not "null" string values are truthy. 
- const std = @import("std"); // ── Context ─────────────────────────────────────────────────────────── pub const Context = struct { input_json: []const u8, // raw JSON string of workflow input - step_outputs: []const StepOutput, // completed step outputs - item: ?[]const u8, // current map item (null if not in map) - debate_responses: ?[]const u8 = null, // JSON array string for debate judge template - chat_history: ?[]const u8 = null, // formatted chat transcript for group_chat round_template - role: ?[]const u8 = null, // participant role for group_chat round_template + step_outputs: []const StepOutput, // completed step outputs (legacy, for tracker.zig) + item: ?[]const u8, // current item string (null if not in map/send) task_json: ?[]const u8 = null, // raw JSON string of NullTickets task data attempt: ?u32 = null, // current retry attempt number pub const StepOutput = struct { step_id: []const u8, output: ?[]const u8, // single output (for task steps) - outputs: ?[]const []const u8, // array of outputs (for fan_out/map parent) }; }; @@ -222,27 +223,6 @@ fn resolveExpression(allocator: std.mem.Allocator, expr: []const u8, ctx: Contex return error.ItemNotAvailable; } - if (std.mem.eql(u8, expr, "debate_responses")) { - if (ctx.debate_responses) |dr| { - return allocator.dupe(u8, dr) catch return error.OutOfMemory; - } - return allocator.dupe(u8, "[]") catch return error.OutOfMemory; - } - - if (std.mem.eql(u8, expr, "chat_history")) { - if (ctx.chat_history) |ch| { - return allocator.dupe(u8, ch) catch return error.OutOfMemory; - } - return allocator.dupe(u8, "") catch return error.OutOfMemory; - } - - if (std.mem.eql(u8, expr, "role")) { - if (ctx.role) |r| { - return allocator.dupe(u8, r) catch return error.OutOfMemory; - } - return allocator.dupe(u8, "") catch return error.OutOfMemory; - } - if (std.mem.eql(u8, expr, "attempt")) { if (ctx.attempt) |a| { return std.fmt.allocPrint(allocator, "{d}", .{a}) catch return error.OutOfMemory; @@ -292,7 +272,7 @@ fn 
resolveInputField(allocator: std.mem.Allocator, input_json: []const u8, field } fn resolveStepRef(allocator: std.mem.Allocator, rest: []const u8, step_outputs: []const Context.StepOutput) RenderError![]const u8 { - // rest is "ID.output" or "ID.outputs" + // rest is "ID.output" const dot_pos = std.mem.lastIndexOfScalar(u8, rest, '.') orelse return error.UnknownExpression; const step_id = rest[0..dot_pos]; const field = rest[dot_pos + 1 ..]; @@ -306,9 +286,6 @@ fn resolveStepRef(allocator: std.mem.Allocator, rest: []const u8, step_outputs: } return allocator.dupe(u8, "") catch return error.OutOfMemory; } - if (std.mem.eql(u8, field, "outputs")) { - return serializeOutputs(allocator, so.outputs); - } return error.UnknownExpression; } } @@ -336,38 +313,6 @@ fn resolveTaskField(allocator: std.mem.Allocator, task_json: []const u8, field_p return jsonValueToString(allocator, current); } -fn serializeOutputs(allocator: std.mem.Allocator, outputs: ?[]const []const u8) RenderError![]const u8 { - const items = outputs orelse { - return allocator.dupe(u8, "[]") catch return error.OutOfMemory; - }; - - var buf: std.ArrayListUnmanaged(u8) = .empty; - errdefer buf.deinit(allocator); - - buf.append(allocator, '[') catch return error.OutOfMemory; - for (items, 0..) 
|item, i| { - if (i > 0) { - buf.append(allocator, ',') catch return error.OutOfMemory; - } - // Write JSON-escaped string - buf.append(allocator, '"') catch return error.OutOfMemory; - for (item) |c| { - switch (c) { - '"' => buf.appendSlice(allocator, "\\\"") catch return error.OutOfMemory, - '\\' => buf.appendSlice(allocator, "\\\\") catch return error.OutOfMemory, - '\n' => buf.appendSlice(allocator, "\\n") catch return error.OutOfMemory, - '\r' => buf.appendSlice(allocator, "\\r") catch return error.OutOfMemory, - '\t' => buf.appendSlice(allocator, "\\t") catch return error.OutOfMemory, - else => buf.append(allocator, c) catch return error.OutOfMemory, - } - } - buf.append(allocator, '"') catch return error.OutOfMemory; - } - buf.append(allocator, ']') catch return error.OutOfMemory; - - return buf.toOwnedSlice(allocator) catch return error.OutOfMemory; -} - fn jsonValueToString(allocator: std.mem.Allocator, val: std.json.Value) RenderError![]const u8 { switch (val) { .string => |s| { @@ -403,7 +348,446 @@ fn jsonValueToString(allocator: std.mem.Allocator, val: std.json.Value) RenderEr } } -// ── Tests ───────────────────────────────────────────────────────────── +// ── New state-based template engine ─────────────────────────────────── + +const state_mod = @import("state.zig"); +const tracker_client = @import("tracker_client.zig"); +const Allocator = std.mem.Allocator; + +pub const StoreFetcher = *const fn ( + alloc: Allocator, + base_url: []const u8, + api_token: ?[]const u8, + namespace: []const u8, + key: []const u8, +) anyerror!?[]const u8; + +pub const StoreAccess = struct { + base_url: []const u8, + api_token: ?[]const u8 = null, + fetcher: StoreFetcher, +}; + +pub fn fetchStoreValueHttp( + alloc: Allocator, + base_url: []const u8, + api_token: ?[]const u8, + namespace: []const u8, + key: []const u8, +) !?[]const u8 { + var client = tracker_client.TrackerClient.init(alloc, base_url, api_token); + return client.storeGetValue(namespace, key); +} + +/// 
Strip surrounding double quotes from a JSON string value.
+/// `"hello"` -> `hello`, `42` -> `42`, `[1,2]` -> `[1,2]` (outer quotes only; embedded escapes are left as-is)
+fn stripJsonQuotes(s: []const u8) []const u8 {
+    if (s.len >= 2 and s[0] == '"' and s[s.len - 1] == '"') {
+        return s[1 .. s.len - 1];
+    }
+    return s;
+}
+
+/// Look up a value from a JSON blob by dotted path (no prefix stripping).
+/// E.g. lookupJsonPath(alloc, '{"topic":"AI"}', "topic") -> "AI"
+fn lookupJsonPath(alloc: Allocator, json_bytes: []const u8, path: []const u8) !?[]const u8 {
+    // Reuse state_mod.getStateValue but without "state." prefix.
+    // getStateValue strips "state." if present, otherwise uses path as-is.
+    return try state_mod.getStateValue(alloc, json_bytes, path);
+}
+
+/// Resolve a template expression (the text inside `{{ }}`) to a string value.
+/// Handles state.X, input.X, item, item.X, config.X, and store.ns.key expressions.
+fn resolveNewExpression(
+    alloc: Allocator,
+    expr: []const u8,
+    state_json: []const u8,
+    input_json: ?[]const u8,
+    item_json: ?[]const u8,
+    store_access: ?StoreAccess,
+) ![]const u8 {
+    if (std.mem.startsWith(u8, expr, "state.")) {
+        // Use getStateValue which handles "state." 
prefix, nested paths, [-1] indexing + const raw = try state_mod.getStateValue(alloc, state_json, expr); + if (raw) |r| { + // Strip quotes for strings; leave numbers/bools/arrays/objects as-is + const stripped = stripJsonQuotes(r); + if (stripped.ptr != r.ptr or stripped.len != r.len) { + // It was a quoted string — dupe the unquoted version and free the original + const result = alloc.dupe(u8, stripped) catch return error.OutOfMemory; + alloc.free(r); + return result; + } + return r; + } + return alloc.dupe(u8, "") catch return error.OutOfMemory; + } + + if (std.mem.startsWith(u8, expr, "input.")) { + const ij = input_json orelse { + return alloc.dupe(u8, "") catch return error.OutOfMemory; + }; + const field = expr["input.".len..]; + const raw = try lookupJsonPath(alloc, ij, field); + if (raw) |r| { + const stripped = stripJsonQuotes(r); + if (stripped.ptr != r.ptr or stripped.len != r.len) { + const result = alloc.dupe(u8, stripped) catch return error.OutOfMemory; + alloc.free(r); + return result; + } + return r; + } + return alloc.dupe(u8, "") catch return error.OutOfMemory; + } + + if (std.mem.eql(u8, expr, "item")) { + if (item_json) |ij| { + const stripped = stripJsonQuotes(ij); + return alloc.dupe(u8, stripped) catch return error.OutOfMemory; + } + return alloc.dupe(u8, "") catch return error.OutOfMemory; + } + + if (std.mem.startsWith(u8, expr, "item.")) { + const ij = item_json orelse { + return alloc.dupe(u8, "") catch return error.OutOfMemory; + }; + const field = expr["item.".len..]; + const raw = try lookupJsonPath(alloc, ij, field); + if (raw) |r| { + const stripped = stripJsonQuotes(r); + if (stripped.ptr != r.ptr or stripped.len != r.len) { + const result = alloc.dupe(u8, stripped) catch return error.OutOfMemory; + alloc.free(r); + return result; + } + return r; + } + return alloc.dupe(u8, "") catch return error.OutOfMemory; + } + + // {{config.X}} — alias for {{state.__config.X}} + if (std.mem.startsWith(u8, expr, "config.")) { + const config_path 
= try std.fmt.allocPrint(alloc, "state.__config.{s}", .{expr["config.".len..]}); + defer alloc.free(config_path); + const raw = try state_mod.getStateValue(alloc, state_json, config_path); + if (raw) |r| { + const stripped = stripJsonQuotes(r); + if (stripped.ptr != r.ptr or stripped.len != r.len) { + const result = alloc.dupe(u8, stripped) catch return error.OutOfMemory; + alloc.free(r); + return result; + } + return r; + } + return alloc.dupe(u8, "") catch return error.OutOfMemory; + } + + if (std.mem.startsWith(u8, expr, "store.")) { + const access = store_access orelse return error.StoreNotConfigured; + const store_expr = expr["store.".len..]; + const dot = std.mem.indexOfScalar(u8, store_expr, '.') orelse return error.InvalidStoreExpression; + const namespace = store_expr[0..dot]; + const key = store_expr[dot + 1 ..]; + if (namespace.len == 0 or key.len == 0) return error.InvalidStoreExpression; + + const raw = try access.fetcher(alloc, access.base_url, access.api_token, namespace, key); + if (raw) |r| { + const stripped = stripJsonQuotes(r); + if (stripped.ptr != r.ptr or stripped.len != r.len) { + const result = alloc.dupe(u8, stripped) catch return error.OutOfMemory; + alloc.free(r); + return result; + } + return r; + } + return alloc.dupe(u8, "") catch return error.OutOfMemory; + } + + // Unknown expression — return empty + return alloc.dupe(u8, "") catch return error.OutOfMemory; +} + +/// Check if a condition expression is truthy for the new template engine. 
+/// Truthy: non-null, non-empty, not "false", not "0", not "null", not empty array "[]" +fn isNewTruthy( + alloc: Allocator, + expr: []const u8, + state_json: []const u8, + input_json: ?[]const u8, + item_json: ?[]const u8, + store_access: ?StoreAccess, +) bool { + const value = resolveNewExpression(alloc, expr, state_json, input_json, item_json, store_access) catch return false; + defer alloc.free(value); + + if (value.len == 0) return false; + if (std.mem.eql(u8, value, "false")) return false; + if (std.mem.eql(u8, value, "0")) return false; + if (std.mem.eql(u8, value, "null")) return false; + if (std.mem.eql(u8, value, "[]")) return false; + return true; +} + +/// Process `{% if expr %}...{% endif %}` conditional blocks for the new engine. +fn processNewConditionals( + alloc: Allocator, + template: []const u8, + state_json: []const u8, + input_json: ?[]const u8, + item_json: ?[]const u8, + store_access: ?StoreAccess, +) ![]const u8 { + var result: std.ArrayListUnmanaged(u8) = .empty; + errdefer result.deinit(alloc); + + var pos: usize = 0; + + while (pos < template.len) { + if (std.mem.indexOfPos(u8, template, pos, "{%")) |open| { + result.appendSlice(alloc, template[pos..open]) catch return error.OutOfMemory; + + const after_open = open + 2; + const close = std.mem.indexOfPos(u8, template, after_open, "%}") orelse + return error.OutOfMemory; + const tag_content = std.mem.trim(u8, template[after_open..close], " \t\n\r"); + const after_tag = close + 2; + + if (std.mem.startsWith(u8, tag_content, "if ")) { + const expr = std.mem.trim(u8, tag_content["if ".len..], " \t\n\r"); + + // Find matching {% endif %} at this nesting level + var depth: usize = 0; + var scan: usize = after_tag; + var else_start: ?usize = null; + var else_end: ?usize = null; + var endif_start: ?usize = null; + var endif_end: ?usize = null; + + while (scan < template.len) { + if (std.mem.indexOfPos(u8, template, scan, "{%")) |inner_open| { + const inner_after = inner_open + 2; + const 
inner_close = std.mem.indexOfPos(u8, template, inner_after, "%}") orelse + return error.OutOfMemory; + const inner_tag = std.mem.trim(u8, template[inner_after..inner_close], " \t\n\r"); + const inner_after_tag = inner_close + 2; + + if (std.mem.startsWith(u8, inner_tag, "if ")) { + depth += 1; + scan = inner_after_tag; + } else if (std.mem.eql(u8, inner_tag, "else") and depth == 0) { + else_start = inner_open; + else_end = inner_after_tag; + scan = inner_after_tag; + } else if (std.mem.eql(u8, inner_tag, "endif")) { + if (depth == 0) { + endif_start = inner_open; + endif_end = inner_after_tag; + break; + } + depth -= 1; + scan = inner_after_tag; + } else { + scan = inner_after_tag; + } + } else { + break; + } + } + + if (endif_end == null) { + return error.OutOfMemory; + } + + const truthy = isNewTruthy(alloc, expr, state_json, input_json, item_json, store_access); + + if (truthy) { + const branch_end = else_start orelse endif_start.?; + const branch = template[after_tag..branch_end]; + const processed = try processNewConditionals(alloc, branch, state_json, input_json, item_json, store_access); + defer alloc.free(processed); + result.appendSlice(alloc, processed) catch return error.OutOfMemory; + } else { + if (else_end) |ee| { + const branch = template[ee..endif_start.?]; + const processed = try processNewConditionals(alloc, branch, state_json, input_json, item_json, store_access); + defer alloc.free(processed); + result.appendSlice(alloc, processed) catch return error.OutOfMemory; + } + } + + pos = endif_end.?; + } else { + result.appendSlice(alloc, template[open..after_tag]) catch return error.OutOfMemory; + pos = after_tag; + } + } else { + result.appendSlice(alloc, template[pos..]) catch return error.OutOfMemory; + break; + } + } + + return result.toOwnedSlice(alloc) catch return error.OutOfMemory; +} + +/// Render a template using the new state-based interpolation syntax. 
+/// +/// Supported expressions: +/// - `{{state.X}}` — state key value +/// - `{{state.X.Y}}` — nested state access +/// - `{{state.X[-1]}}` — last array element from state +/// - `{{input.X}}` — original input (read-only) +/// - `{{item}}` — current item in send context +/// - `{{item.X}}` — nested access on item +/// - `{% if state.X %}...{% endif %}` — conditionals +/// +/// Processing order: +/// 1. Process `{% if ... %}...{% endif %}` blocks +/// 2. Process `{{...}}` interpolations +pub fn renderTemplate( + alloc: Allocator, + template: []const u8, + state_json: []const u8, + input_json: ?[]const u8, + item_json: ?[]const u8, +) ![]const u8 { + return renderTemplateWithStore(alloc, template, state_json, input_json, item_json, null); +} + +pub fn renderTemplateWithStore( + alloc: Allocator, + template: []const u8, + state_json: []const u8, + input_json: ?[]const u8, + item_json: ?[]const u8, + store_access: ?StoreAccess, +) ![]const u8 { + // Phase 1: Process conditional blocks + const preprocessed = try processNewConditionals(alloc, template, state_json, input_json, item_json, store_access); + defer alloc.free(preprocessed); + + // Phase 2: Resolve {{expression}} substitutions + var result: std.ArrayListUnmanaged(u8) = .empty; + errdefer result.deinit(alloc); + + var pos: usize = 0; + + while (pos < preprocessed.len) { + if (std.mem.indexOfPos(u8, preprocessed, pos, "{{")) |open| { + result.appendSlice(alloc, preprocessed[pos..open]) catch return error.OutOfMemory; + + const after_open = open + 2; + if (std.mem.indexOfPos(u8, preprocessed, after_open, "}}")) |close| { + const raw_expr = preprocessed[after_open..close]; + const expr = std.mem.trim(u8, raw_expr, " \t\n\r"); + + const value = try resolveNewExpression(alloc, expr, state_json, input_json, item_json, store_access); + defer alloc.free(value); + + result.appendSlice(alloc, value) catch return error.OutOfMemory; + pos = close + 2; + } else { + // Unterminated — just append the rest as literal + 
result.appendSlice(alloc, preprocessed[pos..]) catch return error.OutOfMemory; + break; + } + } else { + result.appendSlice(alloc, preprocessed[pos..]) catch return error.OutOfMemory; + break; + } + } + + return result.toOwnedSlice(alloc) catch return error.OutOfMemory; +} + +// ── New template engine tests ───────────────────────────────────────── + +test "template state interpolation" { + const alloc = std.testing.allocator; + const s = "{\"name\":\"test\",\"count\":42}"; + const result = try renderTemplate(alloc, "Hello {{state.name}}, count={{state.count}}", s, null, null); + defer alloc.free(result); + try std.testing.expectEqualStrings("Hello test, count=42", result); +} + +test "template input interpolation" { + const alloc = std.testing.allocator; + const result = try renderTemplate(alloc, "Topic: {{input.topic}}", "{}", "{\"topic\":\"AI\"}", null); + defer alloc.free(result); + try std.testing.expectEqualStrings("Topic: AI", result); +} + +test "template item interpolation" { + const alloc = std.testing.allocator; + const result = try renderTemplate(alloc, "File: {{item.path}}", "{}", null, "{\"path\":\"main.py\"}"); + defer alloc.free(result); + try std.testing.expectEqualStrings("File: main.py", result); +} + +test "template conditional true" { + const alloc = std.testing.allocator; + const result = try renderTemplate(alloc, "{% if state.name %}Hi {{state.name}}{% endif %}", "{\"name\":\"Bob\"}", null, null); + defer alloc.free(result); + try std.testing.expectEqualStrings("Hi Bob", result); +} + +test "template conditional false" { + const alloc = std.testing.allocator; + const result = try renderTemplate(alloc, "{% if state.missing %}hidden{% endif %}visible", "{}", null, null); + defer alloc.free(result); + try std.testing.expectEqualStrings("visible", result); +} + +test "template no interpolation" { + const alloc = std.testing.allocator; + const result = try renderTemplate(alloc, "plain text", "{}", null, null); + defer alloc.free(result); + try 
std.testing.expectEqualStrings("plain text", result); +} + +fn mockStoreFetcher( + alloc: Allocator, + base_url: []const u8, + api_token: ?[]const u8, + namespace: []const u8, + key: []const u8, +) !?[]const u8 { + _ = base_url; + _ = api_token; + if (std.mem.eql(u8, namespace, "prefs") and std.mem.eql(u8, key, "theme")) { + return try alloc.dupe(u8, "\"dark\""); + } + return null; +} + +test "template store interpolation" { + const alloc = std.testing.allocator; + const result = try renderTemplateWithStore( + alloc, + "Theme: {{store.prefs.theme}}", + "{}", + null, + null, + .{ + .base_url = "http://example.test", + .fetcher = mockStoreFetcher, + }, + ); + defer alloc.free(result); + + try std.testing.expectEqualStrings("Theme: dark", result); +} + +test "template store interpolation errors without store access" { + const alloc = std.testing.allocator; + try std.testing.expectError( + error.StoreNotConfigured, + renderTemplateWithStore(alloc, "Theme: {{store.prefs.theme}}", "{}", null, null, null), + ); +} + +// ── Old template engine tests ───────────────────────────────────────── test "render literal text unchanged" { const allocator = std.testing.allocator; @@ -443,7 +827,7 @@ test "render step output" { const result = try render(allocator, "Result: {{steps.s1.output}}", .{ .input_json = "{}", .step_outputs = &.{ - .{ .step_id = "s1", .output = "found data", .outputs = null }, + .{ .step_id = "s1", .output = "found data" }, }, .item = null, }); @@ -451,22 +835,6 @@ test "render step output" { try std.testing.expectEqualStrings("Result: found data", result); } -test "render step outputs array" { - const allocator = std.testing.allocator; - const outputs: []const []const u8 = &.{ "result1", "result2" }; - const result = try render(allocator, "All: {{steps.s1.outputs}}", .{ - .input_json = "{}", - .step_outputs = &.{ - .{ .step_id = "s1", .output = null, .outputs = outputs }, - }, - .item = null, - }); - defer allocator.free(result); - // Should produce a JSON 
array like: ["result1","result2"] - try std.testing.expect(std.mem.indexOf(u8, result, "result1") != null); - try std.testing.expect(std.mem.indexOf(u8, result, "result2") != null); -} - test "render item in map context" { const allocator = std.testing.allocator; const result = try render(allocator, "Research: {{item}}", .{ @@ -575,43 +943,6 @@ test "item without map context returns error" { try std.testing.expectError(error.ItemNotAvailable, err); } -test "render debate_responses expression" { - const allocator = std.testing.allocator; - const result = try render(allocator, "Pick best:\n{{debate_responses}}", .{ - .input_json = "{}", - .step_outputs = &.{}, - .item = null, - .debate_responses = "[\"resp1\",\"resp2\"]", - }); - defer allocator.free(result); - try std.testing.expect(std.mem.indexOf(u8, result, "resp1") != null); - try std.testing.expect(std.mem.indexOf(u8, result, "resp2") != null); -} - -test "render chat_history and role expressions" { - const allocator = std.testing.allocator; - const result = try render(allocator, "Previous:\n{{chat_history}}\nYour role: {{role}}", .{ - .input_json = "{}", - .step_outputs = &.{}, - .item = null, - .chat_history = "Architect: design first", - .role = "Frontend Dev", - }); - defer allocator.free(result); - try std.testing.expectEqualStrings("Previous:\nArchitect: design first\nYour role: Frontend Dev", result); -} - -test "debate_responses defaults to empty array when not set" { - const allocator = std.testing.allocator; - const result = try render(allocator, "{{debate_responses}}", .{ - .input_json = "{}", - .step_outputs = &.{}, - .item = null, - }); - defer allocator.free(result); - try std.testing.expectEqualStrings("[]", result); -} - test "render task.title variable" { const allocator = std.testing.allocator; const result = try render(allocator, "Work on: {{task.title}}", .{ diff --git a/src/tracker.zig b/src/tracker.zig index 6c1805d..c61950b 100644 --- a/src/tracker.zig +++ b/src/tracker.zig @@ -257,6 
+257,14 @@ pub const Tracker = struct { _ = self.used_ports.swapRemove(port); } + /// Startup cleanup: remove all stale workspaces from a previous run. + /// Workspaces are ephemeral and will be recreated by hooks when tasks are + /// claimed again, so a clean slate on restart is safe. + pub fn startupCleanup(self: *Tracker) void { + log.info("startup: cleaning terminal workspaces", .{}); + workspace_mod.cleanAll(self.cfg.workspace.root); + } + /// Thread entry point — run the poll loop until shutdown is requested. pub fn run(self: *Tracker) void { log.info("tracker started (poll_interval={d}ms, agent_id={s})", .{ @@ -265,7 +273,7 @@ pub const Tracker = struct { }); // Startup cleanup: remove all stale workspaces from previous run - workspace_mod.cleanAll(self.cfg.workspace.root); + self.startupCleanup(); const poll_ns: u64 = @as(u64, self.cfg.poll_interval_ms) * std.time.ns_per_ms; @@ -378,6 +386,9 @@ pub const Tracker = struct { } /// Poll NullTickets for each workflow's claim_roles and claim available tasks. + // TODO(task14): When nulltickets schema changes are integrated, update WorkflowDef + // and pollAndClaim to handle the new workflow format (e.g. new claim fields, task + // shape, or execution modes introduced in the orchestration milestone). 
fn pollAndClaim(self: *Tracker, tick_alloc: std.mem.Allocator) void { const base_url = self.cfg.url orelse return; diff --git a/src/tracker_client.zig b/src/tracker_client.zig index 57b5c65..212e926 100644 --- a/src/tracker_client.zig +++ b/src/tracker_client.zig @@ -183,7 +183,8 @@ pub const TrackerClient = struct { const url = try std.fmt.allocPrint(self.allocator, "{s}/artifacts", .{self.base_url}); defer self.allocator.free(url); - const body = try std.fmt.allocPrint(self.allocator, + const body = try std.fmt.allocPrint( + self.allocator, "{{\"task_id\":{f},\"run_id\":{f},\"kind\":{f},\"uri\":{f},\"meta\":{s}}}", .{ std.json.fmt(task_id, .{}), @@ -226,6 +227,59 @@ pub const TrackerClient = struct { return result.body; } + pub fn storeGetValue(self: *TrackerClient, namespace: []const u8, key: []const u8) !?[]const u8 { + const namespace_enc = try encodePathSegment(self.allocator, namespace); + defer self.allocator.free(namespace_enc); + const key_enc = try encodePathSegment(self.allocator, key); + defer self.allocator.free(key_enc); + + const url = try std.fmt.allocPrint( + self.allocator, + "{s}/store/{s}/{s}", + .{ trimTrailingSlash(self.base_url), namespace_enc, key_enc }, + ); + defer self.allocator.free(url); + + const result = try self.httpRequest(url, .GET, null, null); + defer self.allocator.free(result.body); + + if (result.status_code == 404) return null; + if (result.status_code < 200 or result.status_code >= 300) return null; + + const parsed = std.json.parseFromSlice(std.json.Value, self.allocator, result.body, .{ + .allocate = .alloc_always, + .ignore_unknown_fields = true, + }) catch return null; + defer parsed.deinit(); + if (parsed.value != .object) return null; + + const value = parsed.value.object.get("value") orelse return null; + const value_json = try std.json.Stringify.valueAlloc(self.allocator, value, .{}); + return value_json; + } + + pub fn storePutValue(self: *TrackerClient, namespace: []const u8, key: []const u8, value_json: []const 
u8) !bool { + const namespace_enc = try encodePathSegment(self.allocator, namespace); + defer self.allocator.free(namespace_enc); + const key_enc = try encodePathSegment(self.allocator, key); + defer self.allocator.free(key_enc); + + const url = try std.fmt.allocPrint( + self.allocator, + "{s}/store/{s}/{s}", + .{ trimTrailingSlash(self.base_url), namespace_enc, key_enc }, + ); + defer self.allocator.free(url); + + const body = try std.fmt.allocPrint(self.allocator, "{{\"value\":{s}}}", .{value_json}); + defer self.allocator.free(body); + + const result = try self.httpRequest(url, .PUT, body, null); + defer self.allocator.free(result.body); + + return result.status_code >= 200 and result.status_code < 300; + } + fn httpRequest( self: *TrackerClient, url: []const u8, @@ -272,6 +326,36 @@ pub const TrackerClient = struct { } }; +fn trimTrailingSlash(url: []const u8) []const u8 { + if (url.len > 0 and url[url.len - 1] == '/') return url[0 .. url.len - 1]; + return url; +} + +fn encodePathSegment(allocator: std.mem.Allocator, value: []const u8) ![]const u8 { + var buf: std.ArrayListUnmanaged(u8) = .empty; + errdefer buf.deinit(allocator); + + for (value) |ch| { + if (isUnreserved(ch)) { + try buf.append(allocator, ch); + continue; + } + try buf.writer(allocator).print("%{X:0>2}", .{ch}); + } + + return try buf.toOwnedSlice(allocator); +} + +fn isUnreserved(ch: u8) bool { + return (ch >= 'A' and ch <= 'Z') or + (ch >= 'a' and ch <= 'z') or + (ch >= '0' and ch <= '9') or + ch == '-' or + ch == '_' or + ch == '.' 
or + ch == '~'; +} + fn parseTaskInfo(allocator: std.mem.Allocator, task_value: std.json.Value) !TaskInfo { if (task_value != .object) return error.InvalidTaskPayload; const obj = task_value.object; @@ -365,3 +449,11 @@ test "TrackerClient exposes optimistic transition support" { try std.testing.expect(@hasDecl(TrackerClient, "transition")); try std.testing.expect(@hasDecl(TrackerClient, "postArtifact")); } + +test "encodePathSegment percent-encodes reserved characters" { + const allocator = std.testing.allocator; + const encoded = try encodePathSegment(allocator, "team alpha/key"); + defer allocator.free(encoded); + + try std.testing.expectEqualStrings("team%20alpha%2Fkey", encoded); +} diff --git a/src/types.zig b/src/types.zig index fbe28c2..b4dd51a 100644 --- a/src/types.zig +++ b/src/types.zig @@ -7,10 +7,11 @@ const std = @import("std"); pub const RunStatus = enum { pending, running, - paused, + interrupted, completed, failed, cancelled, + forked, pub fn toString(self: RunStatus) []const u8 { return @tagName(self); @@ -31,7 +32,7 @@ pub const StepStatus = enum { completed, failed, skipped, - waiting_approval, + interrupted, pub fn toString(self: StepStatus) []const u8 { return @tagName(self); @@ -47,19 +48,12 @@ pub const StepStatus = enum { pub const StepType = enum { task, - fan_out, - map, - condition, - approval, - reduce, - loop, - sub_workflow, - wait, - router, + route, + interrupt, + agent, + send, transform, - saga, - debate, - group_chat, + subgraph, pub fn toString(self: StepType) []const u8 { return @tagName(self); @@ -153,6 +147,7 @@ pub const RunRow = struct { id: []const u8, idempotency_key: ?[]const u8, status: []const u8, + workflow_id: ?[]const u8 = null, workflow_json: []const u8, input_json: []const u8, callbacks_json: []const u8, @@ -161,6 +156,9 @@ pub const RunRow = struct { updated_at_ms: i64, started_at_ms: ?i64, ended_at_ms: ?i64, + state_json: ?[]const u8 = null, + config_json: ?[]const u8 = null, + parent_run_id: ?[]const u8 = 
null, }; pub const StepRow = struct { @@ -206,17 +204,6 @@ pub const ArtifactRow = struct { created_at_ms: i64, }; -pub const ChatMessageRow = struct { - id: i64, - run_id: []const u8, - step_id: []const u8, - round: i64, - role: []const u8, - worker_id: ?[]const u8, - message: []const u8, - ts_ms: i64, -}; - pub const TrackerRunRow = struct { task_id: []const u8, tracker_run_id: []const u8, @@ -238,12 +225,75 @@ pub const TrackerRunRow = struct { last_error_text: ?[]const u8, }; -pub const SagaStateRow = struct { +pub const WorkflowRow = struct { + id: []const u8, + name: []const u8, + definition_json: []const u8, + version: i64 = 1, + created_at_ms: i64, + updated_at_ms: i64, +}; + +pub const CheckpointRow = struct { + id: []const u8, run_id: []const u8, - saga_step_id: []const u8, - body_step_id: []const u8, - compensation_step_id: ?[]const u8, + step_id: []const u8, + parent_id: ?[]const u8, + state_json: []const u8, + completed_nodes_json: []const u8, + version: i64, + metadata_json: ?[]const u8, + created_at_ms: i64, +}; + +pub const AgentEventRow = struct { + id: i64, + run_id: []const u8, + step_id: []const u8, + iteration: i64, + tool: ?[]const u8, + args_json: ?[]const u8, + result_text: ?[]const u8, status: []const u8, + created_at_ms: i64, +}; + +pub const PendingInjectionRow = struct { + id: i64, + run_id: []const u8, + updates_json: []const u8, + apply_after_step: ?[]const u8, + created_at_ms: i64, +}; + +pub const PendingWriteRow = struct { + id: i64, + run_id: []const u8, + step_id: []const u8, + channel: []const u8, + value_json: []const u8, + created_at_ms: i64, +}; + +pub const ReducerType = enum { + last_value, + append, + merge, + add, + min, + max, + add_messages, + + pub fn toString(self: ReducerType) []const u8 { + return @tagName(self); + } + + pub fn fromString(s: []const u8) ?ReducerType { + inline for (@typeInfo(ReducerType).@"enum".fields) |f| { + if (std.mem.eql(u8, s, f.name)) return @enumFromInt(f.value); + } + return null; + } }; // 
── API Response Types ───────────────────────────────────────────────── @@ -275,17 +325,17 @@ test "RunStatus round-trip" { } test "StepStatus round-trip" { - const s = StepStatus.waiting_approval; + const s = StepStatus.interrupted; const name = s.toString(); - try std.testing.expectEqualStrings("waiting_approval", name); + try std.testing.expectEqualStrings("interrupted", name); const parsed = StepStatus.fromString(name); - try std.testing.expectEqual(StepStatus.waiting_approval, parsed.?); + try std.testing.expectEqual(StepStatus.interrupted, parsed.?); } test "StepType round-trip" { - const s = StepType.fan_out; - try std.testing.expectEqualStrings("fan_out", s.toString()); - try std.testing.expectEqual(StepType.fan_out, StepType.fromString("fan_out").?); + const s = StepType.route; + try std.testing.expectEqualStrings("route", s.toString()); + try std.testing.expectEqual(StepType.route, StepType.fromString("route").?); } test "WorkerStatus round-trip" { diff --git a/src/worker_protocol.zig b/src/worker_protocol.zig index d560d38..59be6a6 100644 --- a/src/worker_protocol.zig +++ b/src/worker_protocol.zig @@ -6,6 +6,7 @@ pub const Protocol = enum { openai_chat, mqtt, redis_stream, + a2a, }; pub fn parse(raw: []const u8) ?Protocol { @@ -14,26 +15,27 @@ pub fn parse(raw: []const u8) ?Protocol { if (std.mem.eql(u8, raw, "openai_chat")) return .openai_chat; if (std.mem.eql(u8, raw, "mqtt")) return .mqtt; if (std.mem.eql(u8, raw, "redis_stream")) return .redis_stream; + if (std.mem.eql(u8, raw, "a2a")) return .a2a; return null; } pub fn requiresModel(protocol: Protocol) bool { return switch (protocol) { .openai_chat => true, - .webhook, .api_chat, .mqtt, .redis_stream => false, + .webhook, .api_chat, .mqtt, .redis_stream, .a2a => false, }; } pub fn requiresExplicitPath(protocol: Protocol) bool { return switch (protocol) { .webhook => true, - .api_chat, .openai_chat, .mqtt, .redis_stream => false, + .api_chat, .openai_chat, .mqtt, .redis_stream, .a2a => false, }; } 
pub fn validateUrlForProtocol(url: []const u8, protocol: Protocol) bool { - // mqtt and redis_stream URLs are validated by their own parsers - if (protocol == .mqtt or protocol == .redis_stream) return true; + // mqtt, redis_stream, and a2a URLs are validated by their own parsers / have fixed paths + if (protocol == .mqtt or protocol == .redis_stream or protocol == .a2a) return true; if (!requiresExplicitPath(protocol)) return true; return hasExplicitPath(url); } @@ -47,6 +49,9 @@ pub fn buildRequestUrl( if (requiresExplicitPath(protocol) and !hasExplicitPath(trimmed)) { return error.WebhookUrlPathRequired; } + if (protocol == .a2a) { + return try std.fmt.allocPrint(allocator, "{s}/a2a", .{trimmed}); + } return try allocator.dupe(u8, trimmed); } @@ -140,6 +145,7 @@ test "parse protocol supports known values" { try std.testing.expectEqual(Protocol.webhook, parse("webhook").?); try std.testing.expectEqual(Protocol.api_chat, parse("api_chat").?); try std.testing.expectEqual(Protocol.openai_chat, parse("openai_chat").?); + try std.testing.expectEqual(Protocol.a2a, parse("a2a").?); try std.testing.expect(parse("unknown") == null); } @@ -173,11 +179,25 @@ test "validateUrlForProtocol enforces protocol-specific constraints" { try std.testing.expect(validateUrlForProtocol("http://localhost:42617/api/chat", .api_chat)); try std.testing.expect(validateUrlForProtocol("mqtt://broker:1883/topic", .mqtt)); try std.testing.expect(validateUrlForProtocol("redis://redis:6379/stream", .redis_stream)); + try std.testing.expect(validateUrlForProtocol("http://localhost:3000", .a2a)); +} + +test "buildRequestUrl appends /a2a for a2a protocol" { + const allocator = std.testing.allocator; + const url = try buildRequestUrl(allocator, "http://localhost:3000", .a2a); + defer allocator.free(url); + try std.testing.expectEqualStrings("http://localhost:3000/a2a", url); + + // Trailing slash is trimmed before appending /a2a + const url2 = try buildRequestUrl(allocator, "http://localhost:3000/", 
.a2a); + defer allocator.free(url2); + try std.testing.expectEqualStrings("http://localhost:3000/a2a", url2); } -test "parse supports mqtt and redis_stream" { +test "parse supports mqtt, redis_stream, and a2a" { try std.testing.expectEqual(Protocol.mqtt, parse("mqtt").?); try std.testing.expectEqual(Protocol.redis_stream, parse("redis_stream").?); + try std.testing.expectEqual(Protocol.a2a, parse("a2a").?); } test "parseMqttUrl extracts host, port, topic" { diff --git a/src/worker_response.zig b/src/worker_response.zig index 778109d..55110b0 100644 --- a/src/worker_response.zig +++ b/src/worker_response.zig @@ -1,11 +1,24 @@ const std = @import("std"); +pub const UsageInfo = struct { + input_tokens: i64 = 0, + output_tokens: i64 = 0, +}; + +pub const RateLimitData = struct { + remaining: i64 = 0, + limit: i64 = 0, + reset_ms: i64 = 0, +}; + pub const ParseResult = struct { output: []const u8, success: bool, error_text: ?[]const u8, async_pending: bool = false, correlation_id: ?[]const u8 = null, + usage: ?UsageInfo = null, + rate_limit: ?RateLimitData = null, }; pub const invalid_json_error = "worker response must be a JSON object"; @@ -34,6 +47,8 @@ pub fn parse(allocator: std.mem.Allocator, response_data: []const u8) !ParseResu .output = try allocator.dupe(u8, output), .success = true, .error_text = null, + .usage = extractUsage(obj), + .rate_limit = extractRateLimit(obj), }; } @@ -94,6 +109,53 @@ fn extractErrorMessage(allocator: std.mem.Allocator, obj: std.json.ObjectMap) !? 
return null; } +fn extractUsage(obj: std.json.ObjectMap) ?UsageInfo { + const usage_val = obj.get("usage") orelse return null; + if (usage_val != .object) return null; + const usage_obj = usage_val.object; + + var info = UsageInfo{}; + + // OpenAI format: prompt_tokens / completion_tokens + if (usage_obj.get("prompt_tokens")) |v| { + if (v == .integer) info.input_tokens = v.integer; + } + if (usage_obj.get("completion_tokens")) |v| { + if (v == .integer) info.output_tokens = v.integer; + } + + // A2A/generic format: input_tokens / output_tokens + if (usage_obj.get("input_tokens")) |v| { + if (v == .integer) info.input_tokens = v.integer; + } + if (usage_obj.get("output_tokens")) |v| { + if (v == .integer) info.output_tokens = v.integer; + } + + if (info.input_tokens == 0 and info.output_tokens == 0) return null; + return info; +} + +fn extractRateLimit(obj: std.json.ObjectMap) ?RateLimitData { + const rl_val = obj.get("rate_limit") orelse return null; + if (rl_val != .object) return null; + const rl_obj = rl_val.object; + + var info = RateLimitData{}; + if (rl_obj.get("remaining")) |v| { + if (v == .integer) info.remaining = v.integer; + } + if (rl_obj.get("limit")) |v| { + if (v == .integer) info.limit = v.integer; + } + if (rl_obj.get("reset_ms")) |v| { + if (v == .integer) info.reset_ms = v.integer; + } + + if (info.remaining == 0 and info.limit == 0) return null; + return info; +} + fn isAsyncAckWithoutOutput(obj: std.json.ObjectMap) bool { const status_val = obj.get("status") orelse return false; return status_val == .string and std.mem.eql(u8, status_val.string, "received"); @@ -165,3 +227,52 @@ test "parse rejects object without supported output fields" { try std.testing.expect(!result.success); try std.testing.expectEqualStrings(missing_output_error, result.error_text.?); } + +test "parse extracts usage info from OpenAI format" { + const allocator = std.testing.allocator; + const result = try parse( + allocator, + 
"{\"response\":\"done\",\"usage\":{\"prompt_tokens\":150,\"completion_tokens\":75}}", + ); + defer allocator.free(result.output); + try std.testing.expect(result.success); + try std.testing.expect(result.usage != null); + try std.testing.expectEqual(@as(i64, 150), result.usage.?.input_tokens); + try std.testing.expectEqual(@as(i64, 75), result.usage.?.output_tokens); +} + +test "parse extracts usage info from generic format" { + const allocator = std.testing.allocator; + const result = try parse( + allocator, + "{\"response\":\"done\",\"usage\":{\"input_tokens\":200,\"output_tokens\":100}}", + ); + defer allocator.free(result.output); + try std.testing.expect(result.success); + try std.testing.expect(result.usage != null); + try std.testing.expectEqual(@as(i64, 200), result.usage.?.input_tokens); + try std.testing.expectEqual(@as(i64, 100), result.usage.?.output_tokens); +} + +test "parse extracts rate limit info" { + const allocator = std.testing.allocator; + const result = try parse( + allocator, + "{\"response\":\"done\",\"rate_limit\":{\"remaining\":95,\"limit\":100,\"reset_ms\":1700000000000}}", + ); + defer allocator.free(result.output); + try std.testing.expect(result.success); + try std.testing.expect(result.rate_limit != null); + try std.testing.expectEqual(@as(i64, 95), result.rate_limit.?.remaining); + try std.testing.expectEqual(@as(i64, 100), result.rate_limit.?.limit); + try std.testing.expectEqual(@as(i64, 1700000000000), result.rate_limit.?.reset_ms); +} + +test "parse returns null usage when no usage field" { + const allocator = std.testing.allocator; + const result = try parse(allocator, "{\"response\":\"done\"}"); + defer allocator.free(result.output); + try std.testing.expect(result.success); + try std.testing.expect(result.usage == null); + try std.testing.expect(result.rate_limit == null); +} diff --git a/src/workflow_loader.zig b/src/workflow_loader.zig index 5f0fda4..f5d2fe0 100644 --- a/src/workflow_loader.zig +++ b/src/workflow_loader.zig 
@@ -1,4 +1,7 @@ const std = @import("std"); +const ids = @import("ids.zig"); +const Store = @import("store.zig").Store; +const log = std.log.scoped(.workflow_loader); // ── Types ───────────────────────────────────────────────────────────── @@ -100,6 +103,102 @@ test "loadWorkflows: supports absolute workflow directories" { try std.testing.expectEqualStrings("absolute", map.get("absolute").?.pipeline_id); } +// ── WorkflowWatcher ────────────────────────────────────────────────── + +pub const WorkflowWatcher = struct { + dir_path: []const u8, + store: *Store, + last_check_ms: i64, + file_hashes: std.StringHashMap(u64), + alloc: std.mem.Allocator, + + pub fn init(alloc: std.mem.Allocator, dir_path: []const u8, store: *Store) WorkflowWatcher { + return .{ + .dir_path = dir_path, + .store = store, + .last_check_ms = 0, + .file_hashes = std.StringHashMap(u64).init(alloc), + .alloc = alloc, + }; + } + + pub fn deinit(self: *WorkflowWatcher) void { + var it = self.file_hashes.iterator(); + while (it.next()) |entry| { + self.alloc.free(entry.key_ptr.*); + } + self.file_hashes.deinit(); + } + + /// Check for changed workflow files. Called periodically from engine tick. 
+ pub fn checkForChanges(self: *WorkflowWatcher) void { + const now = ids.nowMs(); + if (now - self.last_check_ms < 5000) return; // check every 5 seconds + self.last_check_ms = now; + + var dir = if (std.fs.path.isAbsolute(self.dir_path)) + std.fs.openDirAbsolute(self.dir_path, .{ .iterate = true }) catch return + else + std.fs.cwd().openDir(self.dir_path, .{ .iterate = true }) catch return; + defer dir.close(); + + var iter = dir.iterate(); + while (iter.next() catch null) |entry| { + if (entry.kind != .file) continue; + if (!std.mem.endsWith(u8, entry.name, ".json")) continue; + + const contents = dir.readFileAlloc(self.alloc, entry.name, 1024 * 1024) catch continue; + defer self.alloc.free(contents); + + // Compute FNV1a hash of content + const hash = std.hash.Fnv1a_64.hash(contents); + + // Check if hash changed + const existing = self.file_hashes.get(entry.name); + if (existing) |prev_hash| { + if (prev_hash == hash) continue; // unchanged + } + + // Parse and validate + const parsed = std.json.parseFromSlice(std.json.Value, self.alloc, contents, .{}) catch continue; + defer parsed.deinit(); + if (parsed.value != .object) continue; + + const obj = parsed.value.object; + + // Extract name and id + const wf_name = if (obj.get("name")) |v| (if (v == .string) v.string else null) else null; + const wf_id = if (obj.get("id")) |v| (if (v == .string) v.string else null) else null; + if (wf_id == null and wf_name == null) continue; + + const id = wf_id orelse wf_name.?; + const name = wf_name orelse wf_id.?; + + // Upsert into workflows table + // Try insert first; if it fails (duplicate id), update instead + self.store.createWorkflow(id, name, contents) catch { + self.store.updateWorkflow(id, name, contents) catch continue; + }; + + // Store hash (need to dupe the key since entry.name is transient) + const key_dupe = self.alloc.dupe(u8, entry.name) catch continue; + if (existing != null) { + // Free old key if we're replacing + if (self.file_hashes.fetchPut(key_dupe, 
hash) catch null) |old| { + self.alloc.free(old.key); + } + } else { + self.file_hashes.put(key_dupe, hash) catch { + self.alloc.free(key_dupe); + continue; + }; + } + + log.info("workflow {s} reloaded", .{id}); + } + } +}; + // ── getWorkflowForPipeline ──────────────────────────────────────────── pub fn getWorkflowForPipeline(map: *const WorkflowMap, pipeline_id: []const u8) ?*const WorkflowDef { @@ -299,3 +398,41 @@ test "parse workflow with continuation_prompt" { defer parsed.deinit(); try std.testing.expectEqualStrings("Continue: attempt #{{attempt}}", parsed.value.subprocess.continuation_prompt.?); } + +test "WorkflowWatcher: detects file changes" { + const allocator = std.testing.allocator; + var s = try Store.init(allocator, ":memory:"); + defer s.deinit(); + + var tmp = std.testing.tmpDir(.{}); + defer tmp.cleanup(); + + const dir_path = try tmp.dir.realpathAlloc(allocator, "."); + defer allocator.free(dir_path); + + var watcher = WorkflowWatcher.init(allocator, dir_path, &s); + defer watcher.deinit(); + + // Force last_check_ms to 0 so check runs immediately + watcher.last_check_ms = 0; + + // Write a workflow file + try tmp.dir.writeFile(.{ + .sub_path = "test_wf.json", + .data = + \\{"id":"wf-test","name":"Test WF","nodes":{}} + , + }); + + watcher.checkForChanges(); + + // Verify workflow was inserted + const wf = try s.getWorkflow(allocator, "wf-test"); + try std.testing.expect(wf != null); + allocator.free(wf.?.id); + allocator.free(wf.?.name); + allocator.free(wf.?.definition_json); + + // Verify hash was stored + try std.testing.expectEqual(@as(usize, 1), watcher.file_hashes.count()); +} diff --git a/src/workflow_validation.zig b/src/workflow_validation.zig index 3374e94..c5419fb 100644 --- a/src/workflow_validation.zig +++ b/src/workflow_validation.zig @@ -1,4 +1,7 @@ const std = @import("std"); +const Allocator = std.mem.Allocator; + +// ── Legacy validation (used by api.zig for POST /runs) ──────────────── pub const ValidateError = error{ 
StepMustBeObject, @@ -9,16 +12,6 @@ pub const ValidateError = error{ DependsOnItemNotString, DependsOnDuplicate, DependsOnUnknownStepId, - LoopBodyRequired, - SubWorkflowRequired, - WaitConditionRequired, - WaitDurationInvalid, - WaitUntilInvalid, - WaitSignalInvalid, - RouterRoutesRequired, - SagaBodyRequired, - DebateCountRequired, - GroupChatParticipantsRequired, RetryMustBeObject, MaxAttemptsMustBePositiveInteger, TimeoutMsMustBePositiveInteger, @@ -67,47 +60,9 @@ fn getJsonString(obj: std.json.ObjectMap, key: []const u8) ?[]const u8 { } fn validateStepTypeRules(step_type: []const u8, step_obj: std.json.ObjectMap) ValidateError!void { - if (std.mem.eql(u8, step_type, "loop") and step_obj.get("body") == null) { - return error.LoopBodyRequired; - } - if (std.mem.eql(u8, step_type, "sub_workflow") and step_obj.get("workflow") == null) { - return error.SubWorkflowRequired; - } - if (std.mem.eql(u8, step_type, "wait")) { - if (step_obj.get("duration_ms") == null and step_obj.get("until_ms") == null and step_obj.get("signal") == null) { - return error.WaitConditionRequired; - } - if (step_obj.get("duration_ms")) |duration_val| { - switch (duration_val) { - .integer => { - if (duration_val.integer < 0) return error.WaitDurationInvalid; - }, - else => return error.WaitDurationInvalid, - } - } - if (step_obj.get("until_ms")) |until_val| { - if (until_val != .integer or until_val.integer < 0) { - return error.WaitUntilInvalid; - } - } - if (step_obj.get("signal")) |signal_val| { - if (signal_val != .string or signal_val.string.len == 0) { - return error.WaitSignalInvalid; - } - } - } - if (std.mem.eql(u8, step_type, "router") and step_obj.get("routes") == null) { - return error.RouterRoutesRequired; - } - if (std.mem.eql(u8, step_type, "saga") and step_obj.get("body") == null) { - return error.SagaBodyRequired; - } - if (std.mem.eql(u8, step_type, "debate") and step_obj.get("count") == null) { - return error.DebateCountRequired; - } - if (std.mem.eql(u8, step_type, 
"group_chat") and step_obj.get("participants") == null) { - return error.GroupChatParticipantsRequired; - } + // No specific rules for current step types (task, route, interrupt, agent, send, transform, subgraph) + _ = step_type; + _ = step_obj; } fn validateDependsOnTypes(allocator: std.mem.Allocator, step_obj: std.json.ObjectMap) ValidateError!void { @@ -140,7 +95,505 @@ fn validateExecutionControls(step_obj: std.json.ObjectMap) ValidateError!void { } } -// ── Tests ───────────────────────────────────────────────────────────── +// ── New graph-based workflow validation ─────────────────────────────── + +pub const ValidationError = struct { + err_type: []const u8, + node: ?[]const u8, + key: ?[]const u8, + message: []const u8, +}; + +/// Validate a workflow definition JSON (new graph format). +/// Returns a slice of ValidationError; caller must free with alloc.free(). +/// Individual string fields inside each ValidationError point into the +/// parsed JSON tree (or are literals) and do not need separate freeing. 
+pub fn validate(alloc: Allocator, definition_json: []const u8) ![]ValidationError { + var errors: std.ArrayListUnmanaged(ValidationError) = .empty; + defer errors.deinit(alloc); + + const parsed = std.json.parseFromSlice(std.json.Value, alloc, definition_json, .{}) catch { + try errors.append(alloc, .{ + .err_type = "parse_error", + .node = null, + .key = null, + .message = "failed to parse workflow JSON", + }); + return errors.toOwnedSlice(alloc); + }; + defer parsed.deinit(); + + if (parsed.value != .object) { + try errors.append(alloc, .{ + .err_type = "parse_error", + .node = null, + .key = null, + .message = "workflow must be a JSON object", + }); + return errors.toOwnedSlice(alloc); + } + const root = parsed.value.object; + + // Extract nodes map + const nodes_val = root.get("nodes") orelse { + try errors.append(alloc, .{ + .err_type = "missing_field", + .node = null, + .key = "nodes", + .message = "workflow must have a 'nodes' object", + }); + return errors.toOwnedSlice(alloc); + }; + if (nodes_val != .object) { + try errors.append(alloc, .{ + .err_type = "missing_field", + .node = null, + .key = "nodes", + .message = "'nodes' must be an object", + }); + return errors.toOwnedSlice(alloc); + } + const nodes = nodes_val.object; + + // Extract edges array + const edges_val = root.get("edges") orelse { + try errors.append(alloc, .{ + .err_type = "missing_field", + .node = null, + .key = "edges", + .message = "workflow must have an 'edges' array", + }); + return errors.toOwnedSlice(alloc); + }; + if (edges_val != .array) { + try errors.append(alloc, .{ + .err_type = "missing_field", + .node = null, + .key = "edges", + .message = "'edges' must be an array", + }); + return errors.toOwnedSlice(alloc); + } + const edges = edges_val.array.items; + + // Extract state_schema (may be absent or empty object) + var state_schema: ?std.json.ObjectMap = null; + if (root.get("state_schema")) |ss_val| { + if (ss_val == .object) state_schema = ss_val.object; + } + + // --- 
Collect send target_nodes (exempt from reachability) --- + var send_targets = std.StringHashMap(void).init(alloc); + defer send_targets.deinit(); + var node_it = nodes.iterator(); + while (node_it.next()) |entry| { + const nobj = entry.value_ptr.*; + if (nobj != .object) continue; + const ntype = getJsonStringFromObj(nobj.object, "type") orelse continue; + if (std.mem.eql(u8, ntype, "send")) { + if (getJsonStringFromObj(nobj.object, "target_node")) |tn| { + try send_targets.put(tn, {}); + } + } + } + + // --- Check 1: nodes_in_edges_exist --- + // Build adjacency list while we're at it + // Edge source format: "node" or "node:route_value" + // We'll parse edge sources to get the actual node name + var edge_sources: std.ArrayListUnmanaged([]const u8) = .empty; + defer edge_sources.deinit(alloc); + var edge_targets: std.ArrayListUnmanaged([]const u8) = .empty; + defer edge_targets.deinit(alloc); + + for (edges) |edge_val| { + if (edge_val != .array or edge_val.array.items.len < 2) continue; + const src_raw = if (edge_val.array.items[0] == .string) edge_val.array.items[0].string else continue; + const tgt = if (edge_val.array.items[1] == .string) edge_val.array.items[1].string else continue; + + // Parse "node:route_value" -> node name + const src_node = edgeSourceNode(src_raw); + + try edge_sources.append(alloc, src_raw); + try edge_targets.append(alloc, tgt); + + // Check source node exists (skip __start__, __end__) + if (!isReserved(src_node)) { + if (!nodes.contains(src_node)) { + try errors.append(alloc, .{ + .err_type = "nodes_in_edges_exist", + .node = src_node, + .key = null, + .message = "edge source node does not exist in nodes map", + }); + } + } + // Check target node exists (skip __start__, __end__) + if (!isReserved(tgt)) { + if (!nodes.contains(tgt)) { + try errors.append(alloc, .{ + .err_type = "nodes_in_edges_exist", + .node = tgt, + .key = null, + .message = "edge target node does not exist in nodes map", + }); + } + } + } + + // --- Build 
reachability set from __start__ --- + // We do a BFS/DFS using static edges only (not send target_nodes). + var reachable = std.StringHashMap(void).init(alloc); + defer reachable.deinit(); + var queue: std.ArrayListUnmanaged([]const u8) = .empty; + defer queue.deinit(alloc); + + try reachable.put("__start__", {}); + try queue.append(alloc, "__start__"); + + var qi: usize = 0; + while (qi < queue.items.len) : (qi += 1) { + const current = queue.items[qi]; + for (edge_sources.items, edge_targets.items) |src_raw, tgt| { + const src_node = edgeSourceNode(src_raw); + if (std.mem.eql(u8, src_node, current) or std.mem.eql(u8, src_raw, current)) { + if (!reachable.contains(tgt)) { + try reachable.put(tgt, {}); + try queue.append(alloc, tgt); + } + } + } + } + + // --- Check 2: unreachable_node --- + node_it = nodes.iterator(); + while (node_it.next()) |entry| { + const nname = entry.key_ptr.*; + if (reachable.contains(nname)) continue; + // Exempt send target_nodes + if (send_targets.contains(nname)) continue; + try errors.append(alloc, .{ + .err_type = "unreachable_node", + .node = nname, + .key = null, + .message = "node is not reachable from __start__", + }); + } + + // --- Check 3: end_unreachable --- + // __end__ must be reachable from __start__ (simple check: it appears in + // reachable set, or at least one edge targets __end__). + // For leaf nodes that are not send_targets, there should be a path to __end__. + // We do a simplified check: __end__ must be in the reachable set. + if (!reachable.contains("__end__")) { + try errors.append(alloc, .{ + .err_type = "end_unreachable", + .node = null, + .key = null, + .message = "__end__ is not reachable from __start__", + }); + } + + // --- Check 4: unintentional_cycle --- + // Detect cycles via DFS. Edges from route nodes (src contains ':') back to + // earlier nodes are intentional. Other back-edges are cycles (errors). 
+ { + const CycleState = enum { unvisited, in_stack, done }; + var cycle_state = std.StringHashMap(CycleState).init(alloc); + defer cycle_state.deinit(); + + // Initialize all known nodes + node_it = nodes.iterator(); + while (node_it.next()) |entry| { + try cycle_state.put(entry.key_ptr.*, .unvisited); + } + try cycle_state.put("__start__", .unvisited); + try cycle_state.put("__end__", .unvisited); + + // We need to track which src_raw produced the edge to know if it's a route edge + // Build adjacency: node -> list of (tgt, src_raw_is_route) + const EdgeInfo = struct { tgt: []const u8, from_route: bool }; + var adj = std.StringHashMap(std.ArrayListUnmanaged(EdgeInfo)).init(alloc); + defer { + var adj_it = adj.iterator(); + while (adj_it.next()) |e| e.value_ptr.deinit(alloc); + adj.deinit(); + } + + for (edge_sources.items, edge_targets.items) |src_raw, tgt| { + const src_node = edgeSourceNode(src_raw); + const is_route_edge = std.mem.indexOfScalar(u8, src_raw, ':') != null; + const res = try adj.getOrPut(src_node); + if (!res.found_existing) { + res.value_ptr.* = .empty; + } + try res.value_ptr.append(alloc, .{ .tgt = tgt, .from_route = is_route_edge }); + } + + // Iterative DFS + var visited_for_dfs = std.StringHashMap(CycleState).init(alloc); + defer visited_for_dfs.deinit(); + + // Initialize + var cs_it = cycle_state.iterator(); + while (cs_it.next()) |e| { + try visited_for_dfs.put(e.key_ptr.*, .unvisited); + } + + var dfs_nodes: std.ArrayListUnmanaged([]const u8) = .empty; + defer dfs_nodes.deinit(alloc); + var cs_it2 = cycle_state.iterator(); + while (cs_it2.next()) |e| { + try dfs_nodes.append(alloc, e.key_ptr.*); + } + + for (dfs_nodes.items) |start_node| { + const s = visited_for_dfs.get(start_node) orelse .unvisited; + if (s != .unvisited) continue; + + // DFS iterative with path tracking + var path = std.StringHashMap(void).init(alloc); + defer path.deinit(); + + const DfsEntry = struct { node: []const u8, child_idx: usize }; + var stack: 
std.ArrayListUnmanaged(DfsEntry) = .empty; + defer stack.deinit(alloc); + + try stack.append(alloc, .{ .node = start_node, .child_idx = 0 }); + try path.put(start_node, {}); + visited_for_dfs.put(start_node, .in_stack) catch {}; + + while (stack.items.len > 0) { + const top = &stack.items[stack.items.len - 1]; + const neighbors = adj.get(top.node); + if (neighbors == null or top.child_idx >= neighbors.?.items.len) { + // Done with this node + _ = path.remove(top.node); + visited_for_dfs.put(top.node, .done) catch {}; + _ = stack.pop(); + continue; + } + const neighbor = neighbors.?.items[top.child_idx]; + top.child_idx += 1; + + const tgt = neighbor.tgt; + const from_route = neighbor.from_route; + + // Skip reserved endpoints for cycle detection + if (isReserved(tgt)) continue; + + const tgt_state = visited_for_dfs.get(tgt) orelse .unvisited; + if (tgt_state == .in_stack) { + // Back edge found — cycle + if (!from_route) { + // Report cycle error only once per target + var already_reported = false; + for (errors.items) |e| { + if (std.mem.eql(u8, e.err_type, "unintentional_cycle") and + e.node != null and std.mem.eql(u8, e.node.?, tgt)) + { + already_reported = true; + break; + } + } + if (!already_reported) { + try errors.append(alloc, .{ + .err_type = "unintentional_cycle", + .node = tgt, + .key = null, + .message = "cycle detected: non-route edge creates a cycle", + }); + } + } + // Intentional route cycle — skip + } else if (tgt_state == .unvisited) { + visited_for_dfs.put(tgt, .in_stack) catch {}; + try path.put(tgt, {}); + try stack.append(alloc, .{ .node = tgt, .child_idx = 0 }); + } + // .done: already processed, no cycle through this path + } + } + } + + // --- Check 5: undefined_state_key --- + if (state_schema) |schema| { + node_it = nodes.iterator(); + while (node_it.next()) |entry| { + const nname = entry.key_ptr.*; + const nval = entry.value_ptr.*; + if (nval != .object) continue; + const nobj = nval.object; + + // Check prompt field + if 
(getJsonStringFromObj(nobj, "prompt")) |prompt| { + try checkStateRefs(alloc, &errors, schema, nname, prompt); + } + // Check message field (interrupt) + if (getJsonStringFromObj(nobj, "message")) |msg| { + try checkStateRefs(alloc, &errors, schema, nname, msg); + } + } + } + + // --- Check 6: invalid_route_target --- + node_it = nodes.iterator(); + while (node_it.next()) |entry| { + const nname = entry.key_ptr.*; + const nval = entry.value_ptr.*; + if (nval != .object) continue; + const nobj = nval.object; + const ntype = getJsonStringFromObj(nobj, "type") orelse continue; + if (!std.mem.eql(u8, ntype, "route")) continue; + + const routes_val = nobj.get("routes") orelse continue; + if (routes_val != .object) continue; + var routes_it = routes_val.object.iterator(); + while (routes_it.next()) |re| { + const target = if (re.value_ptr.* == .string) re.value_ptr.*.string else continue; + if (!nodes.contains(target)) { + try errors.append(alloc, .{ + .err_type = "invalid_route_target", + .node = nname, + .key = re.key_ptr.*, + .message = "route target node does not exist", + }); + } + if (!hasRouteEdge(edge_sources.items, edge_targets.items, nname, re.key_ptr.*, target)) { + try errors.append(alloc, .{ + .err_type = "missing_route_edge", + .node = nname, + .key = re.key_ptr.*, + .message = "route key is declared in routes but has no matching conditional edge", + }); + } + } + + if (getJsonStringFromObj(nobj, "default")) |default_route| { + if (!routes_val.object.contains(default_route)) { + try errors.append(alloc, .{ + .err_type = "invalid_route_default", + .node = nname, + .key = "default", + .message = "route default must reference a declared routes key", + }); + } + } + } + + // --- Check 7: invalid_send_target --- + node_it = nodes.iterator(); + while (node_it.next()) |entry| { + const nname = entry.key_ptr.*; + const nval = entry.value_ptr.*; + if (nval != .object) continue; + const nobj = nval.object; + const ntype = getJsonStringFromObj(nobj, "type") orelse 
continue; + if (!std.mem.eql(u8, ntype, "send")) continue; + + if (getJsonStringFromObj(nobj, "target_node")) |tn| { + if (!nodes.contains(tn)) { + try errors.append(alloc, .{ + .err_type = "invalid_send_target", + .node = nname, + .key = "target_node", + .message = "send target_node does not exist in nodes map", + }); + } + } + } + + // The errors list contains slices pointing into `parsed` which will be + // freed by `defer parsed.deinit()`. We need to copy all strings into + // alloc-owned memory before returning. + const result = try copyErrors(alloc, errors.items); + return result; +} + +// ── Helpers ─────────────────────────────────────────────────────────── + +fn isReserved(name: []const u8) bool { + return std.mem.eql(u8, name, "__start__") or std.mem.eql(u8, name, "__end__"); +} + +/// Given a raw edge source like "node:route_value", return "node". +/// If no colon, returns the whole string. +fn edgeSourceNode(src_raw: []const u8) []const u8 { + if (std.mem.indexOfScalar(u8, src_raw, ':')) |colon_pos| { + return src_raw[0..colon_pos]; + } + return src_raw; +} + +fn hasRouteEdge(edge_sources: []const []const u8, edge_targets: []const []const u8, node_name: []const u8, route_key: []const u8, target: []const u8) bool { + for (edge_sources, edge_targets) |src_raw, edge_target| { + if (!std.mem.eql(u8, edge_target, target)) continue; + const colon_pos = std.mem.indexOfScalar(u8, src_raw, ':') orelse continue; + if (!std.mem.eql(u8, src_raw[0..colon_pos], node_name)) continue; + if (std.mem.eql(u8, src_raw[colon_pos + 1 ..], route_key)) return true; + } + return false; +} + +fn getJsonStringFromObj(obj: std.json.ObjectMap, key: []const u8) ?[]const u8 { + const val = obj.get(key) orelse return null; + if (val == .string) return val.string; + return null; +} + +/// Scan `text` for {{state.KEY}} references and check them against schema. 
+fn checkStateRefs( + alloc: Allocator, + errors: *std.ArrayListUnmanaged(ValidationError), + schema: std.json.ObjectMap, + node_name: []const u8, + text: []const u8, +) !void { + var pos: usize = 0; + while (pos < text.len) { + // Find "{{" + const open = std.mem.indexOfPos(u8, text, pos, "{{") orelse break; + const close = std.mem.indexOfPos(u8, text, open + 2, "}}") orelse break; + const expr = text[open + 2 .. close]; + pos = close + 2; + + // Check if it's "state.KEY" + if (std.mem.startsWith(u8, expr, "state.")) { + const key = expr["state.".len..]; + if (key.len > 0 and !schema.contains(key)) { + // No copy is made at this site: `node_name` and `key` are + // slices into the parsed JSON tree and would dangle after + // parsed.deinit(). That is safe only because copyErrors() + // deep-copies every string in the error list before + // validate() returns its result to the caller. + try errors.append(alloc, .{ + .err_type = "undefined_state_key", + .node = node_name, + .key = key, + .message = "state key referenced in template is not defined in state_schema", + }); + } + } + } +} + +/// Deep-copy all strings in the error list into alloc-owned memory. +/// This is needed because the source strings point into a parsed JSON tree +/// that will be freed after validate() returns. +fn copyErrors(alloc: Allocator, src: []const ValidationError) ![]ValidationError { + const result = try alloc.alloc(ValidationError, src.len); + for (src, 0..) 
|e, i| { + result[i] = .{ + .err_type = try alloc.dupe(u8, e.err_type), + .node = if (e.node) |n| try alloc.dupe(u8, n) else null, + .key = if (e.key) |k| try alloc.dupe(u8, k) else null, + .message = try alloc.dupe(u8, e.message), + }; + } + return result; +} + +// ── Tests: legacy ───────────────────────────────────────────────────── test "validateStepsForCreateRun: valid workflow" { const allocator = std.testing.allocator; @@ -249,58 +702,6 @@ test "validateStepsForCreateRun: rejects duplicate depends_on item" { try std.testing.expectError(error.DependsOnDuplicate, validateStepsForCreateRun(allocator, parsed.value.array.items)); } -test "validateStepsForCreateRun: rejects missing sub_workflow workflow field" { - const allocator = std.testing.allocator; - const payload = - \\[ - \\ {"id":"sw","type":"sub_workflow"} - \\] - ; - - const parsed = try std.json.parseFromSlice(std.json.Value, allocator, payload, .{}); - defer parsed.deinit(); - try std.testing.expectError(error.SubWorkflowRequired, validateStepsForCreateRun(allocator, parsed.value.array.items)); -} - -test "validateStepsForCreateRun: rejects missing saga body field" { - const allocator = std.testing.allocator; - const payload = - \\[ - \\ {"id":"sg","type":"saga"} - \\] - ; - - const parsed = try std.json.parseFromSlice(std.json.Value, allocator, payload, .{}); - defer parsed.deinit(); - try std.testing.expectError(error.SagaBodyRequired, validateStepsForCreateRun(allocator, parsed.value.array.items)); -} - -test "validateStepsForCreateRun: rejects missing debate count field" { - const allocator = std.testing.allocator; - const payload = - \\[ - \\ {"id":"db","type":"debate","prompt_template":"x"} - \\] - ; - - const parsed = try std.json.parseFromSlice(std.json.Value, allocator, payload, .{}); - defer parsed.deinit(); - try std.testing.expectError(error.DebateCountRequired, validateStepsForCreateRun(allocator, parsed.value.array.items)); -} - -test "validateStepsForCreateRun: rejects missing 
group_chat participants field" { - const allocator = std.testing.allocator; - const payload = - \\[ - \\ {"id":"gc","type":"group_chat","prompt_template":"x"} - \\] - ; - - const parsed = try std.json.parseFromSlice(std.json.Value, allocator, payload, .{}); - defer parsed.deinit(); - try std.testing.expectError(error.GroupChatParticipantsRequired, validateStepsForCreateRun(allocator, parsed.value.array.items)); -} - test "validateStepsForCreateRun: rejects non-object retry field" { const allocator = std.testing.allocator; const payload = @@ -340,41 +741,122 @@ test "validateStepsForCreateRun: rejects non-positive timeout_ms" { try std.testing.expectError(error.TimeoutMsMustBePositiveInteger, validateStepsForCreateRun(allocator, parsed.value.array.items)); } -test "validateStepsForCreateRun: rejects invalid wait duration string" { - const allocator = std.testing.allocator; - const payload = - \\[ - \\ {"id":"w","type":"wait","duration_ms":"abc"} - \\] +// ── Tests: new graph validation ──────────────────────────────────────── + +test "validate valid simple workflow" { + const alloc = std.testing.allocator; + const wf = + \\{"state_schema":{"msg":{"type":"string","reducer":"last_value"}},"nodes":{"a":{"type":"task","prompt":"{{state.msg}}"}},"edges":[["__start__","a"],["a","__end__"]]} ; + const errors = try validate(alloc, wf); + defer { + for (errors) |e| { + alloc.free(e.err_type); + if (e.node) |n| alloc.free(n); + if (e.key) |k| alloc.free(k); + alloc.free(e.message); + } + alloc.free(errors); + } + try std.testing.expectEqual(@as(usize, 0), errors.len); +} - const parsed = try std.json.parseFromSlice(std.json.Value, allocator, payload, .{}); - defer parsed.deinit(); - try std.testing.expectError(error.WaitDurationInvalid, validateStepsForCreateRun(allocator, parsed.value.array.items)); +test "validate unreachable node" { + const alloc = std.testing.allocator; + const wf = + 
\\{"state_schema":{},"nodes":{"a":{"type":"task","prompt":"x"},"orphan":{"type":"task","prompt":"y"}},"edges":[["__start__","a"],["a","__end__"]]} + ; + const errors = try validate(alloc, wf); + defer { + for (errors) |e| { + alloc.free(e.err_type); + if (e.node) |n| alloc.free(n); + if (e.key) |k| alloc.free(k); + alloc.free(e.message); + } + alloc.free(errors); + } + try std.testing.expect(errors.len > 0); + try std.testing.expectEqualStrings("unreachable_node", errors[0].err_type); } -test "validateStepsForCreateRun: rejects negative wait duration" { - const allocator = std.testing.allocator; - const payload = - \\[ - \\ {"id":"w","type":"wait","duration_ms":-1} - \\] +test "validate undefined state key" { + const alloc = std.testing.allocator; + const wf = + \\{"state_schema":{"msg":{"type":"string","reducer":"last_value"}},"nodes":{"a":{"type":"task","prompt":"{{state.typo}}"}},"edges":[["__start__","a"],["a","__end__"]]} ; + const errors = try validate(alloc, wf); + defer { + for (errors) |e| { + alloc.free(e.err_type); + if (e.node) |n| alloc.free(n); + if (e.key) |k| alloc.free(k); + alloc.free(e.message); + } + alloc.free(errors); + } + try std.testing.expect(errors.len > 0); + try std.testing.expectEqualStrings("undefined_state_key", errors[0].err_type); +} - const parsed = try std.json.parseFromSlice(std.json.Value, allocator, payload, .{}); - defer parsed.deinit(); - try std.testing.expectError(error.WaitDurationInvalid, validateStepsForCreateRun(allocator, parsed.value.array.items)); +test "validate send target exempt from reachability" { + const alloc = std.testing.allocator; + const wf = + \\{"state_schema":{"items":{"type":"array","reducer":"last_value"},"results":{"type":"array","reducer":"append"}},"nodes":{"s":{"type":"send","items_key":"state.items","target_node":"worker","output_key":"results"},"worker":{"type":"task","prompt":"do work"}},"edges":[["__start__","s"],["s","__end__"]]} + ; + const errors = try validate(alloc, wf); + defer { + for 
(errors) |e| { + alloc.free(e.err_type); + if (e.node) |n| alloc.free(n); + if (e.key) |k| alloc.free(k); + alloc.free(e.message); + } + alloc.free(errors); + } + try std.testing.expectEqual(@as(usize, 0), errors.len); } -test "validateStepsForCreateRun: rejects invalid wait signal type" { - const allocator = std.testing.allocator; - const payload = - \\[ - \\ {"id":"w","type":"wait","signal":1} - \\] +test "validate invalid route target" { + const alloc = std.testing.allocator; + const wf = + \\{"state_schema":{"x":{"type":"string","reducer":"last_value"}},"nodes":{"r":{"type":"route","input":"state.x","routes":{"a":"nonexistent"}}},"edges":[["__start__","r"],["r:a","nonexistent"]]} ; + const errors = try validate(alloc, wf); + defer { + for (errors) |e| { + alloc.free(e.err_type); + if (e.node) |n| alloc.free(n); + if (e.key) |k| alloc.free(k); + alloc.free(e.message); + } + alloc.free(errors); + } + // Should have error about nonexistent node (either in route target or edge target) + try std.testing.expect(errors.len > 0); +} - const parsed = try std.json.parseFromSlice(std.json.Value, allocator, payload, .{}); - defer parsed.deinit(); - try std.testing.expectError(error.WaitSignalInvalid, validateStepsForCreateRun(allocator, parsed.value.array.items)); +test "validate route requires matching conditional edges for declared routes" { + const alloc = std.testing.allocator; + const wf = + \\{"state_schema":{"x":{"type":"string","reducer":"last_value"}},"nodes":{"r":{"type":"route","input":"state.x","routes":{"yes":"approved"},"default":"yes"},"approved":{"type":"task","prompt":"approve"}},"edges":[["__start__","r"],["r:no","approved"],["approved","__end__"]]} + ; + const errors = try validate(alloc, wf); + defer { + for (errors) |e| { + alloc.free(e.err_type); + if (e.node) |n| alloc.free(n); + if (e.key) |k| alloc.free(k); + alloc.free(e.message); + } + alloc.free(errors); + } + var found_missing_route_edge = false; + for (errors) |err| { + if (std.mem.eql(u8, 
err.err_type, "missing_route_edge")) { + found_missing_route_edge = true; + break; + } + } + try std.testing.expect(found_missing_route_edge); } diff --git a/src/workspace.zig b/src/workspace.zig index c8b0d39..b7ecd9b 100644 --- a/src/workspace.zig +++ b/src/workspace.zig @@ -14,6 +14,52 @@ pub fn sanitizeId(allocator: std.mem.Allocator, id: []const u8) ![]const u8 { return buf; } +/// Validate that a workspace path is safely contained within the workspace root. +/// Returns true if the canonical workspace_path starts with the canonical root +/// and contains no invalid characters. Returns false if a symlink escape or +/// directory traversal is detected. +pub fn validateWorkspacePath(allocator: std.mem.Allocator, workspace_root: []const u8, workspace_path: []const u8) bool { + // Check for invalid characters (\n, \r, \0) in the raw path + for (workspace_path) |ch| { + if (ch == '\n' or ch == '\r' or ch == 0) { + log.warn("workspace path contains invalid character: {s}", .{workspace_path}); + return false; + } + } + + // Canonicalize both paths (resolves symlinks) + const canon_root = std.fs.cwd().realpathAlloc(allocator, workspace_root) catch { + log.warn("workspace: cannot resolve root {s}", .{workspace_root}); + return false; + }; + defer allocator.free(canon_root); + + const canon_path = std.fs.cwd().realpathAlloc(allocator, workspace_path) catch { + log.warn("workspace: cannot resolve path {s}", .{workspace_path}); + return false; + }; + defer allocator.free(canon_path); + + // Check that canonical workspace_path starts with canonical workspace_root + if (!std.mem.startsWith(u8, canon_path, canon_root)) { + log.warn("workspace path escape detected: {s} is not under {s}", .{ canon_path, canon_root }); + return false; + } + + // Ensure there's a separator after the root (not just a prefix match on a longer name) + if (canon_path.len > canon_root.len and canon_path[canon_root.len] != std.fs.path.sep) { + log.warn("workspace path escape detected: {s} is not under 
{s}", .{ canon_path, canon_root }); + return false; + } + + return true; +} + +/// Sanitize a directory name by replacing any character not in [A-Za-z0-9._-] +/// with '_'. This prevents directory traversal via task identifiers. +/// Alias for sanitizeId — same logic, exported under the canonical name. +pub const sanitizeDirectoryName = sanitizeId; + /// An isolated workspace directory for a single task. pub const Workspace = struct { root: []const u8, @@ -45,6 +91,13 @@ pub const Workspace = struct { return err; }; + // Validate the created path is safely under the workspace root + if (!validateWorkspacePath(allocator, root, path)) { + log.warn("workspace: path validation failed for {s}, refusing to use", .{path}); + allocator.free(path); + return error.PathValidationFailed; + } + // If the directory already had contents it was not freshly created var dir = try std.fs.cwd().openDir(path, .{ .iterate = true }); defer dir.close(); @@ -114,7 +167,11 @@ pub fn cleanAll(root: []const u8) void { /// Returns true when the command exits with code 0, false otherwise. /// Times out after `timeout_ms` milliseconds (the child is killed on timeout). 
pub fn runHook(allocator: std.mem.Allocator, command: []const u8, cwd: []const u8, timeout_ms: u64) !bool { - const argv = [_][]const u8{ "/bin/sh", "-lc", command }; + const native = @import("builtin").os.tag; + const argv = if (native == .windows) + [_][]const u8{ "cmd.exe", "/C", command } + else + [_][]const u8{ "/bin/sh", "-lc", command }; var child = std.process.Child.init(&argv, allocator); child.cwd = cwd; @@ -215,6 +272,9 @@ test "Workspace create and remove" { } test "runHook executes shell command" { + const native = @import("builtin").os.tag; + if (native == .windows) return error.SkipZigTest; + const allocator = std.testing.allocator; var tmp = std.testing.tmpDir(.{}); defer tmp.cleanup(); @@ -233,6 +293,9 @@ test "runHook executes shell command" { } test "runHook returns false for failing command" { + const native = @import("builtin").os.tag; + if (native == .windows) return error.SkipZigTest; + const allocator = std.testing.allocator; var tmp = std.testing.tmpDir(.{}); defer tmp.cleanup(); @@ -262,3 +325,38 @@ test "cleanAll removes all subdirectories" { try std.testing.expectError(error.FileNotFound, tmp.dir.openDir("task-001", .{})); try std.testing.expectError(error.FileNotFound, tmp.dir.openDir("task-002", .{})); } + +test "validateWorkspacePath accepts safe path" { + const allocator = std.testing.allocator; + var tmp = std.testing.tmpDir(.{}); + defer tmp.cleanup(); + + const root = try tmp.dir.realpathAlloc(allocator, "."); + defer allocator.free(root); + + // Create a subdirectory + try tmp.dir.makeDir("safe-task"); + const sub_path = try std.fs.path.join(allocator, &.{ root, "safe-task" }); + defer allocator.free(sub_path); + + try std.testing.expect(validateWorkspacePath(allocator, root, sub_path)); +} + +test "validateWorkspacePath rejects path outside root" { + const allocator = std.testing.allocator; + var tmp = std.testing.tmpDir(.{}); + defer tmp.cleanup(); + + const root = try tmp.dir.realpathAlloc(allocator, "."); + defer 
allocator.free(root); + + // /tmp is definitely not under the test temp dir + try std.testing.expect(!validateWorkspacePath(allocator, root, "/tmp")); +} + +test "sanitizeDirectoryName replaces invalid chars" { + const allocator = std.testing.allocator; + const result = try sanitizeDirectoryName(allocator, "../../etc/passwd"); + defer allocator.free(result); + try std.testing.expectEqualStrings(".._.._etc_passwd", result); +}