diff --git a/CLAUDE.md b/CLAUDE.md index a2f719c..8989137 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,76 +1,104 @@ # NullBoiler -DAG-based workflow orchestrator for NullClaw AI bot agents. Part of the Null ecosystem (NullTracker, NullClaw). +Graph-based workflow orchestrator with unified state model for NullClaw AI bot agents. Part of the Null ecosystem (NullTracker, NullClaw). ## Tech Stack - **Language**: Zig 0.15.2 - **Database**: SQLite (vendored in `deps/sqlite/`), WAL mode - **Protocol**: HTTP/1.1 REST API with JSON payloads -- **Dispatch**: HTTP (webhook/api_chat/openai_chat), MQTT, Redis Streams +- **Dispatch**: HTTP (webhook/api_chat/openai_chat/a2a), MQTT, Redis Streams - **Vendored C libs**: SQLite (`deps/sqlite/`), hiredis (`deps/hiredis/`), libmosquitto (`deps/mosquitto/`) ## Module Map | File | Role | |------|------| -| `main.zig` | CLI args (`--port`, `--db`, `--config`, `--version`), HTTP accept loop, engine thread, tracker thread | -| `api.zig` | REST API routing and 19 endpoint handlers (incl. 
signal, chat, tracker status) | -| `store.zig` | SQLite layer, 30+ CRUD methods, schema migrations | -| `engine.zig` | DAG scheduler: tick loop, 14 step type handlers, graph cycles, worker handoff | -| `dispatch.zig` | Worker selection (tags, capacity), protocol-aware dispatch (`webhook`, `api_chat`, `openai_chat`, `mqtt`, `redis_stream`) | +| `main.zig` | CLI args (`--port`, `--db`, `--config`, `--version`, `--export-manifest`, `--from-json`), HTTP accept loop, engine thread, tracker thread | +| `api.zig` | REST API routing and 30+ endpoint handlers (runs, workers, workflows, checkpoints, state, SSE stream, tracker) | +| `store.zig` | SQLite layer, CRUD methods for all tables, schema migrations (4 migration files) | +| `engine.zig` | Graph-based state scheduler: tick loop, 7 node type handlers, checkpoints, reducers, goto, breakpoints, deferred nodes, reconciliation | +| `state.zig` | Unified state model: 7 reducer types (last_value, append, merge, add, min, max, add_messages), overwrite bypass, ephemeral keys, state path resolution | +| `sse.zig` | Server-Sent Events hub: per-run event queues, 5 stream modes (values, updates, tasks, debug, custom) | +| `dispatch.zig` | Worker selection (tags, capacity, A2A preference), protocol-aware dispatch | | `async_dispatch.zig` | Thread-safe response queue for async MQTT/Redis dispatch (keyed by correlation_id) | | `redis_client.zig` | Hiredis wrapper: connect, XADD, listener thread for response streams | | `mqtt_client.zig` | Libmosquitto wrapper: connect, publish, subscribe, listener thread for response topics | -| `templates.zig` | Prompt template rendering: `{{input.X}}`, `{{steps.ID.output}}`, `{{item}}`, `{{task.X}}`, `{{debate_responses}}`, `{{chat_history}}`, `{{role}}` | +| `templates.zig` | Prompt template rendering: state-based `{{state.X}}`, legacy `{{input.X}}`, `{{item}}`, `{{task.X}}`, `{{attempt}}`, conditional blocks | | `callbacks.zig` | Fire-and-forget webhook callbacks on step/run events | | `config.zig` 
| JSON config loader (`Config`, `WorkerConfig`, `EngineConfig`, `TrackerConfig`) | -| `types.zig` | `RunStatus`, `StepStatus`, `StepType` (14 types), `WorkerStatus`, `TrackerTaskState`, row types | +| `types.zig` | `RunStatus`, `StepStatus`, `StepType` (7 types), `WorkerStatus`, `ReducerType`, row types | | `tracker.zig` | Pull-mode tracker thread: poll NullTickets, claim tasks, heartbeat leases, stall detection | | `tracker_client.zig` | HTTP client for NullTickets API (claim, heartbeat, transition, fail, artifacts) | | `workspace.zig` | Workspace lifecycle: create, hook execution, cleanup, path sanitization | | `subprocess.zig` | NullClaw subprocess: spawn, health check, prompt sending, kill | -| `workflow_loader.zig` | Load JSON workflow definitions from `workflows/` directory | +| `workflow_loader.zig` | Load JSON workflow definitions from `workflows/` directory, hot-reload watcher | +| `workflow_validation.zig` | Graph-based workflow validation: reachability, cycles, state key refs, route/send targets | | `ids.zig` | UUID v4 generation, `nowMs()` | -| `migrations/001_init.sql` | 6 tables: workers, runs, steps, step_deps, events, artifacts | -| `migrations/002_advanced_steps.sql` | 3 tables: cycle_state, chat_messages, saga_state + ALTER TABLE | +| `metrics.zig` | Prometheus-style metrics counters | +| `strategy.zig` | Pluggable strategy map for workflow execution | +| `worker_protocol.zig` | Protocol-specific request body builders | +| `worker_response.zig` | Protocol-specific response parsers | +| `export_manifest.zig` | Export tool manifest for CLI integration | +| `from_json.zig` | Import workflow from JSON CLI command | ## Build / Test / Run ```sh zig build # build -zig build test # unit tests +zig build test # unit tests (320 tests) zig build && bash tests/test_e2e.sh # e2e tests (requires Python 3 for mock workers) ./zig-out/bin/nullboiler --port 8080 --db nullboiler.db --config config.json ``` +## Step Types (7) + +`task`, `route`, `interrupt`, `agent`, 
`send`, `transform`, `subgraph` + +## Reducers (7) + +`last_value`, `append`, `merge`, `add`, `min`, `max`, `add_messages` + ## API Endpoints | Method | Path | Description | |--------|------|-------------| | GET | `/health` | Health check | +| GET | `/metrics` | Prometheus metrics | | POST | `/workers` | Register worker | | GET | `/workers` | List workers | | DELETE | `/workers/{id}` | Remove worker | -| POST | `/runs` | Create workflow run | -| GET | `/runs` | List runs | +| POST | `/runs` | Create workflow run (legacy step-array or graph format) | +| GET | `/runs` | List runs (supports ?status= filter) | | GET | `/runs/{id}` | Get run details | | POST | `/runs/{id}/cancel` | Cancel run | | POST | `/runs/{id}/retry` | Retry failed run | +| POST | `/runs/{id}/resume` | Resume interrupted run (with optional state updates) | +| POST | `/runs/{id}/state` | Inject state into running run (pending injection) | +| POST | `/runs/{id}/replay` | Replay run from a checkpoint | +| POST | `/runs/fork` | Fork run from a checkpoint into a new run | | GET | `/runs/{id}/steps` | List steps for run | | GET | `/runs/{id}/steps/{step_id}` | Get step details | -| POST | `/runs/{id}/steps/{step_id}/approve` | Approve approval step | -| POST | `/runs/{id}/steps/{step_id}/reject` | Reject approval step | | GET | `/runs/{id}/events` | List run events | -| POST | `/runs/{id}/steps/{step_id}/signal` | Signal a waiting step | -| GET | `/runs/{id}/steps/{step_id}/chat` | Get group_chat transcript | -| GET | `/tracker/status` | Pull-mode tracker status (running tasks, concurrency, counters) | +| GET | `/runs/{id}/checkpoints` | List checkpoints for run | +| GET | `/runs/{id}/checkpoints/{cpId}` | Get checkpoint details | +| GET | `/runs/{id}/stream` | SSE stream (supports ?mode=values\|updates\|tasks\|debug) | +| POST | `/workflows` | Create workflow definition | +| GET | `/workflows` | List workflow definitions | +| GET | `/workflows/{id}` | Get workflow definition | +| PUT | `/workflows/{id}` 
| Update workflow definition | +| DELETE | `/workflows/{id}` | Delete workflow definition | +| POST | `/workflows/{id}/validate` | Validate workflow definition | +| GET | `/workflows/{id}/mermaid` | Export workflow as Mermaid diagram | +| POST | `/workflows/{id}/run` | Start a run from a stored workflow | +| GET | `/rate-limits` | Get current rate limit info per worker | +| POST | `/admin/drain` | Enable drain mode | +| GET | `/tracker/status` | Pull-mode tracker status | | GET | `/tracker/tasks` | List running pull-mode tasks | | GET | `/tracker/tasks/{task_id}` | Get single pull-mode task details | - -## Step Types - -`task`, `fan_out`, `map`, `condition`, `approval`, `reduce`, `loop`, `sub_workflow`, `wait`, `router`, `transform`, `saga`, `debate`, `group_chat` +| GET | `/tracker/stats` | Tracker statistics | +| POST | `/tracker/refresh` | Force tracker poll | +| POST | `/internal/agent-events/{run_id}/{step_id}` | Agent event callback (from NullClaw) | ## Coding Conventions @@ -83,16 +111,47 @@ zig build && bash tests/test_e2e.sh # e2e tests (requires Python 3 for mock wo ## Architecture -- Single-threaded HTTP accept loop on main thread -- Background engine thread polls DB for active runs (+ polls async response queue for MQTT/Redis steps) -- `std.atomic.Value(bool)` for coordinated shutdown -- Config workers seeded into DB on startup (source = "config") -- Schema in `migrations/001_init.sql` + `002_advanced_steps.sql`, applied on `Store.init` -- Graph cycles: condition/router can route back to completed steps, engine creates new step instances per iteration -- Worker handoff: dispatch result can include `handoff_to` for chained delegation (max 5) -- Async dispatch: MQTT/Redis workers use two-phase dispatch (publish → engine polls response queue) -- Background listener threads (MQTT/Redis) started conditionally when async workers are configured -- Pull-mode tracker thread (conditional): polls NullTickets for tasks, claims work, manages subprocess lifecycles +- 
**Unified state model**: Every node reads from state, returns partial updates, engine applies reducers +- **Graph-based execution**: Workflow = `{nodes: {}, edges: [], state_schema: {}}` with `__start__` and `__end__` synthetic nodes +- **Checkpoints**: State snapshot after every node, enabling fork/replay/resume +- **Conditional edges**: Route nodes produce values, edges like `["router:yes", "next"]` are taken when route result matches +- **Deferred nodes**: Nodes with `"defer": true` execute right before `__end__` +- **Command primitive**: Workers can return `{"goto": "node_name"}` to override normal graph traversal +- **Breakpoints**: `interrupt_before` / `interrupt_after` arrays pause execution +- **Subgraph**: Inline child workflow execution with input/output mapping (max recursion depth 10) +- **Multi-turn agents**: Agent nodes can loop with `continuation_prompt` up to `max_turns` +- **Configurable runs**: Per-run config stored as `state.__config` +- **Node-level cache**: FNV hash of (node_name, rendered_prompt) with configurable TTL +- **Token accounting**: Cumulative input/output token tracking per step and per run +- **Workflow hot-reload**: `WorkflowWatcher` polls `workflows/` directory for JSON changes, upserts into DB +- **Reconciliation**: Check NullTickets task status between steps, cancel if task is terminal + +### Thread Model + +``` +Main thread: HTTP accept loop (push API) +Engine thread: Graph tick loop (state-based scheduler) +Tracker thread: Poll NullTickets -> claim -> workspace -> subprocess/dispatch +MQTT listener: (conditional, for async MQTT workers) +Redis listener: (conditional, for async Redis workers) +``` + +### SSE Streaming + +5 modes for real-time consumption via `GET /runs/{id}/stream?mode=X`: +- `values` -- full state after each step +- `updates` -- node name + partial state updates +- `tasks` -- task start/finish with metadata +- `debug` -- everything with step number + timestamp +- `custom` -- user-defined events from worker 
output (`ui_messages`, `stream_messages`) + +## Database + +SQLite with WAL mode. Schema across 4 migrations: +- `001_init.sql`: workers, runs, steps, step_deps, events, artifacts +- `002_advanced_steps.sql`: cycle_state, chat_messages, saga_state (legacy, unused by current engine) +- `003_tracker.sql`: tracker_runs +- `004_orchestration.sql`: workflows, checkpoints, agent_events, pending_state_injections, node_cache, pending_writes + ALTER TABLE extensions for state_json, config_json, parent_run_id, token accounting ## Pull-Mode (NullTickets Integration) @@ -131,27 +190,3 @@ Optional pull-mode where NullBoiler acts as an agent polling NullTickets for wor ``` If `tracker` is absent or null, the tracker thread does not start and push-mode operates unchanged. - -### Workflow Definitions - -JSON files in `workflows/` directory. Two execution modes: -- `subprocess` — spawn NullClaw child process per task (isolated workspace) -- `dispatch` — use existing registered workers (no workspace) - -Three-axis concurrency: global (`max_concurrent_tasks`) + per-pipeline + per-role limits. - -### Thread Model - -``` -Main thread: HTTP accept loop (push API — unchanged) -Engine thread: DAG tick loop (unchanged) -Tracker thread: Poll NullTickets → claim → workspace → subprocess/dispatch -MQTT listener: (unchanged, conditional) -Redis listener: (unchanged, conditional) -``` - -## Database - -SQLite with WAL mode. Schema: 9 tables across 2 migrations. 
-- `001_init.sql`: workers, runs, steps, step_deps, events, artifacts -- `002_advanced_steps.sql`: cycle_state, chat_messages, saga_state + iteration_index/child_run_id columns on steps diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..bc38dea --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 nullclaw contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 10d466e..7fda7e1 100644 --- a/README.md +++ b/README.md @@ -67,6 +67,22 @@ This keeps the architecture modular, simpler to reason about, and easier to evol See additional integration docs in [`docs/`](./docs). 
+## Workflow Graph Features + +The orchestration graph runtime supports: + +- `task`, `agent`, `route`, `interrupt`, `send`, `transform`, and `subgraph` nodes +- run replay, checkpoint forking, breakpoint interrupts, and post-start state injection +- `send` fan-out with canonical `items_key` and configurable `output_key` +- task/agent output shaping via `output_key` and `output_mapping` +- template access to `state.*`, `input.*`, `item.*`, `config.*`, and `store.*.*` +- `transform.store_updates` for writing durable workflow memory back to NullTickets + +Store-backed templates and `store_updates` require a NullTickets base URL. The +runtime resolves it from workflow fields such as `tracker_url` or from run config +(`config.tracker_url` / `config.tracker_api_token`), which are injected into +state as `__config`. + ## Config Location - Default config path: `~/.nullboiler/config.json` diff --git a/config.example.json b/config.example.json index d069183..fca25c5 100644 --- a/config.example.json +++ b/config.example.json @@ -3,6 +3,7 @@ "port": 8080, "db": "nullboiler.db", "api_token": null, + "self_url": null, "workers": [ { "id": "nullclaw-1", @@ -28,6 +29,14 @@ "model": "anthropic/claude-sonnet-4-6", "tags": ["writer", "editor"], "max_concurrent": 2 + }, + { + "id": "nullclaw-a2a", + "url": "http://localhost:3000", + "token": "set_same_value_as_nullclaw_gateway_paired_tokens", + "protocol": "a2a", + "tags": ["coder", "agent"], + "max_concurrent": 3 + } ], "engine": { diff --git a/docs/superpowers/specs/2026-03-13-orchestration-gaps-design.md b/docs/superpowers/specs/2026-03-13-orchestration-gaps-design.md new file mode 100644 index 0000000..414ca81 --- /dev/null +++ b/docs/superpowers/specs/2026-03-13-orchestration-gaps-design.md @@ -0,0 +1,333 @@ +# Orchestration Gaps Design — Phase 2 + +**Date:** 2026-03-13 +**Status:** Draft +**Scope:** NullBoiler, NullTickets, NullHub +**Branch:** feat/orchestration (extends Phase 1) + +--- + +## Overview + +Phase 2 closes remaining 
gaps vs LangGraph and Symphony. No backward compatibility needed. + +--- + +## 1. Command Primitive + +Nodes can return `goto` alongside `state_updates` to control routing: + +```json +{ + "state_updates": {"review_grade": "approve"}, + "goto": "merge_step" +} +``` + +Engine behavior: if response contains `goto`, skip normal edge evaluation and jump directly to the named node. The node must exist in the workflow. `goto` can be a string (single node) or array (fan-out to multiple nodes). + +Worker response JSON: +```json +{"response": "Approved", "goto": "merge_step"} +``` + +Engine parses `goto` from worker response alongside the text response. For `task` and `agent` nodes only. `route`, `transform`, `interrupt` nodes don't use `goto`. + +--- + +## 2. Subgraphs + +New node type `subgraph`: + +```json +{ + "review_flow": { + "type": "subgraph", + "workflow_id": "code-review-workflow", + "input_mapping": { + "code": "state.fix_result", + "description": "state.task_description" + }, + "output_key": "review_result" + } +} +``` + +Engine behavior: +1. Load workflow definition from `workflows` table by `workflow_id` +2. Build subgraph input from parent state via `input_mapping` (key = subgraph input key, value = parent state path) +3. Create a child run with `createRunWithState()`, linking to parent via a new `parent_run_id` column +4. Execute child run to completion (inline, not spawning a separate engine tick loop — just call `processRun` recursively) +5. On completion, extract child's final state and write to parent's `output_key` +6. On failure, propagate failure to parent run + +### Schema changes + +```sql +ALTER TABLE runs ADD COLUMN parent_run_id TEXT REFERENCES runs(id); +``` + +### StepType update + +Add `subgraph` to StepType enum in types.zig. + +--- + +## 3. Breakpoints on Any Node + +Workflow-level config: + +```json +{ + "interrupt_before": ["review", "merge"], + "interrupt_after": ["generate"], + ... 
+} +``` + +Engine behavior: before executing a node, check if it's in `interrupt_before`. If so, save checkpoint and set run to `interrupted`. After executing a node, check `interrupt_after`. Same behavior. + +Resume works exactly like interrupt node resume — `POST /runs/{id}/resume` with optional `state_updates`. + +This is purely engine logic — no schema changes, no new API endpoints. + +--- + +## 4. Store API in NullTickets + +New table: + +```sql +CREATE TABLE store ( + namespace TEXT NOT NULL, + key TEXT NOT NULL, + value_json TEXT NOT NULL, + created_at_ms INTEGER NOT NULL, + updated_at_ms INTEGER NOT NULL, + PRIMARY KEY (namespace, key) +); +CREATE INDEX idx_store_namespace ON store(namespace); +``` + +### API endpoints + +``` +PUT /store/{namespace}/{key} — put (upsert) +GET /store/{namespace}/{key} — get single +GET /store/{namespace} — list all in namespace +DELETE /store/{namespace}/{key} — delete +DELETE /store/{namespace} — delete namespace +``` + +Request body for PUT: +```json +{"value": {"any": "json"}} +``` + +Response for GET: +```json +{ + "namespace": "user_123", + "key": "preferences", + "value": {"theme": "dark"}, + "created_at_ms": 1710300000000, + "updated_at_ms": 1710300005000 +} +``` + +### Usage from NullBoiler workflows + +New template syntax: `{{store.namespace.key}}` — engine fetches from nulltickets Store API during prompt rendering. + +Runtime resolution: + +- NullTickets base URL comes from workflow-level `tracker_url` / `nulltickets_url`, or from run config (`config.tracker_url`, surfaced as `state.__config.tracker_url`). +- Optional auth token comes from `tracker_api_token` / `nulltickets_api_token` on the workflow or run config. +- Missing store keys render as empty strings in templates. 
+ +New node type isn't needed — `task` nodes can read via template, and `transform` nodes can write via a `store_updates` field (single object or array of objects): + +```json +{ + "save_context": { + "type": "transform", + "updates": {}, + "store_updates": { + "namespace": "project_context", + "key": "latest_review", + "value": "state.review_result" + } + } +} +``` + +`store_updates.value` can point at a state path such as `state.review_result`, or it can be inline JSON that will be written as-is. + +Engine calls nulltickets `PUT /store/{namespace}/{key}` after `updates` are applied, so writes can reference the node's freshly updated state. + +--- + +## 5. Multi-Turn Continuation + +Extend `agent` node with multi-turn support: + +```json +{ + "fix_bug": { + "type": "agent", + "prompt": "Fix this: {{state.task_description}}", + "continuation_prompt": "Task is still active. Continue from current state.", + "max_turns": 10, + "tags": ["coder"], + "output_key": "fix_result" + } +} +``` + +Engine behavior: +1. Turn 1: A2A `tasks/send` with rendered `prompt`, `contextId = "run_{id}_step_{name}"` +2. Parse response — check if agent indicated completion (response contains final answer, no pending tool calls) +3. If not complete and turn < `max_turns`: send `continuation_prompt` via A2A with same `contextId` (session persistence) +4. Repeat until complete or `max_turns` exhausted +5. Final response text → state_updates via `output_key` + +Between turns, engine can: +- Check if nulltickets task state changed (reconciliation) +- Apply pending state injections +- Broadcast SSE `agent_turn` events + +No schema changes needed — this is engine logic using existing A2A infrastructure. + +--- + +## 6. Configurable Runs + +Workflow JSON gets optional `defaults` section: + +```json +{ + "defaults": { + "model": "claude-sonnet-4-6", + "temperature": 0.7, + "max_agent_turns": 10 + }, + ... 
+} +``` + +Run creation accepts `config` overrides: + +``` +POST /workflows/{id}/run +{ + "input": {"task": "fix bug"}, + "config": {"model": "claude-opus-4-6", "temperature": 0.3} +} +``` + +Merged config (run overrides > workflow defaults) stored in `run.config_json`. + +Template access: `{{config.model}}`, `{{config.temperature}}`. + +### Schema changes + +```sql +ALTER TABLE runs ADD COLUMN config_json TEXT; +``` + +--- + +## 7. Per-State Concurrency in NullTickets + +Extend nulltickets claim endpoint to support per-state limits. + +Claim request gets optional `concurrency` parameter: + +``` +POST /leases/claim +{ + "agent_id": "boiler-01", + "agent_role": "coder", + "concurrency": { + "per_state": {"in_progress": 5, "rework": 2} + } +} +``` + +Claim logic: before returning a task, count currently-leased tasks in the same state. If at limit, skip to next eligible task. + +This is a nulltickets store.zig change in the claim query. + +--- + +## 8. Reconciliation + +Engine tick adds a reconciliation step for runs linked to nulltickets tasks: + +After each step completes, if `run.task_id` is set (pull-mode run): +1. Fetch current task state from nulltickets: `GET /tasks/{task_id}` +2. If task state changed to a terminal state → cancel the run +3. If task state changed to a different active state → update run metadata, continue + +This prevents wasted agent execution on tasks that humans already resolved. + +Engine logic only — no schema changes. + +--- + +## 9. Workspace Reuse Per Issue + +In NullBoiler's tracker/workspace system, workspaces should be reused for the same nulltickets task: + +- Workspace directory name based on `task_id` (not `run_id`) +- On new run for same task: reuse existing workspace (skip `after_create` hook, still run `before_run`) +- On task completion: run `after_run` hook, keep workspace +- On task terminal state + configurable cleanup: run `before_remove`, delete workspace + +This is a tracker.zig + workspace.zig change. + +--- + +## 10. 
Message-Native State (add_messages reducer) + +New reducer type `add_messages`: + +```json +{ + "state_schema": { + "messages": {"type": "array", "reducer": "add_messages"} + } +} +``` + +Behavior: +- Each message has an `id` field +- On update: if message with same `id` exists, replace it. Otherwise append. +- Special: if update contains `{"remove": true, "id": "msg_123"}`, remove that message. +- If message has no `id`, auto-generate one and append. + +This enables chat-history-aware workflows where messages can be updated or removed by ID. + +Implementation: new case in `state.zig` `applyReducer()`. + +### ReducerType update + +Add `add_messages` to ReducerType enum in types.zig. + +--- + +## Summary of Changes + +| Repo | Changes | +|------|---------| +| NullBoiler types.zig | Add `subgraph` to StepType, `add_messages` to ReducerType | +| NullBoiler engine.zig | Command goto, subgraph execution, breakpoints, multi-turn, reconciliation, store_updates | +| NullBoiler state.zig | add_messages reducer | +| NullBoiler store.zig | `parent_run_id` + `config_json` columns | +| NullBoiler api.zig | config in run creation, template store access | +| NullBoiler templates.zig | `{{store.X.Y}}`, `{{config.X}}` access | +| NullBoiler tracker.zig | Workspace reuse, reconciliation | +| nulltickets store.zig | Store KV CRUD, per-state concurrency in claim | +| nulltickets api.zig | Store endpoints, claim concurrency param | +| nulltickets migrations | Store table | +| nullhub UI | Store viewer page (optional) | diff --git a/src/api.zig b/src/api.zig index 5f1dd59..0ce34a9 100644 --- a/src/api.zig +++ b/src/api.zig @@ -8,6 +8,9 @@ const metrics_mod = @import("metrics.zig"); const strategy_mod = @import("strategy.zig"); const tracker_mod = @import("tracker.zig"); const config_mod = @import("config.zig"); +const sse_mod = @import("sse.zig"); +const state_mod = @import("state.zig"); +const engine_mod = @import("engine.zig"); // ── Types 
──────────────────────────────────────────────────────────── @@ -24,6 +27,8 @@ pub const Context = struct { strategies: ?*const strategy_mod.StrategyMap = null, tracker_state: ?*tracker_mod.TrackerState = null, tracker_cfg: ?*const config_mod.TrackerConfig = null, + sse_hub: ?*sse_mod.SseHub = null, + rate_limits: ?*std.StringHashMap(engine_mod.RateLimitInfo) = null, }; pub const HttpResponse = struct { @@ -41,16 +46,16 @@ pub fn handleRequest(ctx: *Context, method: []const u8, target: []const u8, body } const path = parsePath(target); - const seg0 = getPathSegment(path, 0); - const seg1 = getPathSegment(path, 1); - const seg2 = getPathSegment(path, 2); - const seg3 = getPathSegment(path, 3); - const seg4 = getPathSegment(path, 4); - const seg5 = getPathSegment(path, 5); + const seg0 = decodePathSegment(ctx.allocator, getPathSegment(path, 0)); + const seg1 = decodePathSegment(ctx.allocator, getPathSegment(path, 1)); + const seg2 = decodePathSegment(ctx.allocator, getPathSegment(path, 2)); + const seg3 = decodePathSegment(ctx.allocator, getPathSegment(path, 3)); + const seg4 = decodePathSegment(ctx.allocator, getPathSegment(path, 4)); const is_get = eql(method, "GET"); const is_post = eql(method, "POST"); const is_delete = eql(method, "DELETE"); + const is_put = eql(method, "PUT"); if (!isAuthorized(ctx, seg0, seg1)) { return jsonResponse(401, "{\"error\":{\"code\":\"unauthorized\",\"message\":\"missing or invalid bearer token\"}}"); @@ -101,26 +106,6 @@ pub fn handleRequest(ctx: *Context, method: []const u8, target: []const u8, body return handleGetStep(ctx, seg1.?, seg3.?); } - // POST /runs/{id}/steps/{step_id}/approve - if (is_post and eql(seg0, "runs") and seg1 != null and eql(seg2, "steps") and seg3 != null and eql(seg4, "approve") and seg5 == null) { - return handleApproveStep(ctx, seg1.?, seg3.?); - } - - // POST /runs/{id}/steps/{step_id}/reject - if (is_post and eql(seg0, "runs") and seg1 != null and eql(seg2, "steps") and seg3 != null and eql(seg4, 
"reject") and seg5 == null) { - return handleRejectStep(ctx, seg1.?, seg3.?); - } - - // POST /runs/{id}/steps/{step_id}/signal - if (is_post and eql(seg0, "runs") and seg1 != null and eql(seg2, "steps") and seg3 != null and eql(seg4, "signal") and seg5 == null) { - return handleSignalStep(ctx, seg1.?, seg3.?, body); - } - - // GET /runs/{id}/steps/{step_id}/chat - if (is_get and eql(seg0, "runs") and seg1 != null and eql(seg2, "steps") and seg3 != null and eql(seg4, "chat") and seg5 == null) { - return handleGetChatTranscript(ctx, seg1.?, seg3.?); - } - // GET /runs/{id}/events if (is_get and eql(seg0, "runs") and seg1 != null and eql(seg2, "events") and seg3 == null) { return handleListEvents(ctx, seg1.?); @@ -171,6 +156,103 @@ pub fn handleRequest(ctx: *Context, method: []const u8, target: []const u8, body return handleTrackerRefresh(ctx); } + // GET /rate-limits + if (is_get and eql(seg0, "rate-limits") and seg1 == null) { + return handleGetRateLimits(ctx); + } + + // ── Workflow CRUD ─────────────────────────────────────────────── + + // POST /workflows + if (is_post and eql(seg0, "workflows") and seg1 == null) { + return handleCreateWorkflow(ctx, body); + } + + // GET /workflows + if (is_get and eql(seg0, "workflows") and seg1 == null) { + return handleListWorkflows(ctx); + } + + // GET /workflows/{id} + if (is_get and eql(seg0, "workflows") and seg1 != null and seg2 == null) { + return handleGetWorkflow(ctx, seg1.?); + } + + // PUT /workflows/{id} + if (is_put and eql(seg0, "workflows") and seg1 != null and seg2 == null) { + return handleUpdateWorkflow(ctx, seg1.?, body); + } + + // DELETE /workflows/{id} + if (is_delete and eql(seg0, "workflows") and seg1 != null and seg2 == null) { + return handleDeleteWorkflow(ctx, seg1.?); + } + + // POST /workflows/{id}/validate + if (is_post and eql(seg0, "workflows") and seg1 != null and eql(seg2, "validate") and seg3 == null) { + return handleValidateWorkflow(ctx, seg1.?); + } + + // GET /workflows/{id}/mermaid + if 
(is_get and eql(seg0, "workflows") and seg1 != null and eql(seg2, "mermaid") and seg3 == null) { + return handleGetMermaid(ctx, seg1.?); + } + + // POST /workflows/{id}/run + if (is_post and eql(seg0, "workflows") and seg1 != null and eql(seg2, "run") and seg3 == null) { + return handleRunWorkflow(ctx, seg1.?, body); + } + + // ── Checkpoint endpoints ──────────────────────────────────────── + + // GET /runs/{id}/checkpoints + if (is_get and eql(seg0, "runs") and seg1 != null and eql(seg2, "checkpoints") and seg3 == null) { + return handleListCheckpoints(ctx, seg1.?); + } + + // GET /runs/{id}/checkpoints/{cpId} + if (is_get and eql(seg0, "runs") and seg1 != null and eql(seg2, "checkpoints") and seg3 != null and seg4 == null) { + return handleGetCheckpoint(ctx, seg1.?, seg3.?); + } + + // ── State control endpoints ───────────────────────────────────── + + // POST /runs/{id}/resume + if (is_post and eql(seg0, "runs") and seg1 != null and eql(seg2, "resume") and seg3 == null) { + return handleResumeRun(ctx, seg1.?, body); + } + + // POST /runs/fork + if (is_post and eql(seg0, "runs") and eql(seg1, "fork") and seg2 == null) { + return handleForkRun(ctx, body); + } + + // POST /runs/{id}/state + if (is_post and eql(seg0, "runs") and seg1 != null and eql(seg2, "state") and seg3 == null) { + return handleInjectState(ctx, seg1.?, body); + } + + // ── SSE stream endpoint ───────────────────────────────────────── + + // GET /runs/{id}/stream + if (is_get and eql(seg0, "runs") and seg1 != null and eql(seg2, "stream") and seg3 == null) { + return handleStream(ctx, seg1.?, target); + } + + // ── Replay endpoint ──────────────────────────────────────────── + + // POST /runs/{id}/replay + if (is_post and eql(seg0, "runs") and seg1 != null and eql(seg2, "replay") and seg3 == null) { + return handleReplayRun(ctx, seg1.?, body); + } + + // ── Agent events callback ─────────────────────────────────────── + + // POST /internal/agent-events/{run_id}/{step_id} + if (is_post and 
eql(seg0, "internal") and eql(seg1, "agent-events") and seg2 != null and seg3 != null and seg4 == null) { + return handleAgentEventCallback(ctx, seg2.?, seg3.?, body); + } + return jsonResponse(404, "{\"error\":{\"code\":\"not_found\",\"message\":\"endpoint not found\"}}"); } @@ -212,6 +294,36 @@ fn handleEnableDrain(ctx: *Context) HttpResponse { return jsonResponse(200, "{\"status\":\"draining\"}"); } +// ── Rate Limit Handler ────────────────────────────────────────────── + +fn handleGetRateLimits(ctx: *Context) HttpResponse { + const rl_map = ctx.rate_limits orelse { + return jsonResponse(200, "[]"); + }; + + var buf: std.ArrayListUnmanaged(u8) = .empty; + buf.append(ctx.allocator, '[') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + + var it = rl_map.iterator(); + var first = true; + while (it.next()) |entry| { + if (!first) { + buf.append(ctx.allocator, ',') catch continue; + } + first = false; + + const rl = entry.value_ptr.*; + const wid_json = jsonQuoted(ctx.allocator, rl.worker_id) catch continue; + const item = std.fmt.allocPrint(ctx.allocator, + \\{{"worker_id":{s},"remaining":{d},"limit":{d},"reset_ms":{d},"updated_at_ms":{d}}} + , .{ wid_json, rl.remaining, rl.limit, rl.reset_ms, rl.updated_at_ms }) catch continue; + buf.appendSlice(ctx.allocator, item) catch continue; + } + + buf.append(ctx.allocator, ']') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + return jsonResponse(200, buf.items); +} + // ── Worker Handlers ────────────────────────────────────────────────── fn handleListWorkers(ctx: *Context) HttpResponse { @@ -294,7 +406,7 @@ fn handleRegisterWorker(ctx: *Context, body: []const u8) HttpResponse { const model = getJsonString(obj, "model"); const protocol = worker_protocol.parse(protocol_raw) orelse { - return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"invalid protocol (expected 
webhook|api_chat|openai_chat)\"}}"); + return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"invalid protocol (expected webhook|api_chat|openai_chat|mqtt|redis_stream|a2a)\"}}"); }; if (!worker_protocol.validateUrlForProtocol(url, protocol)) { return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"webhook protocol requires explicit URL path (for example /webhook)\"}}"); @@ -615,20 +727,50 @@ fn handleGetRun(ctx: *Context, id: []const u8) HttpResponse { const ik_json = jsonQuoted(ctx.allocator, ik) catch ""; break :blk std.fmt.allocPrint(ctx.allocator, ",\"idempotency_key\":{s}", .{ik_json}) catch ""; } else ""; + const workflow_id_field = if (run.workflow_id) |wid| blk: { + const wid_json = jsonQuoted(ctx.allocator, wid) catch ""; + break :blk std.fmt.allocPrint(ctx.allocator, ",\"workflow_id\":{s}", .{wid_json}) catch ""; + } else ""; + + // Include state_json if present + const state_field = if (run.state_json) |sj| + std.fmt.allocPrint(ctx.allocator, ",\"state_json\":{s}", .{sj}) catch "" + else + ""; + + // Count checkpoints + const checkpoints = ctx.store.listCheckpoints(ctx.allocator, id) catch &.{}; + const checkpoint_count: i64 = @intCast(checkpoints.len); + const checkpoint_field = std.fmt.allocPrint(ctx.allocator, ",\"checkpoint_count\":{d}", .{checkpoint_count}) catch ""; + + // Token accounting (Gap 2) + var token_input: i64 = 0; + var token_output: i64 = 0; + var token_total: i64 = 0; + if (ctx.store.getRunTokens(id)) |t| { + token_input = t.input; + token_output = t.output; + token_total = t.total; + } else |_| {} + const token_field = std.fmt.allocPrint(ctx.allocator, ",\"total_input_tokens\":{d},\"total_output_tokens\":{d},\"total_tokens\":{d}", .{ token_input, token_output, token_total }) catch ""; const run_id_json = jsonQuoted(ctx.allocator, run.id) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); const run_status_json = jsonQuoted(ctx.allocator, 
run.status) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); const resp = std.fmt.allocPrint(ctx.allocator, - \\{{"id":{s},"status":{s}{s},"created_at_ms":{d},"updated_at_ms":{d}{s}{s}{s},"steps":{s}}} + \\{{"id":{s},"status":{s}{s},"created_at_ms":{d},"updated_at_ms":{d}{s}{s}{s}{s}{s}{s}{s},"steps":{s}}} , .{ run_id_json, run_status_json, idempotency_field, run.created_at_ms, run.updated_at_ms, + workflow_id_field, error_field, started_field, ended_field, + state_field, + checkpoint_field, + token_field, steps_json, }) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); return jsonResponse(200, resp); @@ -636,11 +778,12 @@ fn handleGetRun(ctx: *Context, id: []const u8) HttpResponse { fn handleListRuns(ctx: *Context, target: []const u8) HttpResponse { const status_filter = getQueryParam(target, "status"); + const workflow_id_filter = getQueryParam(target, "workflow_id"); const limit = parseQueryInt(target, "limit", 100, 1, 1000); const offset = parseQueryInt(target, "offset", 0, 0, 1_000_000_000); // Fetch one extra row to compute has_more. 
- const runs = ctx.store.listRuns(ctx.allocator, status_filter, limit + 1, offset) catch { + const runs = ctx.store.listRuns(ctx.allocator, status_filter, workflow_id_filter, limit + 1, offset) catch { return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to list runs\"}}"); }; @@ -660,12 +803,17 @@ fn handleListRuns(ctx: *Context, target: []const u8) HttpResponse { const ik_json = jsonQuoted(ctx.allocator, ik) catch ""; break :blk std.fmt.allocPrint(ctx.allocator, ",\"idempotency_key\":{s}", .{ik_json}) catch ""; } else ""; + const workflow_id_field = if (r.workflow_id) |wid| blk: { + const wid_json = jsonQuoted(ctx.allocator, wid) catch ""; + break :blk std.fmt.allocPrint(ctx.allocator, ",\"workflow_id\":{s}", .{wid_json}) catch ""; + } else ""; const entry = std.fmt.allocPrint(ctx.allocator, - \\{{"id":{s},"status":{s}{s},"created_at_ms":{d},"updated_at_ms":{d}}} + \\{{"id":{s},"status":{s}{s}{s},"created_at_ms":{d},"updated_at_ms":{d}}} , .{ run_id_json, run_status_json, idempotency_field, + workflow_id_field, r.created_at_ms, r.updated_at_ms, }) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); @@ -765,7 +913,10 @@ fn handleCancelRun(ctx: *Context, run_id: []const u8) HttpResponse { // 5. Insert event ctx.store.insertEvent(run_id, null, "run.cancelled", "{}") catch {}; - // 6. Return 200 + // 6. Mark SSE queue closed but keep buffered events available for late subscribers. + if (ctx.sse_hub) |hub| hub.closeQueue(run_id); + + // 7. Return 200 const resp = std.fmt.allocPrint(ctx.allocator, \\{{"id":"{s}","status":"cancelled"}} , .{run_id}) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); @@ -814,213 +965,837 @@ fn handleRetryRun(ctx: *Context, run_id: []const u8) HttpResponse { return jsonResponse(200, resp); } -fn handleApproveStep(ctx: *Context, run_id: []const u8, step_id: []const u8) HttpResponse { - // 1. 
Get step from store - const step = switch (lookupStepInRun(ctx, run_id, step_id)) { - .ok => |s| s, - .err => |resp| return resp, +fn handleListEvents(ctx: *Context, run_id: []const u8) HttpResponse { + // 1. Get events from store + const events = ctx.store.getEventsByRun(ctx.allocator, run_id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get events\"}}"); }; - // 2. Must be "waiting_approval" - if (!std.mem.eql(u8, step.status, "waiting_approval")) { - const resp = std.fmt.allocPrint(ctx.allocator, - \\{{"error":{{"code":"conflict","message":"step is not waiting_approval (current: {s})"}}}} - , .{step.status}) catch return jsonResponse(409, "{\"error\":{\"code\":\"conflict\",\"message\":\"step is not waiting_approval\"}}"); - return jsonResponse(409, resp); + // 2. Build JSON array + var buf: std.ArrayListUnmanaged(u8) = .empty; + buf.append(ctx.allocator, '[') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + + for (events, 0..) 
|ev, i| { + if (i > 0) { + buf.append(ctx.allocator, ',') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + } + + const step_field = if (ev.step_id) |sid| blk: { + const sid_json = jsonQuoted(ctx.allocator, sid) catch ""; + break :blk std.fmt.allocPrint(ctx.allocator, ",\"step_id\":{s}", .{sid_json}) catch ""; + } else ""; + const run_id_json = jsonQuoted(ctx.allocator, ev.run_id) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + const kind_json = jsonQuoted(ctx.allocator, ev.kind) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + + const entry = std.fmt.allocPrint(ctx.allocator, + \\{{"id":{d},"run_id":{s}{s},"kind":{s},"data":{s},"ts_ms":{d}}} + , .{ + ev.id, + run_id_json, + step_field, + kind_json, + ev.data_json, + ev.ts_ms, + }) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + buf.appendSlice(ctx.allocator, entry) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); } - // 3. 
Update status to "completed" - ctx.store.updateStepStatus(step_id, "completed", null, null, null, step.attempt) catch { - return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to update step\"}}"); + buf.append(ctx.allocator, ']') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + const json_body = buf.toOwnedSlice(ctx.allocator) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + return jsonResponse(200, json_body); +} + +// ── Workflow CRUD Handlers ─────────────────────────────────────────── + +fn handleCreateWorkflow(ctx: *Context, body: []const u8) HttpResponse { + const parsed = std.json.parseFromSlice(std.json.Value, ctx.allocator, body, .{}) catch { + return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"invalid JSON body\"}}"); }; + defer parsed.deinit(); - // 4. Insert event - ctx.store.insertEvent(run_id, step_id, "step.approved", "{}") catch {}; + if (parsed.value != .object) { + return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"body must be a JSON object\"}}"); + } + const obj = parsed.value.object; + + const name = getJsonString(obj, "name") orelse "untitled"; + + // Use provided id or generate one + const wf_id = if (getJsonString(obj, "id")) |provided_id| + ctx.allocator.dupe(u8, provided_id) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}") + else blk: { + const id_buf = ids.generateId(); + break :blk ctx.allocator.dupe(u8, &id_buf) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + }; + + // If definition_json is a sub-key, extract it; otherwise use the whole body + const definition_json = if (obj.get("definition_json")) |def_val| blk: { + break :blk serializeJsonValue(ctx.allocator, def_val) catch return jsonResponse(500, 
"{\"error\":{\"code\":\"internal\",\"message\":\"failed to serialize definition\"}}"); + } else body; + + // Extract version from body (default 1) + const version: i64 = if (obj.get("version")) |v| blk: { + if (v == .integer) break :blk v.integer; + break :blk 1; + } else 1; - // 5. Return 200 + ctx.store.createWorkflowWithVersion(wf_id, name, definition_json, version) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to create workflow\"}}"); + }; + + const id_json = jsonQuoted(ctx.allocator, wf_id) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + const name_json = jsonQuoted(ctx.allocator, name) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); const resp = std.fmt.allocPrint(ctx.allocator, - \\{{"step_id":"{s}","status":"completed"}} - , .{step_id}) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); - return jsonResponse(200, resp); + \\{{"id":{s},"name":{s},"version":{d}}} + , .{ id_json, name_json, version }) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + return jsonResponse(201, resp); } -fn handleRejectStep(ctx: *Context, run_id: []const u8, step_id: []const u8) HttpResponse { - // 1. Get step from store - const step = switch (lookupStepInRun(ctx, run_id, step_id)) { - .ok => |s| s, - .err => |resp| return resp, +fn handleListWorkflows(ctx: *Context) HttpResponse { + const workflows = ctx.store.listWorkflows(ctx.allocator) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to list workflows\"}}"); }; - // 2. 
Must be "waiting_approval" - if (!std.mem.eql(u8, step.status, "waiting_approval")) { - const resp = std.fmt.allocPrint(ctx.allocator, - \\{{"error":{{"code":"conflict","message":"step is not waiting_approval (current: {s})"}}}} - , .{step.status}) catch return jsonResponse(409, "{\"error\":{\"code\":\"conflict\",\"message\":\"step is not waiting_approval\"}}"); - return jsonResponse(409, resp); + var buf: std.ArrayListUnmanaged(u8) = .empty; + buf.append(ctx.allocator, '[') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + + for (workflows, 0..) |wf, i| { + if (i > 0) { + buf.append(ctx.allocator, ',') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + } + const id_json = jsonQuoted(ctx.allocator, wf.id) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + const name_json = jsonQuoted(ctx.allocator, wf.name) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + const entry = std.fmt.allocPrint(ctx.allocator, + \\{{"id":{s},"name":{s},"version":{d},"definition":{s},"created_at_ms":{d},"updated_at_ms":{d}}} + , .{ + id_json, + name_json, + wf.version, + wf.definition_json, + wf.created_at_ms, + wf.updated_at_ms, + }) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + buf.appendSlice(ctx.allocator, entry) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); } - // 3. 
Update status to "failed", set error_text - ctx.store.updateStepStatus(step_id, "failed", null, null, "rejected by user", step.attempt) catch { - return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to update step\"}}"); - }; + buf.append(ctx.allocator, ']') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + const json_body = buf.toOwnedSlice(ctx.allocator) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + return jsonResponse(200, json_body); +} - // 4. Insert event - ctx.store.insertEvent(run_id, step_id, "step.rejected", "{}") catch {}; +fn handleGetWorkflow(ctx: *Context, id: []const u8) HttpResponse { + const wf = ctx.store.getWorkflow(ctx.allocator, id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get workflow\"}}"); + } orelse { + return jsonResponse(404, "{\"error\":{\"code\":\"not_found\",\"message\":\"workflow not found\"}}"); + }; - // 5. 
Return 200 + const id_json = jsonQuoted(ctx.allocator, wf.id) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + const name_json = jsonQuoted(ctx.allocator, wf.name) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); const resp = std.fmt.allocPrint(ctx.allocator, - \\{{"step_id":"{s}","status":"failed"}} - , .{step_id}) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + \\{{"id":{s},"name":{s},"version":{d},"definition":{s},"created_at_ms":{d},"updated_at_ms":{d}}} + , .{ + id_json, + name_json, + wf.version, + wf.definition_json, + wf.created_at_ms, + wf.updated_at_ms, + }) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); return jsonResponse(200, resp); } -fn handleSignalStep(ctx: *Context, run_id: []const u8, step_id: []const u8, body: []const u8) HttpResponse { - // 1. Get step from store - const step = switch (lookupStepInRun(ctx, run_id, step_id)) { - .ok => |s| s, - .err => |resp| return resp, +fn handleUpdateWorkflow(ctx: *Context, id: []const u8, body: []const u8) HttpResponse { + // Verify workflow exists + _ = ctx.store.getWorkflow(ctx.allocator, id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get workflow\"}}"); + } orelse { + return jsonResponse(404, "{\"error\":{\"code\":\"not_found\",\"message\":\"workflow not found\"}}"); }; - // 2. 
Must be "waiting_approval" (signal mode uses this status) - if (!std.mem.eql(u8, step.status, "waiting_approval")) { - const resp = std.fmt.allocPrint(ctx.allocator, - \\{{"error":{{"code":"conflict","message":"step is not waiting for signal (current: {s})"}}}} - , .{step.status}) catch return jsonResponse(409, "{\"error\":{\"code\":\"conflict\",\"message\":\"step is not waiting for signal\"}}"); - return jsonResponse(409, resp); - } + const parsed = std.json.parseFromSlice(std.json.Value, ctx.allocator, body, .{}) catch { + return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"invalid JSON body\"}}"); + }; + defer parsed.deinit(); - // 3. Parse optional signal data from body - var signal_data: []const u8 = "{}"; - if (body.len > 0) { - const parsed = std.json.parseFromSlice(std.json.Value, ctx.allocator, body, .{}) catch { - // Body is not valid JSON; use empty - signal_data = "{}"; - // Continue anyway - const output = std.fmt.allocPrint(ctx.allocator, - \\{{"output":"signaled","data":{{}}}} - , .{}) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); - - ctx.store.updateStepStatus(step_id, "completed", null, output, null, step.attempt) catch { - return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to update step\"}}"); - }; - ctx.store.insertEvent(run_id, step_id, "step.signaled", output) catch {}; - const resp = std.fmt.allocPrint(ctx.allocator, - \\{{"step_id":"{s}","status":"completed"}} - , .{step_id}) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); - return jsonResponse(200, resp); - }; - _ = parsed; - signal_data = body; + if (parsed.value != .object) { + return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"body must be a JSON object\"}}"); } + const obj = parsed.value.object; + + const name = getJsonString(obj, "name") orelse "untitled"; + const definition_json = if 
(obj.get("definition_json")) |def_val| blk: { + break :blk serializeJsonValue(ctx.allocator, def_val) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to serialize definition\"}}"); + } else body; - // 4. Build output with signal data - const output = std.fmt.allocPrint(ctx.allocator, - \\{{"output":"signaled","data":{s}}} - , .{signal_data}) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + // Extract version if provided + const version: ?i64 = if (obj.get("version")) |v| blk: { + if (v == .integer) break :blk v.integer; + break :blk null; + } else null; - // 5. Update step to "completed" - ctx.store.updateStepStatus(step_id, "completed", null, output, null, step.attempt) catch { - return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to update step\"}}"); + ctx.store.updateWorkflowWithVersion(id, name, definition_json, version) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to update workflow\"}}"); }; - // 6. Insert event - ctx.store.insertEvent(run_id, step_id, "step.signaled", output) catch {}; + return jsonResponse(200, "{\"ok\":true}"); +} - // 7. 
Return 200 - const resp = std.fmt.allocPrint(ctx.allocator, - \\{{"step_id":"{s}","status":"completed"}} - , .{step_id}) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); - return jsonResponse(200, resp); +fn handleDeleteWorkflow(ctx: *Context, id: []const u8) HttpResponse { + // Verify workflow exists + _ = ctx.store.getWorkflow(ctx.allocator, id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get workflow\"}}"); + } orelse { + return jsonResponse(404, "{\"error\":{\"code\":\"not_found\",\"message\":\"workflow not found\"}}"); + }; + + ctx.store.deleteWorkflow(id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to delete workflow\"}}"); + }; + + return jsonResponse(200, "{\"ok\":true}"); } -fn handleListEvents(ctx: *Context, run_id: []const u8) HttpResponse { - // 1. Get events from store - const events = ctx.store.getEventsByRun(ctx.allocator, run_id) catch { - return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get events\"}}"); +fn handleValidateWorkflow(ctx: *Context, id: []const u8) HttpResponse { + const wf = ctx.store.getWorkflow(ctx.allocator, id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get workflow\"}}"); + } orelse { + return jsonResponse(404, "{\"error\":{\"code\":\"not_found\",\"message\":\"workflow not found\"}}"); }; - // 2. 
Build JSON array + const errors = workflow_validation.validate(ctx.allocator, wf.definition_json) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"validation failed\"}}"); + }; + + // Build validation result var buf: std.ArrayListUnmanaged(u8) = .empty; - buf.append(ctx.allocator, '[') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + buf.appendSlice(ctx.allocator, "{\"valid\":") catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + buf.appendSlice(ctx.allocator, if (errors.len == 0) "true" else "false") catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + buf.appendSlice(ctx.allocator, ",\"errors\":[") catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); - for (events, 0..) |ev, i| { + for (errors, 0..) |ve, i| { if (i > 0) { buf.append(ctx.allocator, ',') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); } - - const step_field = if (ev.step_id) |sid| blk: { - const sid_json = jsonQuoted(ctx.allocator, sid) catch ""; - break :blk std.fmt.allocPrint(ctx.allocator, ",\"step_id\":{s}", .{sid_json}) catch ""; + const err_type_json = jsonQuoted(ctx.allocator, ve.err_type) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + const node_field = if (ve.node) |n| blk: { + const n_json = jsonQuoted(ctx.allocator, n) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + break :blk std.fmt.allocPrint(ctx.allocator, ",\"node\":{s}", .{n_json}) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); } else ""; - const run_id_json = jsonQuoted(ctx.allocator, ev.run_id) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of 
memory\"}}"); - const kind_json = jsonQuoted(ctx.allocator, ev.kind) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); - + const key_field = if (ve.key) |k| blk: { + const k_json = jsonQuoted(ctx.allocator, k) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + break :blk std.fmt.allocPrint(ctx.allocator, ",\"key\":{s}", .{k_json}) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + } else ""; + const msg_json = jsonQuoted(ctx.allocator, ve.message) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); const entry = std.fmt.allocPrint(ctx.allocator, - \\{{"id":{d},"run_id":{s}{s},"kind":{s},"data":{s},"ts_ms":{d}}} + \\{{"type":{s}{s}{s},"message":{s}}} , .{ - ev.id, - run_id_json, - step_field, - kind_json, - ev.data_json, - ev.ts_ms, + err_type_json, + node_field, + key_field, + msg_json, }) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); buf.appendSlice(ctx.allocator, entry) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); } + buf.appendSlice(ctx.allocator, "]") catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + + // Include Mermaid diagram in validation response + const mermaid_str = engine_mod.generateMermaid(ctx.allocator, wf.definition_json) catch null; + if (mermaid_str) |ms| { + const mermaid_json = jsonQuoted(ctx.allocator, ms) catch null; + if (mermaid_json) |mj| { + buf.appendSlice(ctx.allocator, ",\"mermaid\":") catch {}; + buf.appendSlice(ctx.allocator, mj) catch {}; + } + } + + buf.appendSlice(ctx.allocator, "}") catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + const json_body = buf.toOwnedSlice(ctx.allocator) catch return jsonResponse(500, 
"{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + return jsonResponse(200, json_body); +} + +fn handleGetMermaid(ctx: *Context, id: []const u8) HttpResponse { + const wf = ctx.store.getWorkflow(ctx.allocator, id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get workflow\"}}"); + } orelse { + return jsonResponse(404, "{\"error\":{\"code\":\"not_found\",\"message\":\"workflow not found\"}}"); + }; + + const mermaid = engine_mod.generateMermaid(ctx.allocator, wf.definition_json) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to generate mermaid diagram\"}}"); + }; + + return plainResponse(200, mermaid); +} + +fn handleRunWorkflow(ctx: *Context, workflow_id: []const u8, body: []const u8) HttpResponse { + // Load workflow + const wf = ctx.store.getWorkflow(ctx.allocator, workflow_id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get workflow\"}}"); + } orelse { + return jsonResponse(404, "{\"error\":{\"code\":\"not_found\",\"message\":\"workflow not found\"}}"); + }; + + // Validate + const errors = workflow_validation.validate(ctx.allocator, wf.definition_json) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"validation failed\"}}"); + }; + if (errors.len > 0) { + return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"workflow has validation errors\"}}"); + } + + // Parse definition to extract state_schema for initState + const def_parsed = std.json.parseFromSlice(std.json.Value, ctx.allocator, wf.definition_json, .{}) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to parse workflow definition\"}}"); + }; + defer def_parsed.deinit(); + + const schema_json = if (def_parsed.value == .object) blk: { + if (def_parsed.value.object.get("state_schema")) |ss| { + break :blk serializeJsonValue(ctx.allocator, ss) catch 
return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to serialize schema\"}}"); + } + break :blk "{}"; + } else "{}"; + + // Parse input from request body (or default to {}) + const input_json = if (body.len > 0) blk: { + const bp = std.json.parseFromSlice(std.json.Value, ctx.allocator, body, .{}) catch break :blk "{}"; + defer bp.deinit(); + if (bp.value == .object) { + if (bp.value.object.get("input")) |input_val| { + break :blk serializeJsonValue(ctx.allocator, input_val) catch break :blk "{}"; + } + } + break :blk "{}"; + } else "{}"; + + // Init state + const initial_state = state_mod.initState(ctx.allocator, input_json, schema_json) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to initialize state\"}}"); + }; + + // Generate run ID + const run_id_buf = ids.generateId(); + const run_id = ctx.allocator.dupe(u8, &run_id_buf) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + + // Create run directly with "running" status to avoid race window where + // engine could miss a run created as "pending" then updated to "running". 
+ ctx.store.createRunWithStateAndStatus(run_id, workflow_id, wf.definition_json, input_json, initial_state, "running") catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to create run\"}}"); + }; + + // Create initial checkpoint (version 0, no completed nodes) + const cp_id_buf = ids.generateId(); + const cp_id = ctx.allocator.dupe(u8, &cp_id_buf) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + ctx.store.createCheckpoint(cp_id, run_id, "__init__", null, initial_state, "[]", 0, null) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to create checkpoint\"}}"); + }; + + const run_id_json = jsonQuoted(ctx.allocator, run_id) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + const resp = std.fmt.allocPrint(ctx.allocator, + \\{{"id":{s},"status":"running"}} + , .{run_id_json}) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + return jsonResponse(201, resp); +} + +// ── Checkpoint Handlers ───────────────────────────────────────────── + +fn handleListCheckpoints(ctx: *Context, run_id: []const u8) HttpResponse { + // Verify run exists + _ = ctx.store.getRun(ctx.allocator, run_id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get run\"}}"); + } orelse { + return jsonResponse(404, "{\"error\":{\"code\":\"not_found\",\"message\":\"run not found\"}}"); + }; + + const checkpoints = ctx.store.listCheckpoints(ctx.allocator, run_id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to list checkpoints\"}}"); + }; + + var buf: std.ArrayListUnmanaged(u8) = .empty; + buf.append(ctx.allocator, '[') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + + for (checkpoints, 0..) 
|cp, i| { + if (i > 0) { + buf.append(ctx.allocator, ',') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + } + const entry = buildCheckpointJson(ctx.allocator, cp) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + buf.appendSlice(ctx.allocator, entry) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + } + buf.append(ctx.allocator, ']') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); const json_body = buf.toOwnedSlice(ctx.allocator) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); return jsonResponse(200, json_body); } -// ── Chat Transcript Handler ────────────────────────────────────────── +fn handleGetCheckpoint(ctx: *Context, run_id: []const u8, cp_id: []const u8) HttpResponse { + const cp = ctx.store.getCheckpoint(ctx.allocator, cp_id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get checkpoint\"}}"); + } orelse { + return jsonResponse(404, "{\"error\":{\"code\":\"not_found\",\"message\":\"checkpoint not found\"}}"); + }; -fn handleGetChatTranscript(ctx: *Context, run_id: []const u8, step_id: []const u8) HttpResponse { - _ = switch (lookupStepInRun(ctx, run_id, step_id)) { - .ok => |s| s, - .err => |resp| return resp, + // Verify checkpoint belongs to run + if (!std.mem.eql(u8, cp.run_id, run_id)) { + return jsonResponse(404, "{\"error\":{\"code\":\"not_found\",\"message\":\"checkpoint not found\"}}"); + } + + const json_body = buildCheckpointJson(ctx.allocator, cp) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + return jsonResponse(200, json_body); +} + +fn buildCheckpointJson(allocator: std.mem.Allocator, cp: types.CheckpointRow) ![]const u8 { + const id_json = try jsonQuoted(allocator, cp.id); + 
const run_id_json = try jsonQuoted(allocator, cp.run_id); + const step_id_json = try jsonQuoted(allocator, cp.step_id); + const parent_field = if (cp.parent_id) |pid| blk: { + const pid_json = try jsonQuoted(allocator, pid); + break :blk try std.fmt.allocPrint(allocator, ",\"parent_id\":{s}", .{pid_json}); + } else ""; + const metadata_field = if (cp.metadata_json) |md| + try std.fmt.allocPrint(allocator, ",\"metadata\":{s}", .{md}) + else + ""; + + return try std.fmt.allocPrint(allocator, + \\{{"id":{s},"run_id":{s},"step_id":{s}{s},"state":{s},"completed_nodes":{s},"version":{d}{s},"created_at_ms":{d}}} + , .{ + id_json, + run_id_json, + step_id_json, + parent_field, + cp.state_json, + cp.completed_nodes_json, + cp.version, + metadata_field, + cp.created_at_ms, + }); +} + +// ── State Control Handlers ────────────────────────────────────────── + +fn handleResumeRun(ctx: *Context, run_id: []const u8, body: []const u8) HttpResponse { + // Load run — must be status=interrupted + const run = ctx.store.getRun(ctx.allocator, run_id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get run\"}}"); + } orelse { + return jsonResponse(404, "{\"error\":{\"code\":\"not_found\",\"message\":\"run not found\"}}"); + }; + + if (!std.mem.eql(u8, run.status, "interrupted")) { + const resp = std.fmt.allocPrint(ctx.allocator, + \\{{"error":{{"code":"conflict","message":"run is not interrupted (current: {s})"}}}} + , .{run.status}) catch return jsonResponse(409, "{\"error\":{\"code\":\"conflict\",\"message\":\"run is not interrupted\"}}"); + return jsonResponse(409, resp); + } + + // Load latest checkpoint + const latest_cp = ctx.store.getLatestCheckpoint(ctx.allocator, run_id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get latest checkpoint\"}}"); + } orelse { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"no checkpoint found for run\"}}"); + }; + + // Get 
current state + var current_state = latest_cp.state_json; + + // Apply state_updates from body if provided + if (body.len > 0) { + const bp = std.json.parseFromSlice(std.json.Value, ctx.allocator, body, .{}); + if (bp) |body_parsed| { + defer body_parsed.deinit(); + + if (body_parsed.value == .object) { + if (body_parsed.value.object.get("state_updates")) |updates_val| { + const updates_json = serializeJsonValue(ctx.allocator, updates_val) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to serialize updates\"}}"); + }; + + // Get schema from workflow definition + const schema_json = getSchemaFromRun(ctx, run); + + current_state = state_mod.applyUpdates(ctx.allocator, latest_cp.state_json, updates_json, schema_json) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to apply state updates\"}}"); + }; + } + } + } else |_| { + // Body is not valid JSON — proceed without updates + } + } + + // Save new state + ctx.store.updateRunState(run_id, current_state) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to update run state\"}}"); + }; + + // Set status to running + ctx.store.updateRunStatus(run_id, "running", null) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to update run status\"}}"); + }; + + const run_id_json = jsonQuoted(ctx.allocator, run_id) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + const resp = std.fmt.allocPrint(ctx.allocator, + \\{{"id":{s},"status":"running"}} + , .{run_id_json}) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + return jsonResponse(200, resp); +} + +fn handleForkRun(ctx: *Context, body: []const u8) HttpResponse { + const parsed = std.json.parseFromSlice(std.json.Value, ctx.allocator, body, .{}) catch { + return jsonResponse(400, 
"{\"error\":{\"code\":\"bad_request\",\"message\":\"invalid JSON body\"}}"); + }; + defer parsed.deinit(); + + if (parsed.value != .object) { + return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"body must be a JSON object\"}}"); + } + const obj = parsed.value.object; + + // Get checkpoint_id from body + const checkpoint_id = getJsonString(obj, "checkpoint_id") orelse { + return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"missing required field: checkpoint_id\"}}"); + }; + + // Load checkpoint + const cp = ctx.store.getCheckpoint(ctx.allocator, checkpoint_id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get checkpoint\"}}"); + } orelse { + return jsonResponse(404, "{\"error\":{\"code\":\"not_found\",\"message\":\"checkpoint not found\"}}"); + }; + + // Load the original run to get workflow_json + const orig_run = ctx.store.getRun(ctx.allocator, cp.run_id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get original run\"}}"); + } orelse { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"original run not found\"}}"); + }; + + // Apply state_overrides if provided + var fork_state = cp.state_json; + if (obj.get("state_overrides")) |overrides_val| { + const overrides_json = serializeJsonValue(ctx.allocator, overrides_val) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to serialize overrides\"}}"); + }; + const schema_json = getSchemaFromRun(ctx, orig_run); + fork_state = state_mod.applyUpdates(ctx.allocator, cp.state_json, overrides_json, schema_json) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to apply state overrides\"}}"); + }; + } + + // Generate new run ID + const new_run_id_buf = ids.generateId(); + const new_run_id = ctx.allocator.dupe(u8, &new_run_id_buf) catch return jsonResponse(500, 
+        "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}");
+
+    // Create forked run
+    ctx.store.createForkedRun(new_run_id, orig_run.workflow_json, fork_state, cp.run_id, checkpoint_id) catch {
+        return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to create forked run\"}}");
+    };
+
+    // Create initial checkpoint for forked run
+    const cp_id_buf = ids.generateId();
+    const cp_id = ctx.allocator.dupe(u8, &cp_id_buf) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}");
+    ctx.store.createCheckpoint(cp_id, new_run_id, "__fork__", checkpoint_id, fork_state, cp.completed_nodes_json, 0, null) catch {
+        return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to create checkpoint\"}}");
     };
-    const messages = ctx.store.getChatMessages(ctx.allocator, step_id) catch {
-        return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get chat messages\"}}");
+    // Set to running
+    ctx.store.updateRunStatus(new_run_id, "running", null) catch {
+        return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to update run status\"}}");
+    };
+
+    const run_id_json = jsonQuoted(ctx.allocator, new_run_id) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}");
+    // JSON-quote the checkpoint id as well — interpolating it raw would emit
+    // invalid JSON ("forked_from_checkpoint":cp123). Matches handleReplayRun.
+    const cp_id_json = jsonQuoted(ctx.allocator, checkpoint_id) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}");
+    const resp = std.fmt.allocPrint(ctx.allocator,
+        \\{{"id":{s},"status":"running","forked_from_checkpoint":{s}}}
+    , .{ run_id_json, cp_id_json }) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}");
+    return jsonResponse(201, resp);
+}
+
+// ── Replay Handler ──────────────────────────────────────────────────
+
+fn handleReplayRun(ctx: *Context, run_id: []const u8, body: []const u8) HttpResponse {
+    // Parse replay checkpoint ID. Accept both the canonical
+    // `from_checkpoint_id` field and the older `checkpoint_id` alias so
+    // existing clients keep working.
+ const parsed = std.json.parseFromSlice(std.json.Value, ctx.allocator, body, .{}) catch { + return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"invalid JSON body\"}}"); + }; + defer parsed.deinit(); + + if (parsed.value != .object) { + return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"body must be a JSON object\"}}"); + } + const obj = parsed.value.object; + + const checkpoint_id = getJsonString(obj, "from_checkpoint_id") orelse getJsonString(obj, "checkpoint_id") orelse { + return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"missing required field: from_checkpoint_id or checkpoint_id\"}}"); + }; + + // Load checkpoint + const cp = ctx.store.getCheckpoint(ctx.allocator, checkpoint_id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get checkpoint\"}}"); + } orelse { + return jsonResponse(404, "{\"error\":{\"code\":\"not_found\",\"message\":\"checkpoint not found\"}}"); + }; + + // Verify checkpoint belongs to this run + if (!std.mem.eql(u8, cp.run_id, run_id)) { + return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"checkpoint does not belong to this run\"}}"); + } + + // Load run to verify it exists + _ = ctx.store.getRun(ctx.allocator, run_id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get run\"}}"); + } orelse { + return jsonResponse(404, "{\"error\":{\"code\":\"not_found\",\"message\":\"run not found\"}}"); + }; + + // Delete steps and checkpoints created after the replay checkpoint + // so the engine re-executes from a clean slate. 
+ ctx.store.deleteStepsAfterTimestamp(run_id, cp.created_at_ms) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to clear old steps\"}}"); + }; + ctx.store.deleteCheckpointsAfterVersion(run_id, cp.version) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to clear old checkpoints\"}}"); + }; + + // Reset run state to checkpoint's state + ctx.store.updateRunState(run_id, cp.state_json) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to update run state\"}}"); + }; + + // Set run status to running — engine will pick it up on next tick + // with the checkpoint's completed_nodes + ctx.store.updateRunStatus(run_id, "running", null) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to update run status\"}}"); + }; + + ctx.store.insertEvent(run_id, null, "run.replayed", "{}") catch {}; + + const run_id_json = jsonQuoted(ctx.allocator, run_id) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + const cp_id_json = jsonQuoted(ctx.allocator, checkpoint_id) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + const resp = std.fmt.allocPrint(ctx.allocator, + \\{{"id":{s},"status":"running","replayed_from_checkpoint":{s}}} + , .{ run_id_json, cp_id_json }) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + return jsonResponse(200, resp); +} + +fn handleInjectState(ctx: *Context, run_id: []const u8, body: []const u8) HttpResponse { + // Verify run exists + const run = ctx.store.getRun(ctx.allocator, run_id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get run\"}}"); + } orelse { + return jsonResponse(404, "{\"error\":{\"code\":\"not_found\",\"message\":\"run not found\"}}"); + }; + + const parsed = 
std.json.parseFromSlice(std.json.Value, ctx.allocator, body, .{}) catch { + return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"invalid JSON body\"}}"); + }; + defer parsed.deinit(); + + if (parsed.value != .object) { + return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"body must be a JSON object\"}}"); + } + const obj = parsed.value.object; + + // Get updates + const updates_val = obj.get("updates") orelse { + return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"missing required field: updates\"}}"); + }; + const updates_json = serializeJsonValue(ctx.allocator, updates_val) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to serialize updates\"}}"); + }; + + // Check apply_after_step + const apply_after_step = getJsonString(obj, "apply_after_step"); + + if (apply_after_step == null) { + // Apply immediately to run.state_json + const current_state = run.state_json orelse "{}"; + const schema_json = getSchemaFromRun(ctx, run); + const new_state = state_mod.applyUpdates(ctx.allocator, current_state, updates_json, schema_json) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to apply state updates\"}}"); + }; + ctx.store.updateRunState(run_id, new_state) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to update run state\"}}"); + }; + return jsonResponse(200, "{\"applied\":true}"); + } else { + // Insert into pending_state_injections + ctx.store.createPendingInjection(run_id, updates_json, apply_after_step) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to create pending injection\"}}"); + }; + return jsonResponse(200, "{\"applied\":false,\"pending\":true}"); + } +} + +// ── SSE Stream Handler ────────────────────────────────────────────── + +fn handleStream(ctx: *Context, run_id: []const u8, target: []const u8) HttpResponse { 
+ // For now, return the current state and events as a regular JSON response. + // Full SSE streaming with held-open connections will be implemented + // when the threading model is wired in main.zig (Task 12). + // + // Supports ?mode=values,tasks,debug,updates,custom query param to filter + // which streaming modes the client wants. Default: all modes. + const run = ctx.store.getRun(ctx.allocator, run_id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get run\"}}"); + } orelse { + return jsonResponse(404, "{\"error\":{\"code\":\"not_found\",\"message\":\"run not found\"}}"); + }; + + // Parse requested modes from ?mode= query param + const mode_param = getQueryParam(target, "mode"); + const after_seq = if (getQueryParam(target, "after_seq")) |raw| + std.fmt.parseInt(u64, raw, 10) catch 0 + else + 0; + var requested_modes: [5]bool = .{ true, true, true, true, true }; // all modes by default + if (mode_param) |modes_str| { + // Reset all to false, then enable requested + requested_modes = .{ false, false, false, false, false }; + var mode_it = std.mem.splitScalar(u8, modes_str, ','); + while (mode_it.next()) |mode_name| { + if (sse_mod.StreamMode.fromString(mode_name)) |m| { + requested_modes[@intFromEnum(m)] = true; + } + } + } + + const events_json = if (after_seq == 0) blk: { + const events = ctx.store.getEventsByRun(ctx.allocator, run_id) catch { + return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to get events\"}}"); + }; + + // Build events JSON array + var events_buf: std.ArrayListUnmanaged(u8) = .empty; + events_buf.append(ctx.allocator, '[') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + for (events, 0..) 
|ev, i| { + if (i > 0) { + events_buf.append(ctx.allocator, ',') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + } + const kind_json = jsonQuoted(ctx.allocator, ev.kind) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + const entry = std.fmt.allocPrint(ctx.allocator, + \\{{"kind":{s},"data":{s},"ts_ms":{d}}} + , .{ kind_json, ev.data_json, ev.ts_ms }) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + events_buf.appendSlice(ctx.allocator, entry) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + } + events_buf.append(ctx.allocator, ']') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + break :blk events_buf.toOwnedSlice(ctx.allocator) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + } else "[]"; + + // If SSE hub available, snapshot queued SSE events filtered by requested modes + var sse_events_json: []const u8 = "[]"; + var latest_stream_seq: u64 = 0; + var oldest_stream_seq: u64 = 0; + var stream_gap = false; + if (ctx.sse_hub) |hub| { + const queue = hub.getOrCreateQueue(run_id); + const snapshot = queue.snapshotSince(ctx.allocator, after_seq); + latest_stream_seq = snapshot.latest_seq; + oldest_stream_seq = snapshot.oldest_seq; + stream_gap = snapshot.gap_detected; + if (snapshot.events.len > 0) { + var sse_buf: std.ArrayListUnmanaged(u8) = .empty; + sse_buf.append(ctx.allocator, '[') catch {}; + var first = true; + for (snapshot.events) |sse_ev| { + // Filter by requested modes + if (!requested_modes[@intFromEnum(sse_ev.mode)]) continue; + if (!first) { + sse_buf.append(ctx.allocator, ',') catch {}; + } + first = false; + const mode_str = sse_ev.mode.toString(); + const sse_entry = std.fmt.allocPrint(ctx.allocator, + 
\\{{"seq":{d},"event":{s},"mode":"{s}","data":{s}}} + , .{ + sse_ev.seq, + jsonQuoted(ctx.allocator, sse_ev.event_type) catch "\"\"", + mode_str, + sse_ev.data, + }) catch continue; + sse_buf.appendSlice(ctx.allocator, sse_entry) catch {}; + } + sse_buf.append(ctx.allocator, ']') catch {}; + sse_events_json = sse_buf.toOwnedSlice(ctx.allocator) catch "[]"; + } + } + + const status_json = jsonQuoted(ctx.allocator, run.status) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + const state_field = if (run.state_json) |sj| + std.fmt.allocPrint(ctx.allocator, ",\"state\":{s}", .{sj}) catch "" + else + ""; + + const resp = std.fmt.allocPrint(ctx.allocator, + \\{{"status":{s}{s},"events":{s},"stream_events":{s},"next_stream_seq":{d},"stream_oldest_seq":{d},"stream_gap":{s}}} + , .{ + status_json, + state_field, + events_json, + sse_events_json, + latest_stream_seq, + oldest_stream_seq, + if (stream_gap) "true" else "false", + }) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + return jsonResponse(200, resp); +} + +// ── Agent Events Callback Handler ─────────────────────────────────── + +fn handleAgentEventCallback(ctx: *Context, run_id: []const u8, step_id: []const u8, body: []const u8) HttpResponse { + const parsed = std.json.parseFromSlice(std.json.Value, ctx.allocator, body, .{}) catch { + return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"invalid JSON body\"}}"); + }; + defer parsed.deinit(); + + if (parsed.value != .object) { + return jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"body must be a JSON object\"}}"); + } + const obj = parsed.value.object; + + const iteration: i64 = if (obj.get("iteration")) |it| blk: { + if (it == .integer) break :blk it.integer; + break :blk 0; + } else 0; + + const tool = getJsonString(obj, "tool"); + const args_json = if (obj.get("args")) |args_val| + 
+        serializeJsonValue(ctx.allocator, args_val) catch null
+    else
+        null;
+    const result_text = getJsonString(obj, "result");
+    const status = getJsonString(obj, "status") orelse "running";
+
+    ctx.store.createAgentEvent(run_id, step_id, iteration, tool, args_json, result_text, status) catch {
+        return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"failed to create agent event\"}}");
     };
-    // Build JSON array of chat messages
-    var buf: std.ArrayListUnmanaged(u8) = .empty;
-    buf.append(ctx.allocator, '[') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}");
-
-    for (messages, 0..) |msg, i| {
-        if (i > 0) {
-            buf.append(ctx.allocator, ',') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}");
+    // If sse_hub is available, broadcast as agent_event
+    if (ctx.sse_hub) |hub| {
+        // JSON-quote the client-supplied status so a value containing quotes
+        // or backslashes cannot produce a malformed event payload.
+        const status_json = jsonQuoted(ctx.allocator, status) catch "\"running\"";
+        const event_data = std.fmt.allocPrint(ctx.allocator,
+            \\{{"run_id":"{s}","step_id":"{s}","iteration":{d},"status":{s}}}
+        , .{ run_id, step_id, iteration, status_json }) catch "";
+        if (event_data.len > 0) {
+            hub.broadcast(run_id, .{ .event_type = "agent_event", .data = event_data });
         }
+    }
-        const worker_field = if (msg.worker_id) |wid| blk: {
-            const wid_json = jsonQuoted(ctx.allocator, wid) catch "";
-            break :blk std.fmt.allocPrint(ctx.allocator, ",\"worker_id\":{s}", .{wid_json}) catch "";
-        } else "";
-        const msg_run_id_json = jsonQuoted(ctx.allocator, msg.run_id) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}");
-        const msg_step_id_json = jsonQuoted(ctx.allocator, msg.step_id) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}");
-        const role_json = jsonQuoted(ctx.allocator, msg.role) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}");
-        const message_json = jsonQuoted(ctx.allocator, msg.message) catch return jsonResponse(500,
"{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); + return jsonResponse(200, "{\"ok\":true}"); +} - const entry = std.fmt.allocPrint(ctx.allocator, - \\{{"id":{d},"run_id":{s},"step_id":{s},"round":{d},"role":{s}{s},"message":{s},"ts_ms":{d}}} - , .{ - msg.id, - msg_run_id_json, - msg_step_id_json, - msg.round, - role_json, - worker_field, - message_json, - msg.ts_ms, - }) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); - buf.appendSlice(ctx.allocator, entry) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); - } +// ── State Helper ──────────────────────────────────────────────────── - buf.append(ctx.allocator, ']') catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); - const json_body = buf.toOwnedSlice(ctx.allocator) catch return jsonResponse(500, "{\"error\":{\"code\":\"internal\",\"message\":\"out of memory\"}}"); - return jsonResponse(200, json_body); +fn getSchemaFromRun(ctx: *Context, run: types.RunRow) []const u8 { + const def_parsed = std.json.parseFromSlice(std.json.Value, ctx.allocator, run.workflow_json, .{}) catch return "{}"; + defer def_parsed.deinit(); + if (def_parsed.value != .object) return "{}"; + if (def_parsed.value.object.get("state_schema")) |ss| { + return serializeJsonValue(ctx.allocator, ss) catch "{}"; + } + return "{}"; } // ── Tracker Handlers ───────────────────────────────────────────────── @@ -1188,16 +1963,6 @@ fn validationErrorResponse(err: workflow_validation.ValidateError) HttpResponse error.DependsOnItemNotString => jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"depends_on items must be strings\"}}"), error.DependsOnDuplicate => jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"depends_on contains duplicate step id\"}}"), error.DependsOnUnknownStepId => jsonResponse(400, 
"{\"error\":{\"code\":\"bad_request\",\"message\":\"depends_on references unknown step id\"}}"), - error.LoopBodyRequired => jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"loop step requires 'body' field\"}}"), - error.SubWorkflowRequired => jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"sub_workflow step requires 'workflow' field\"}}"), - error.WaitConditionRequired => jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"wait step requires 'duration_ms', 'until_ms', or 'signal'\"}}"), - error.WaitDurationInvalid => jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"wait.duration_ms must be a non-negative integer\"}}"), - error.WaitUntilInvalid => jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"wait.until_ms must be a non-negative integer\"}}"), - error.WaitSignalInvalid => jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"wait.signal must be a non-empty string\"}}"), - error.RouterRoutesRequired => jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"router step requires 'routes' field\"}}"), - error.SagaBodyRequired => jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"saga step requires 'body' field\"}}"), - error.DebateCountRequired => jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"debate step requires 'count' field\"}}"), - error.GroupChatParticipantsRequired => jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"group_chat step requires 'participants' field\"}}"), error.RetryMustBeObject => jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"retry must be an object\"}}"), error.MaxAttemptsMustBePositiveInteger => jsonResponse(400, "{\"error\":{\"code\":\"bad_request\",\"message\":\"retry.max_attempts must be a positive integer\"}}"), error.TimeoutMsMustBePositiveInteger => jsonResponse(400, 
"{\"error\":{\"code\":\"bad_request\",\"message\":\"timeout_ms must be a positive integer\"}}"), @@ -1377,6 +2142,14 @@ fn getPathSegment(segments: [max_segments]?[]const u8, index: usize) ?[]const u8 return segments[index]; } +fn decodePathSegment(allocator: std.mem.Allocator, segment: ?[]const u8) ?[]const u8 { + const raw = segment orelse return null; + if (std.mem.indexOfScalar(u8, raw, '%') == null) return raw; + + const encoded = allocator.dupe(u8, raw) catch return raw; + return std.Uri.percentDecodeInPlace(encoded); +} + fn eql(a: ?[]const u8, b: []const u8) bool { if (a) |val| return std.mem.eql(u8, val, b); return false; @@ -1581,48 +2354,6 @@ test "API: create run rejects non-positive timeout_ms" { try std.testing.expectEqual(@as(u16, 400), resp.status_code); } -test "API: create run rejects invalid wait duration string" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - var ctx = Context{ - .store = &store, - .allocator = arena.allocator(), - }; - - const body = - \\{"steps":[{"id":"w1","type":"wait","duration_ms":"abc"}]} - ; - - const resp = handleRequest(&ctx, "POST", "/runs", body); - try std.testing.expectEqual(@as(u16, 400), resp.status_code); -} - -test "API: create run rejects invalid wait signal type" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - var ctx = Context{ - .store = &store, - .allocator = arena.allocator(), - }; - - const body = - \\{"steps":[{"id":"w1","type":"wait","signal":1}]} - ; - - const resp = handleRequest(&ctx, "POST", "/runs", body); - try std.testing.expectEqual(@as(u16, 400), resp.status_code); -} - test "API: create run rejects duplicate depends_on items" { const allocator = std.testing.allocator; var store = 
try Store.init(allocator, ":memory:"); @@ -1664,34 +2395,6 @@ test "API: get step enforces run ownership" { try std.testing.expectEqual(@as(u16, 404), resp.status_code); } -test "API: chat transcript escapes message content" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - try store.insertRun("run-chat", null, "running", "{\"steps\":[]}", "{}", "[]"); - try store.insertStep("step-chat-1", "run-chat", "chat", "group_chat", "completed", "{}", 1, null, null, null); - try store.insertChatMessage("run-chat", "step-chat-1", 1, "agent", null, "He said \"go\"\\nline"); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - var ctx = Context{ - .store = &store, - .allocator = arena.allocator(), - }; - - const resp = handleRequest(&ctx, "GET", "/runs/run-chat/steps/step-chat-1/chat", ""); - try std.testing.expectEqual(@as(u16, 200), resp.status_code); - - const parsed = try std.json.parseFromSlice(std.json.Value, allocator, resp.body, .{}); - defer parsed.deinit(); - - try std.testing.expectEqual(@as(usize, 1), parsed.value.array.items.len); - const msg = parsed.value.array.items[0].object.get("message").?; - try std.testing.expectEqualStrings("He said \"go\"\\nline", msg.string); -} - test "API: register worker rejects non-array tags" { const allocator = std.testing.allocator; var store = try Store.init(allocator, ":memory:"); @@ -1772,30 +2475,6 @@ test "API: register worker rejects non-positive max_concurrent" { try std.testing.expectEqual(@as(u16, 400), resp.status_code); } -test "API: approve route does not match extra path segment" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - try store.insertRun("r1", null, "running", "{\"steps\":[]}", "{}", "[]"); - try store.insertStep("s1", "r1", "approve-1", "approval", "waiting_approval", "{}", 1, null, null, null); - - var arena = 
std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - var ctx = Context{ - .store = &store, - .allocator = arena.allocator(), - }; - - const resp = handleRequest(&ctx, "POST", "/runs/r1/steps/s1/approve/extra", ""); - try std.testing.expectEqual(@as(u16, 404), resp.status_code); - try std.testing.expect(std.mem.indexOf(u8, resp.body, "endpoint not found") != null); - - const step = (try store.getStep(arena.allocator(), "s1")).?; - try std.testing.expectEqualStrings("waiting_approval", step.status); -} - test "API: register openai_chat worker requires model" { const allocator = std.testing.allocator; var store = try Store.init(allocator, ":memory:"); @@ -1942,3 +2621,246 @@ test "API: metrics endpoint returns text format" { try std.testing.expect(std.mem.startsWith(u8, resp.content_type, "text/plain")); try std.testing.expect(std.mem.indexOf(u8, resp.body, "nullboiler_http_requests_total") != null); } + +test "API: list runs supports workflow_id filter" { + const allocator = std.testing.allocator; + var store = try Store.init(allocator, ":memory:"); + defer store.deinit(); + + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + try store.createWorkflowWithVersion("wf_1", "WF 1", "{\"nodes\":{},\"edges\":[]}", 1); + try store.createWorkflowWithVersion("wf_2", "WF 2", "{\"nodes\":{},\"edges\":[]}", 1); + try store.createRunWithStateAndStatus("r1", "wf_1", "{\"nodes\":{},\"edges\":[]}", "{}", "{}", "running"); + try store.createRunWithStateAndStatus("r2", "wf_2", "{\"nodes\":{},\"edges\":[]}", "{}", "{}", "running"); + + var ctx = Context{ + .store = &store, + .allocator = arena.allocator(), + }; + + const resp = handleRequest(&ctx, "GET", "/runs?workflow_id=wf_1", ""); + try std.testing.expectEqual(@as(u16, 200), resp.status_code); + try std.testing.expect(std.mem.indexOf(u8, resp.body, "\"workflow_id\":\"wf_1\"") != null); + try std.testing.expect(std.mem.indexOf(u8, resp.body, "\"workflow_id\":\"wf_2\"") == null); +} + 
+test "API: replay run from checkpoint" { + const allocator = std.testing.allocator; + var store = try Store.init(allocator, ":memory:"); + defer store.deinit(); + + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + // Create a run with a checkpoint + try store.createRunWithState("r1", null, "{\"nodes\":{}}", "{}", "{\"x\":1}"); + try store.updateRunStatus("r1", "completed", null); + try store.createCheckpoint("cp1", "r1", "step_a", null, "{\"x\":1}", "[\"step_a\"]", 1, null); + + var ctx = Context{ + .store = &store, + .allocator = arena.allocator(), + }; + + const body = + \\{"from_checkpoint_id":"cp1"} + ; + + const resp = handleRequest(&ctx, "POST", "/runs/r1/replay", body); + try std.testing.expectEqual(@as(u16, 200), resp.status_code); + try std.testing.expect(std.mem.indexOf(u8, resp.body, "running") != null); + try std.testing.expect(std.mem.indexOf(u8, resp.body, "replayed_from_checkpoint") != null); + + // Verify run state was reset to checkpoint state + const run = (try store.getRun(arena.allocator(), "r1")).?; + try std.testing.expectEqualStrings("running", run.status); + if (run.state_json) |sj| { + try std.testing.expectEqualStrings("{\"x\":1}", sj); + } +} + +test "API: replay run accepts checkpoint_id alias" { + const allocator = std.testing.allocator; + var store = try Store.init(allocator, ":memory:"); + defer store.deinit(); + + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + try store.createRunWithState("r1", null, "{\"nodes\":{}}", "{}", "{\"x\":1}"); + try store.updateRunStatus("r1", "completed", null); + try store.createCheckpoint("cp1", "r1", "step_a", null, "{\"x\":1}", "[\"step_a\"]", 1, null); + + var ctx = Context{ + .store = &store, + .allocator = arena.allocator(), + }; + + const body = + \\{"checkpoint_id":"cp1"} + ; + + const resp = handleRequest(&ctx, "POST", "/runs/r1/replay", body); + try std.testing.expectEqual(@as(u16, 200), resp.status_code); + try 
std.testing.expect(std.mem.indexOf(u8, resp.body, "replayed_from_checkpoint") != null); +} + +test "API: replay run rejects wrong checkpoint" { + const allocator = std.testing.allocator; + var store = try Store.init(allocator, ":memory:"); + defer store.deinit(); + + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + // Create two runs, checkpoint belongs to r2 + try store.createRunWithState("r1", null, "{}", "{}", "{}"); + try store.createRunWithState("r2", null, "{}", "{}", "{}"); + try store.createCheckpoint("cp_r2", "r2", "step_a", null, "{}", "[]", 1, null); + + var ctx = Context{ + .store = &store, + .allocator = arena.allocator(), + }; + + const body = + \\{"from_checkpoint_id":"cp_r2"} + ; + + const resp = handleRequest(&ctx, "POST", "/runs/r1/replay", body); + try std.testing.expectEqual(@as(u16, 400), resp.status_code); + try std.testing.expect(std.mem.indexOf(u8, resp.body, "does not belong") != null); +} + +test "API: replay run rejects missing checkpoint" { + const allocator = std.testing.allocator; + var store = try Store.init(allocator, ":memory:"); + defer store.deinit(); + + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + try store.createRunWithState("r1", null, "{}", "{}", "{}"); + + var ctx = Context{ + .store = &store, + .allocator = arena.allocator(), + }; + + const body = + \\{"from_checkpoint_id":"nonexistent"} + ; + + const resp = handleRequest(&ctx, "POST", "/runs/r1/replay", body); + try std.testing.expectEqual(@as(u16, 404), resp.status_code); +} + +test "API: replay run rejects missing field" { + const allocator = std.testing.allocator; + var store = try Store.init(allocator, ":memory:"); + defer store.deinit(); + + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + try store.createRunWithState("r1", null, "{}", "{}", "{}"); + + var ctx = Context{ + .store = &store, + .allocator = arena.allocator(), + }; + + const resp = handleRequest(&ctx, 
"POST", "/runs/r1/replay", "{}"); + try std.testing.expectEqual(@as(u16, 400), resp.status_code); +} + +test "API: stream with mode query param" { + const allocator = std.testing.allocator; + var store = try Store.init(allocator, ":memory:"); + defer store.deinit(); + + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + try store.createRunWithState("r1", null, "{}", "{}", "{\"x\":1}"); + try store.updateRunStatus("r1", "running", null); + + var ctx = Context{ + .store = &store, + .allocator = arena.allocator(), + }; + + // Default (no mode param) — should succeed + const resp1 = handleRequest(&ctx, "GET", "/runs/r1/stream", ""); + try std.testing.expectEqual(@as(u16, 200), resp1.status_code); + try std.testing.expect(std.mem.indexOf(u8, resp1.body, "stream_events") != null); + + // With specific modes + const resp2 = handleRequest(&ctx, "GET", "/runs/r1/stream?mode=values,debug", ""); + try std.testing.expectEqual(@as(u16, 200), resp2.status_code); + try std.testing.expect(std.mem.indexOf(u8, resp2.body, "stream_events") != null); +} + +test "API: stream supports independent cursors for multiple consumers" { + const allocator = std.testing.allocator; + var store = try Store.init(allocator, ":memory:"); + defer store.deinit(); + + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + var hub = sse_mod.SseHub.init(allocator); + defer hub.deinit(); + + try store.createRunWithState("r1", null, "{}", "{}", "{\"x\":1}"); + try store.updateRunStatus("r1", "running", null); + + const queue = hub.getOrCreateQueue("r1"); + queue.push(.{ .event_type = "values", .data = "{\"step\":\"n1\"}", .mode = .values }); + + var ctx = Context{ + .store = &store, + .allocator = arena.allocator(), + .sse_hub = &hub, + }; + + const consumer_a = handleRequest(&ctx, "GET", "/runs/r1/stream", ""); + try std.testing.expectEqual(@as(u16, 200), consumer_a.status_code); + try std.testing.expect(std.mem.indexOf(u8, consumer_a.body, 
"\"seq\":1") != null); + + const consumer_b = handleRequest(&ctx, "GET", "/runs/r1/stream", ""); + try std.testing.expectEqual(@as(u16, 200), consumer_b.status_code); + try std.testing.expect(std.mem.indexOf(u8, consumer_b.body, "\"seq\":1") != null); + + queue.push(.{ .event_type = "updates", .data = "{\"step\":\"n2\"}", .mode = .updates }); + const consumer_a_next = handleRequest(&ctx, "GET", "/runs/r1/stream?after_seq=1", ""); + try std.testing.expectEqual(@as(u16, 200), consumer_a_next.status_code); + try std.testing.expect(std.mem.indexOf(u8, consumer_a_next.body, "\"seq\":2") != null); + try std.testing.expect(std.mem.indexOf(u8, consumer_a_next.body, "\"events\":[]") != null); + try std.testing.expect(std.mem.indexOf(u8, consumer_a_next.body, "\"next_stream_seq\":2") != null); +} + +test "API: workflow routes decode percent-encoded ids" { + const allocator = std.testing.allocator; + var store = try Store.init(allocator, ":memory:"); + defer store.deinit(); + + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + try store.createWorkflowWithVersion("wf/alpha beta", "Encoded Workflow", "{\"nodes\":{},\"edges\":[]}", 1); + + var ctx = Context{ + .store = &store, + .allocator = arena.allocator(), + }; + + const get_resp = handleRequest(&ctx, "GET", "/workflows/wf%2Falpha%20beta", ""); + try std.testing.expectEqual(@as(u16, 200), get_resp.status_code); + try std.testing.expect(std.mem.indexOf(u8, get_resp.body, "\"id\":\"wf/alpha beta\"") != null); + + const validate_resp = handleRequest(&ctx, "POST", "/workflows/wf%2Falpha%20beta/validate", ""); + try std.testing.expectEqual(@as(u16, 200), validate_resp.status_code); +} diff --git a/src/config.zig b/src/config.zig index 430f5b2..0d3406e 100644 --- a/src/config.zig +++ b/src/config.zig @@ -77,6 +77,7 @@ pub const Config = struct { port: u16 = 8080, db: []const u8 = "nullboiler.db", api_token: ?[]const u8 = null, + self_url: ?[]const u8 = null, strategies_dir: []const u8 = "strategies", 
workers: []const WorkerConfig = &.{}, engine: EngineConfig = .{}, diff --git a/src/dispatch.zig b/src/dispatch.zig index c1ccdbb..65fdadd 100644 --- a/src/dispatch.zig +++ b/src/dispatch.zig @@ -14,7 +14,7 @@ pub const WorkerInfo = struct { id: []const u8, url: []const u8, token: []const u8, - protocol: []const u8 = "webhook", // "webhook", "api_chat", "openai_chat" + protocol: []const u8 = "webhook", // "webhook", "api_chat", "openai_chat", "a2a" model: ?[]const u8 = null, tags_json: []const u8, // JSON array like ["coder","researcher"] max_concurrent: i64, @@ -84,6 +84,23 @@ fn workerMatchesTags( return false; } +// ── Agent Step Options ──────────────────────────────────────────────── + +/// Extra fields included in the webhook body when step type is "agent". +pub const AgentOpts = struct { + /// "autonomous" or "managed" + mode: ?[]const u8 = null, + /// Full callback URL for agent events; if null, omitted from body. + /// Typically constructed as: self_url + "/internal/agent-events/{run_id}/{step_id}" + callback_url: ?[]const u8 = null, + /// Maximum agent iterations; if null, omitted from body. + max_iterations: ?i64 = null, + /// JSON array of tool names, e.g. "[\"search\",\"code\"]"; if null, omitted from body. + tools_json: ?[]const u8 = null, + /// Current state JSON to pass to the agent; if null, omitted from body. + state_json: ?[]const u8 = null, +}; + // ── HTTP Dispatch ───────────────────────────────────────────────────── pub fn dispatchStep( @@ -95,6 +112,24 @@ pub fn dispatchStep( run_id: []const u8, step_id: []const u8, rendered_prompt: []const u8, +) !DispatchResult { + return dispatchStepWithOpts(allocator, worker_url, worker_token, worker_protocol_raw, worker_model, run_id, step_id, rendered_prompt, null); +} + +/// Like dispatchStep but also accepts optional agent-specific fields. 
+/// When agent_opts is non-null and the protocol is webhook, the additional +/// fields (mode, callback_url, max_iterations, tools, state) are merged +/// into the request body. +pub fn dispatchStepWithOpts( + allocator: std.mem.Allocator, + worker_url: []const u8, + worker_token: []const u8, + worker_protocol_raw: []const u8, + worker_model: ?[]const u8, + run_id: []const u8, + step_id: []const u8, + rendered_prompt: []const u8, + agent_opts: ?AgentOpts, ) !DispatchResult { const protocol = worker_protocol.parse(worker_protocol_raw) orelse { const err_msg = try std.fmt.allocPrint(allocator, "unsupported worker protocol: {s}", .{worker_protocol_raw}); @@ -131,6 +166,7 @@ pub fn dispatchStep( run_id, step_id, rendered_prompt, + agent_opts, ) catch |err| switch (err) { error.MissingWorkerModel => { return DispatchResult{ @@ -195,6 +231,12 @@ pub fn dispatchStep( } const response_data = response_body.written(); + + // A2A uses JSON-RPC 2.0 responses; parse them with the A2A-specific parser + if (protocol == .a2a) { + return try parseA2aResponse(allocator, response_data); + } + return try worker_response.parse(allocator, response_data); } @@ -205,7 +247,7 @@ pub fn probeWorker( ) bool { const protocol = worker_protocol.parse(worker_protocol_raw) orelse return false; - // Async protocols (mqtt/redis_stream) can't be probed via HTTP + // Async protocols (mqtt/redis_stream) can't be probed via HTTP; a2a is probed via its own endpoint if (protocol == .mqtt or protocol == .redis_stream) return true; const url = worker_protocol.buildRequestUrl(allocator, worker_url, protocol) catch return false; @@ -234,12 +276,18 @@ fn buildRequestBody( run_id: []const u8, step_id: []const u8, rendered_prompt: []const u8, + agent_opts: ?AgentOpts, ) ![]const u8 { const session_key = try std.fmt.allocPrint(allocator, "run_{s}_step_{s}", .{ run_id, step_id }); defer allocator.free(session_key); switch (protocol) { .webhook => { + // For agent steps with opts, build an extended body that 
includes + // agent-specific fields alongside the standard webhook fields. + if (agent_opts) |opts| { + return buildWebhookAgentBody(allocator, session_key, rendered_prompt, opts); + } return std.json.Stringify.valueAlloc(allocator, .{ .message = rendered_prompt, .text = rendered_prompt, @@ -267,6 +315,9 @@ fn buildRequestBody( .messages = messages[0..], }, .{}); }, + .a2a => { + return buildA2aRequestBody(allocator, rendered_prompt, session_key); + }, .mqtt, .redis_stream => { // MQTT and Redis Stream use async dispatch; body built by their respective clients return std.json.Stringify.valueAlloc(allocator, .{ @@ -277,6 +328,247 @@ fn buildRequestBody( } } +/// Build the webhook JSON body for an agent step, merging standard fields with +/// agent-specific optional fields (mode, callback_url, max_iterations, tools, state). +/// Only non-null fields from agent_opts are included in the output. +fn buildWebhookAgentBody( + allocator: std.mem.Allocator, + session_key: []const u8, + rendered_prompt: []const u8, + opts: AgentOpts, +) ![]const u8 { + var buf: std.ArrayListUnmanaged(u8) = .empty; + errdefer buf.deinit(allocator); + + // Standard webhook fields + try buf.appendSlice(allocator, "{\"message\":"); + try appendJsonString(&buf, allocator, rendered_prompt); + try buf.appendSlice(allocator, ",\"text\":"); + try appendJsonString(&buf, allocator, rendered_prompt); + try buf.appendSlice(allocator, ",\"session_key\":"); + try appendJsonString(&buf, allocator, session_key); + try buf.appendSlice(allocator, ",\"session_id\":"); + try appendJsonString(&buf, allocator, session_key); + + // Optional agent fields + if (opts.mode) |mode| { + try buf.appendSlice(allocator, ",\"mode\":"); + try appendJsonString(&buf, allocator, mode); + } + if (opts.callback_url) |cb_url| { + try buf.appendSlice(allocator, ",\"callback_url\":"); + try appendJsonString(&buf, allocator, cb_url); + } + if (opts.max_iterations) |max_iter| { + const field = try std.fmt.allocPrint(allocator, 
",\"max_iterations\":{d}", .{max_iter}); + defer allocator.free(field); + try buf.appendSlice(allocator, field); + } + if (opts.tools_json) |tools| { + // tools_json is already a JSON array string — embed it verbatim + try buf.appendSlice(allocator, ",\"tools\":"); + try buf.appendSlice(allocator, tools); + } + if (opts.state_json) |state| { + // state_json is already a JSON object/value — embed it verbatim + try buf.appendSlice(allocator, ",\"state\":"); + try buf.appendSlice(allocator, state); + } + + try buf.append(allocator, '}'); + + return buf.toOwnedSlice(allocator); +} + +/// Append a JSON-encoded string (with surrounding quotes and escapes) to buf. +fn appendJsonString(buf: *std.ArrayListUnmanaged(u8), allocator: std.mem.Allocator, s: []const u8) !void { + try buf.append(allocator, '"'); + for (s) |byte| { + switch (byte) { + '"' => try buf.appendSlice(allocator, "\\\""), + '\\' => try buf.appendSlice(allocator, "\\\\"), + '\n' => try buf.appendSlice(allocator, "\\n"), + '\r' => try buf.appendSlice(allocator, "\\r"), + '\t' => try buf.appendSlice(allocator, "\\t"), + 0x00...0x08, 0x0b, 0x0c, 0x0e...0x1f => { + const escaped = try std.fmt.allocPrint(allocator, "\\u{x:0>4}", .{byte}); + defer allocator.free(escaped); + try buf.appendSlice(allocator, escaped); + }, + else => try buf.append(allocator, byte), + } + } + try buf.append(allocator, '"'); +} + +// ── A2A Protocol Support ────────────────────────────────────────────── + +/// Build an A2A (Agent-to-Agent) JSON-RPC 2.0 request body using tasks/send. +/// The context_id provides session persistence — same context_id means same conversation. 
+fn buildA2aRequestBody( + allocator: std.mem.Allocator, + prompt: []const u8, + context_id: []const u8, +) ![]const u8 { + // Build the parts array + const parts = [_]struct { + type: []const u8, + text: []const u8, + }{ + .{ .type = "text", .text = prompt }, + }; + + // Build the message + const message = .{ + .role = "user", + .parts = parts[0..], + }; + + // Build the params + const params = .{ + .message = message, + .contextId = context_id, + }; + + // Build the full JSON-RPC request + return std.json.Stringify.valueAlloc(allocator, .{ + .jsonrpc = "2.0", + .id = context_id, + .method = "tasks/send", + .params = params, + }, .{}); +} + +/// Parse an A2A JSON-RPC 2.0 response and extract the text from the first artifact. +/// Expected structure: result.artifacts[0].parts[0].text (or .kind=="text") +/// Also checks for JSON-RPC error responses. +fn parseA2aResponse(allocator: std.mem.Allocator, response_body: []const u8) !DispatchResult { + const parsed = std.json.parseFromSlice(std.json.Value, allocator, response_body, .{}) catch { + return DispatchResult{ + .output = "", + .success = false, + .error_text = "A2A: invalid JSON response", + }; + }; + defer parsed.deinit(); + + if (parsed.value != .object) { + return DispatchResult{ + .output = "", + .success = false, + .error_text = "A2A: response is not a JSON object", + }; + } + const obj = parsed.value.object; + + // Check for JSON-RPC error + if (obj.get("error")) |err_val| { + if (err_val == .object) { + if (err_val.object.get("message")) |msg_val| { + if (msg_val == .string) { + return DispatchResult{ + .output = "", + .success = false, + .error_text = try allocator.dupe(u8, msg_val.string), + }; + } + } + } + return DispatchResult{ + .output = "", + .success = false, + .error_text = "A2A: JSON-RPC error", + }; + } + + // Extract result + const result_val = obj.get("result") orelse { + return DispatchResult{ + .output = "", + .success = false, + .error_text = "A2A: missing result field", + }; + }; + if 
(result_val != .object) { + return DispatchResult{ + .output = "", + .success = false, + .error_text = "A2A: result is not an object", + }; + } + const result_obj = result_val.object; + + // Check task status + if (result_obj.get("status")) |status_val| { + if (status_val == .object) { + if (status_val.object.get("state")) |state_val| { + if (state_val == .string) { + if (std.mem.eql(u8, state_val.string, "failed")) { + // Extract error message from status if available + if (status_val.object.get("message")) |msg| { + if (msg == .object) { + if (msg.object.get("parts")) |msg_parts| { + if (msg_parts == .array and msg_parts.array.items.len > 0) { + const first_part = msg_parts.array.items[0]; + if (first_part == .object) { + if (first_part.object.get("text")) |t| { + if (t == .string) { + return DispatchResult{ + .output = "", + .success = false, + .error_text = try allocator.dupe(u8, t.string), + }; + } + } + } + } + } + } + } + return DispatchResult{ + .output = "", + .success = false, + .error_text = "A2A: task failed", + }; + } + } + } + } + } + + // Extract text from artifacts[0].parts[0].text + if (result_obj.get("artifacts")) |artifacts_val| { + if (artifacts_val == .array and artifacts_val.array.items.len > 0) { + const first_artifact = artifacts_val.array.items[0]; + if (first_artifact == .object) { + if (first_artifact.object.get("parts")) |parts_val| { + if (parts_val == .array and parts_val.array.items.len > 0) { + const first_part = parts_val.array.items[0]; + if (first_part == .object) { + // Check for "text" field (A2A uses "text" key for text parts) + if (first_part.object.get("text")) |text_val| { + if (text_val == .string) { + return DispatchResult{ + .output = try allocator.dupe(u8, text_val.string), + .success = true, + .error_text = null, + }; + } + } + } + } + } + } + } + } + + return DispatchResult{ + .output = "", + .success = false, + .error_text = "A2A: no text found in artifacts", + }; +} + /// Build the wire-format JSON body for async 
(MQTT/Redis) dispatch. /// Includes correlation_id, reply_to topic/stream, timestamp, auth token, /// the rendered prompt, and a session_key matching the correlation_id. @@ -623,8 +915,54 @@ test "buildRequestBody: openai_chat requires model" { const allocator = std.testing.allocator; try std.testing.expectError( error.MissingWorkerModel, - buildRequestBody(allocator, .openai_chat, null, "run-1", "step-1", "hello"), + buildRequestBody(allocator, .openai_chat, null, "run-1", "step-1", "hello", null), + ); +} + +test "buildWebhookAgentBody: includes all agent fields when present" { + const allocator = std.testing.allocator; + const opts = AgentOpts{ + .mode = "autonomous", + .callback_url = "http://localhost:8080/internal/agent-events/run-1/step-1", + .max_iterations = 25, + .tools_json = "[\"search\",\"code\"]", + .state_json = "{\"foo\":\"bar\"}", + }; + const body = try buildRequestBody(allocator, .webhook, null, "run-1", "step-1", "do something", opts); + defer allocator.free(body); + + const parsed = try std.json.parseFromSlice(std.json.Value, allocator, body, .{}); + defer parsed.deinit(); + const obj = parsed.value.object; + + try std.testing.expectEqualStrings("do something", obj.get("message").?.string); + try std.testing.expectEqualStrings("autonomous", obj.get("mode").?.string); + try std.testing.expectEqualStrings( + "http://localhost:8080/internal/agent-events/run-1/step-1", + obj.get("callback_url").?.string, ); + try std.testing.expectEqual(@as(i64, 25), obj.get("max_iterations").?.integer); + // tools and state are embedded JSON — check they round-trip + const tools_arr = obj.get("tools").?.array; + try std.testing.expectEqual(@as(usize, 2), tools_arr.items.len); + try std.testing.expectEqualStrings("search", tools_arr.items[0].string); +} + +test "buildWebhookAgentBody: omits null agent fields" { + const allocator = std.testing.allocator; + const opts = AgentOpts{ .mode = "managed" }; + const body = try buildRequestBody(allocator, .webhook, null, 
"run-1", "step-1", "hello", opts); + defer allocator.free(body); + + const parsed = try std.json.parseFromSlice(std.json.Value, allocator, body, .{}); + defer parsed.deinit(); + const obj = parsed.value.object; + + try std.testing.expectEqualStrings("managed", obj.get("mode").?.string); + try std.testing.expect(obj.get("callback_url") == null); + try std.testing.expect(obj.get("max_iterations") == null); + try std.testing.expect(obj.get("tools") == null); + try std.testing.expect(obj.get("state") == null); } test "buildAsyncRequestBody: produces valid wire-format JSON with all fields" { @@ -703,3 +1041,92 @@ test "dispatchRedis: invalid URL returns error" { try std.testing.expect(!result.success); try std.testing.expectEqualStrings("invalid redis:// URL", result.error_text.?); } + +test "buildA2aRequestBody: produces valid JSON-RPC 2.0 request" { + const allocator = std.testing.allocator; + const body = try buildA2aRequestBody(allocator, "Fix the bug in main.py", "run_abc_step_fix"); + defer allocator.free(body); + + const parsed = try std.json.parseFromSlice(std.json.Value, allocator, body, .{}); + defer parsed.deinit(); + const obj = parsed.value.object; + + try std.testing.expectEqualStrings("2.0", obj.get("jsonrpc").?.string); + try std.testing.expectEqualStrings("run_abc_step_fix", obj.get("id").?.string); + try std.testing.expectEqualStrings("tasks/send", obj.get("method").?.string); + + const params = obj.get("params").?.object; + try std.testing.expectEqualStrings("run_abc_step_fix", params.get("contextId").?.string); + + const message = params.get("message").?.object; + try std.testing.expectEqualStrings("user", message.get("role").?.string); + + const parts = message.get("parts").?.array; + try std.testing.expectEqual(@as(usize, 1), parts.items.len); + try std.testing.expectEqualStrings("text", parts.items[0].object.get("type").?.string); + try std.testing.expectEqualStrings("Fix the bug in main.py", parts.items[0].object.get("text").?.string); +} + +test 
"buildRequestBody: a2a protocol produces JSON-RPC body" { + const allocator = std.testing.allocator; + const body = try buildRequestBody(allocator, .a2a, null, "run-1", "step-1", "hello agent", null); + defer allocator.free(body); + + const parsed = try std.json.parseFromSlice(std.json.Value, allocator, body, .{}); + defer parsed.deinit(); + const obj = parsed.value.object; + + try std.testing.expectEqualStrings("2.0", obj.get("jsonrpc").?.string); + try std.testing.expectEqualStrings("tasks/send", obj.get("method").?.string); + // context_id is "run_{run_id}_step_{step_id}" + try std.testing.expectEqualStrings("run_run-1_step_step-1", obj.get("id").?.string); +} + +test "parseA2aResponse: extracts text from successful response" { + const allocator = std.testing.allocator; + const response = + \\{"jsonrpc":"2.0","id":"req-1","result":{"id":"task-1","contextId":"ctx-1","status":{"state":"completed","timestamp":"2025-01-01T00:00:00Z"},"artifacts":[{"artifactId":"a1","parts":[{"kind":"text","text":"The bug has been fixed."}]}]}} + ; + const result = try parseA2aResponse(allocator, response); + defer allocator.free(result.output); + try std.testing.expect(result.success); + try std.testing.expectEqualStrings("The bug has been fixed.", result.output); +} + +test "parseA2aResponse: handles JSON-RPC error" { + const allocator = std.testing.allocator; + const response = + \\{"jsonrpc":"2.0","id":"req-1","error":{"code":-32600,"message":"Invalid Request"}} + ; + const result = try parseA2aResponse(allocator, response); + defer allocator.free(result.error_text.?); + try std.testing.expect(!result.success); + try std.testing.expectEqualStrings("Invalid Request", result.error_text.?); +} + +test "parseA2aResponse: handles failed task status" { + const allocator = std.testing.allocator; + const response = + \\{"jsonrpc":"2.0","id":"req-1","result":{"id":"task-1","status":{"state":"failed"}}} + ; + const result = try parseA2aResponse(allocator, response); + try 
std.testing.expect(!result.success); + try std.testing.expectEqualStrings("A2A: task failed", result.error_text.?); +} + +test "parseA2aResponse: handles missing artifacts" { + const allocator = std.testing.allocator; + const response = + \\{"jsonrpc":"2.0","id":"req-1","result":{"id":"task-1","status":{"state":"completed"}}} + ; + const result = try parseA2aResponse(allocator, response); + try std.testing.expect(!result.success); + try std.testing.expectEqualStrings("A2A: no text found in artifacts", result.error_text.?); +} + +test "parseA2aResponse: handles invalid JSON" { + const allocator = std.testing.allocator; + const result = try parseA2aResponse(allocator, "not json"); + try std.testing.expect(!result.success); + try std.testing.expectEqualStrings("A2A: invalid JSON response", result.error_text.?); +} diff --git a/src/engine.zig b/src/engine.zig index c3b3baa..e2727c4 100644 --- a/src/engine.zig +++ b/src/engine.zig @@ -1,15 +1,30 @@ -/// DAG Engine — Scheduler Loop +/// DAG Engine — Unified State Model Scheduler /// /// The engine runs on its own thread, polling the database for active runs -/// and processing their steps according to the DAG dependencies. +/// and processing them using a graph-based state model with 7 node types: +/// task, route, interrupt, agent, send, transform, subgraph /// /// Each tick: -/// 1. Get active runs -/// 2. For each run, promote pending steps to ready -/// 3. Process ready steps by type (task, fan_out, map, reduce, condition, approval) -/// 4. Check run completion +/// 1. Get active runs (status = running) +/// 2. For each run: +/// a. Load current state from run.state_json +/// b. Load workflow definition from run.workflow_json +/// c. Get completed nodes from latest checkpoint (or []) +/// d. Find ready nodes (all nodes whose inbound edges are satisfied) +/// e. Execute ready nodes in sequence +/// f. Apply state updates via reducers, save checkpoint +/// g. 
Check termination / deadlock +/// +/// Features: +/// - Command primitive (goto): worker responses can contain "goto" to override routing +/// - Breakpoints: interrupt_before / interrupt_after arrays in workflow definition +/// - Subgraph: inline execution of child workflows with input/output mapping +/// - Multi-turn: agent nodes can loop with continuation_prompt up to max_turns +/// - Configurable runs: config stored as state.__config, accessible via templates +/// - Reconciliation: check nulltickets task status between steps const std = @import("std"); const log = std.log.scoped(.engine); +const json = std.json; const Store = @import("store.zig").Store; const types = @import("types.zig"); @@ -19,6 +34,110 @@ const dispatch = @import("dispatch.zig"); const callbacks = @import("callbacks.zig"); const metrics_mod = @import("metrics.zig"); const async_dispatch = @import("async_dispatch.zig"); +const state_mod = @import("state.zig"); +const sse_mod = @import("sse.zig"); +const tracker_client = @import("tracker_client.zig"); +const workflow_loader = @import("workflow_loader.zig"); + +// ── Structured Events ──────────────────────────────────────────────── + +pub const OrchestratorEvent = struct { + event_type: EventType, + run_id: ?[]const u8, + step_id: ?[]const u8, + node_name: ?[]const u8, + timestamp_ms: i64, + metadata_json: ?[]const u8, + + pub const EventType = enum { + run_started, + run_completed, + run_failed, + run_interrupted, + run_cancelled, + step_started, + step_completed, + step_failed, + step_retrying, + agent_turn_started, + agent_turn_completed, + workflow_reloaded, + checkpoint_created, + state_injected, + }; + + pub fn eventKindString(et: EventType) []const u8 { + return switch (et) { + .run_started => "run.started", + .run_completed => "run.completed", + .run_failed => "run.failed", + .run_interrupted => "run.interrupted", + .run_cancelled => "run.cancelled", + .step_started => "step.started", + .step_completed => "step.completed", + 
.step_failed => "step.failed", + .step_retrying => "step.retrying", + .agent_turn_started => "agent_turn.started", + .agent_turn_completed => "agent_turn.completed", + .workflow_reloaded => "workflow.reloaded", + .checkpoint_created => "checkpoint.created", + .state_injected => "state.injected", + }; + } + + pub fn toJson(self: OrchestratorEvent, alloc: std.mem.Allocator) ?[]const u8 { + return std.fmt.allocPrint(alloc, + \\{{"event_type":"{s}","run_id":"{s}","step_id":"{s}","node_name":"{s}","timestamp_ms":{d}}} + , .{ + eventKindString(self.event_type), + self.run_id orelse "", + self.step_id orelse "", + self.node_name orelse "", + self.timestamp_ms, + }) catch null; + } +}; + +// ── Constants ──────────────────────────────────────────────────────── + +/// Maximum number of node executions per tick to prevent infinite loops. +const max_nodes_per_tick: u32 = 1000; + +/// Maximum inline subgraph recursion depth. +const max_subgraph_depth: u32 = 10; + +const StoreWriter = *const fn ( + alloc: std.mem.Allocator, + base_url: []const u8, + api_token: ?[]const u8, + namespace: []const u8, + key: []const u8, + value_json: []const u8, +) anyerror!void; + +const TrackerRuntime = struct { + base_url: []const u8, + api_token: ?[]const u8, + + fn storeAccess(self: TrackerRuntime, fetcher: templates.StoreFetcher) templates.StoreAccess { + return .{ + .base_url = self.base_url, + .api_token = self.api_token, + .fetcher = fetcher, + }; + } +}; + +const RuntimeBindings = struct { + input_json: ?[]const u8, + task_id: ?[]const u8, + tracker: ?TrackerRuntime, + + fn storeAccess(self: RuntimeBindings, fetcher: templates.StoreFetcher) ?templates.StoreAccess { + const tracker = self.tracker orelse return null; + return tracker.storeAccess(fetcher); + } +}; // ── Engine ─────────────────────────────────────────────────────────── @@ -32,6 +151,14 @@ pub const RuntimeConfig = struct { retry_max_elapsed_ms: i64 = 900_000, }; +pub const RateLimitInfo = struct { + worker_id: []const u8, + 
remaining: i64, + limit: i64, + reset_ms: i64, + updated_at_ms: i64, +}; + pub const Engine = struct { store: *Store, allocator: std.mem.Allocator, @@ -41,11 +168,18 @@ pub const Engine = struct { next_health_check_at_ms: i64, metrics: ?*metrics_mod.Metrics, response_queue: ?*async_dispatch.ResponseQueue, - - const TaskPromptSource = union(enum) { - rendered: []const u8, - template: []const u8, - }; + sse_hub: ?*sse_mod.SseHub = null, + workflow_watcher: ?*workflow_loader.WorkflowWatcher = null, + rate_limits: std.StringHashMap(RateLimitInfo), + store_fetcher: templates.StoreFetcher, + store_writer: StoreWriter, + trusted_tracker_url: ?[]const u8 = null, + trusted_tracker_api_token: ?[]const u8 = null, + config_valid: bool = false, + last_config_check_ms: i64 = 0, + + /// How often to re-run config validation (default 30s). + const config_check_interval_ms: i64 = 30_000; pub fn init(store: *Store, allocator: std.mem.Allocator, poll_interval_ms: u64) Engine { return .{ @@ -57,6 +191,15 @@ pub const Engine = struct { .next_health_check_at_ms = 0, .metrics = null, .response_queue = null, + .sse_hub = null, + .workflow_watcher = null, + .rate_limits = std.StringHashMap(RateLimitInfo).init(allocator), + .store_fetcher = templates.fetchStoreValueHttp, + .store_writer = putStoreValueViaHttp, + .trusted_tracker_url = null, + .trusted_tracker_api_token = null, + .config_valid = false, + .last_config_check_ms = 0, }; } @@ -65,6 +208,11 @@ pub const Engine = struct { self.metrics = metrics; } + pub fn setTrustedTrackerAccess(self: *Engine, base_url: ?[]const u8, api_token: ?[]const u8) void { + self.trusted_tracker_url = base_url; + self.trusted_tracker_api_token = api_token; + } + pub fn stop(self: *Engine) void { self.running.store(false, .release); } @@ -80,6 +228,87 @@ pub const Engine = struct { log.info("engine stopped", .{}); } + // ── Config Validation ──────────────────────────────────────────── + + /// Validate that the engine configuration is healthy before 
dispatching + /// new work. Returns true if workers exist and the store is reachable. + /// Results are cached for config_check_interval_ms to avoid running + /// 2 DB queries (listWorkers + getActiveRuns) on every tick. + fn validateConfig(self: *Engine) bool { + const now_ms = ids.nowMs(); + if (self.config_valid and (now_ms - self.last_config_check_ms) < config_check_interval_ms) { + return true; + } + + // Check: at least one worker registered and active + var arena = std.heap.ArenaAllocator.init(self.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + const workers = self.store.listWorkers(alloc) catch { + log.warn("config validation: store query failed (listWorkers)", .{}); + self.config_valid = false; + return false; + }; + + if (workers.len == 0) { + log.warn("config validation: no workers registered", .{}); + self.config_valid = false; + return false; + } + + // Check: store connection healthy (simple query) + _ = self.store.getActiveRuns(alloc) catch { + log.warn("config validation: store connection unhealthy", .{}); + self.config_valid = false; + return false; + }; + + self.config_valid = true; + self.last_config_check_ms = now_ms; + return true; + } + + // ── Structured Event Emission ──────────────────────────────────── + + /// Emit a structured OrchestratorEvent: persist to the events table and + /// broadcast via SseHub for real-time consumption. 
+ fn emitEvent( + self: *Engine, + alloc: std.mem.Allocator, + event_type: OrchestratorEvent.EventType, + run_id: ?[]const u8, + step_id: ?[]const u8, + node_name: ?[]const u8, + metadata_json: ?[]const u8, + ) void { + const ev = OrchestratorEvent{ + .event_type = event_type, + .run_id = run_id, + .step_id = step_id, + .node_name = node_name, + .timestamp_ms = ids.nowMs(), + .metadata_json = metadata_json, + }; + + const kind = OrchestratorEvent.eventKindString(event_type); + const data = ev.toJson(alloc) orelse "{}"; + + // Persist to events table + if (run_id) |rid| { + self.store.insertEvent(rid, step_id, kind, data) catch |err| { + log.warn("failed to persist event {s}: {}", .{ kind, err }); + }; + } + + // Broadcast via SSE + if (self.sse_hub) |hub| { + if (run_id) |rid| { + hub.broadcast(rid, .{ .event_type = kind, .data = data }); + } + } + } + // ── tick — single scheduler iteration ──────────────────────────── fn tick(self: *Engine) !void { @@ -87,6 +316,17 @@ pub const Engine = struct { defer arena.deinit(); const alloc = arena.allocator(); + // Validate config before processing — skip dispatch if unhealthy + if (!self.validateConfig()) { + log.warn("config validation failed, skipping dispatch this tick", .{}); + return; + } + + // Check for hot-reloaded workflow files + if (self.workflow_watcher) |watcher| { + watcher.checkForChanges(); + } + const now_ms = ids.nowMs(); if (now_ms >= self.next_health_check_at_ms) { self.runWorkerHealthChecks(alloc, now_ms) catch |err| { @@ -137,693 +377,824 @@ pub const Engine = struct { } } - // ── processRun ─────────────────────────────────────────────────── + // ── processRun — state-based graph execution ───────────────────── fn processRun(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow) !void { - // 1. Get all steps for this run - const steps = try self.store.getStepsByRun(alloc, run_row.id); - - // 2. Promote pending -> ready: for each pending step, check if - // all its deps are completed/skipped. 
- for (steps) |step| { - if (!std.mem.eql(u8, step.status, "pending")) continue; - - const dep_ids = try self.store.getStepDeps(alloc, step.id); - var all_deps_met = true; - - for (dep_ids) |dep_id| { - // Find the dep step status from our already-fetched steps - const dep_status = findStepStatus(steps, dep_id); - if (dep_status) |ds| { - if (!std.mem.eql(u8, ds, "completed") and !std.mem.eql(u8, ds, "skipped")) { - all_deps_met = false; - break; + return self.processRunWithDepth(alloc, run_row, 0); + } + + /// Wrapper for inline subgraph execution. Uses anyerror to break + /// the recursive inferred-error-set cycle. + fn processRunInline(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, recursion_depth: u32) void { + self.processRunWithDepth(alloc, run_row, recursion_depth) catch |err| { + log.err("inline subgraph run {s} failed: {}", .{ run_row.id, err }); + }; + } + + fn processRunWithDepth(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, recursion_depth: u32) !void { + // 1. Load current state + var current_state = run_row.state_json orelse "{}"; + + // 1b. Inject __config into state (configurable runs) + if (run_row.config_json) |config_str| { + if (config_str.len > 0) { + const config_update = std.fmt.allocPrint(alloc, "{{\"__config\":{s}}}", .{config_str}) catch null; + if (config_update) |cu| { + // Simple merge: parse state, add __config key + const merged = state_mod.applyUpdates(alloc, current_state, cu, "{}") catch null; + if (merged) |m| { + current_state = m; + } + } + } + } + + // 2. Load and parse workflow definition once for the entire tick. + // Helper functions still accept raw JSON strings for external callers, + // but we pre-extract commonly used values here to avoid redundant parsing. 
+ const workflow_json = run_row.workflow_json; + const wf_parsed = json.parseFromSlice(json.Value, alloc, workflow_json, .{}) catch { + log.err("failed to parse workflow_json for run {s}", .{run_row.id}); + try self.store.updateRunStatus(run_row.id, "failed", "invalid workflow JSON"); + return; + }; + const wf_root = wf_parsed.value; + + // Pre-extract schema (used many times in the loop) + const cached_schema_json = if (wf_root == .object) blk: { + if (wf_root.object.get("state_schema")) |ss| { + break :blk serializeJsonValue(alloc, ss) catch "{}"; + } + if (wf_root.object.get("schema")) |ss| { + break :blk serializeJsonValue(alloc, ss) catch "{}"; + } + break :blk "{}"; + } else "{}"; + + // 2b. Parse breakpoint lists from workflow definition + const interrupt_before = parseBreakpointListFromRoot(alloc, wf_root, "interrupt_before"); + const interrupt_after = parseBreakpointListFromRoot(alloc, wf_root, "interrupt_after"); + + // 2d. Collect deferred nodes (Gap 6) + const deferred_nodes = collectDeferredNodesFromRoot(alloc, wf_root); + + // 2c. Get task id for reconciliation. + const runtime = self.buildRuntimeBindings(alloc, workflow_json, current_state, run_row.input_json); + const task_id = runtime.task_id; + + // 3. 
Get completed nodes from latest checkpoint + var completed_nodes = std.StringHashMap(void).init(alloc); + var route_results = std.StringHashMap([]const u8).init(alloc); + + const latest_checkpoint = try self.store.getLatestCheckpoint(alloc, run_row.id); + if (latest_checkpoint) |cp| { + // Parse completed_nodes_json array + const cn_parsed = json.parseFromSlice(json.Value, alloc, cp.completed_nodes_json, .{}) catch null; + if (cn_parsed) |p| { + if (p.value == .array) { + for (p.value.array.items) |item| { + if (item == .string) { + try completed_nodes.put(item.string, {}); + } } - } else { - // Dep step not found — treat as unmet - all_deps_met = false; - break; } } - if (all_deps_met) { - try self.store.updateStepStatus(step.id, "ready", null, null, null, step.attempt); - log.info("promoted step {s} to ready", .{step.id}); + // Parse route results from checkpoint metadata + if (cp.metadata_json) |meta_str| { + const meta_parsed = json.parseFromSlice(json.Value, alloc, meta_str, .{}) catch null; + if (meta_parsed) |mp| { + if (mp.value == .object) { + if (mp.value.object.get("route_results")) |rr| { + if (rr == .object) { + var it = rr.object.iterator(); + while (it.next()) |entry| { + if (entry.value_ptr.* == .string) { + try route_results.put(entry.key_ptr.*, entry.value_ptr.string); + } + } + } + } + } + } } } - // 3. Re-fetch steps to get updated statuses - const updated_steps = try self.store.getStepsByRun(alloc, run_row.id); + var version: i64 = if (latest_checkpoint) |cp| cp.version else 0; + const initial_version = version; - // 4. Process ready steps based on their type - for (updated_steps) |step| { - if (!std.mem.eql(u8, step.status, "ready")) continue; + // Track the latest checkpoint ID for correct parent chaining. + // Updated after each checkpoint creation so subsequent checkpoints + // within the same tick correctly chain to their predecessor. 
+ var latest_checkpoint_id: ?[]const u8 = if (latest_checkpoint) |cp| cp.id else null; - if (std.mem.eql(u8, step.type, "task")) { - self.executeTaskStep(alloc, run_row, step) catch |err| { - log.err("error executing task step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "fan_out")) { - const claimed = self.store.claimReadyStep(step.id, null, ids.nowMs()) catch false; - if (!claimed) continue; - self.executeFanOutStep(alloc, run_row, step) catch |err| { - log.err("error executing fan_out step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "map")) { - const claimed = self.store.claimReadyStep(step.id, null, ids.nowMs()) catch false; - if (!claimed) continue; - self.executeMapStep(alloc, run_row, step) catch |err| { - log.err("error executing map step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "reduce")) { - const claimed = self.store.claimReadyStep(step.id, null, ids.nowMs()) catch false; - if (!claimed) continue; - self.executeReduceStep(alloc, run_row, step, updated_steps) catch |err| { - log.err("error executing reduce step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "condition")) { - const claimed = self.store.claimReadyStep(step.id, null, ids.nowMs()) catch false; - if (!claimed) continue; - self.executeConditionStep(alloc, run_row, step, updated_steps) catch |err| { - log.err("error executing condition step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "approval")) { - const claimed = self.store.claimReadyStep(step.id, null, ids.nowMs()) catch false; - if (!claimed) continue; - self.executeApprovalStep(alloc, run_row, step) catch |err| { - log.err("error executing approval step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "transform")) { - const claimed = self.store.claimReadyStep(step.id, null, ids.nowMs()) catch false; - if (!claimed) continue; - self.executeTransformStep(alloc, 
run_row, step) catch |err| { - log.err("error executing transform step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "wait")) { - const claimed = self.store.claimReadyStep(step.id, null, ids.nowMs()) catch false; - if (!claimed) continue; - self.executeWaitStep(alloc, run_row, step) catch |err| { - log.err("error executing wait step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "router")) { - const claimed = self.store.claimReadyStep(step.id, null, ids.nowMs()) catch false; - if (!claimed) continue; - self.executeRouterStep(alloc, run_row, step, updated_steps) catch |err| { - log.err("error executing router step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "loop")) { - const claimed = self.store.claimReadyStep(step.id, null, ids.nowMs()) catch false; - if (!claimed) continue; - self.executeLoopStep(alloc, run_row, step) catch |err| { - log.err("error executing loop step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "sub_workflow")) { - const claimed = self.store.claimReadyStep(step.id, null, ids.nowMs()) catch false; - if (!claimed) continue; - self.executeSubWorkflowStep(alloc, run_row, step) catch |err| { - log.err("error executing sub_workflow step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "debate")) { - const claimed = self.store.claimReadyStep(step.id, null, ids.nowMs()) catch false; - if (!claimed) continue; - self.executeDebateStep(alloc, run_row, step) catch |err| { - log.err("error executing debate step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "group_chat")) { - const claimed = self.store.claimReadyStep(step.id, null, ids.nowMs()) catch false; - if (!claimed) continue; - self.executeGroupChatStep(alloc, run_row, step) catch |err| { - log.err("error executing group_chat step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "saga")) { - const claimed = 
self.store.claimReadyStep(step.id, null, ids.nowMs()) catch false; - if (!claimed) continue; - self.executeSagaStep(alloc, run_row, step) catch |err| { - log.err("error executing saga step {s}: {}", .{ step.id, err }); - }; - } else { - log.warn("unknown step type {s} for step {s}", .{ step.type, step.id }); - } + // Emit run_started only on the first tick (no prior checkpoints) + if (latest_checkpoint == null) { + self.emitEvent(alloc, .run_started, run_row.id, null, null, null); } - // 4b. Check running steps that need tick-based polling - for (updated_steps) |step| { - if (!std.mem.eql(u8, step.status, "running")) continue; - if (std.mem.eql(u8, step.type, "wait")) { - self.executeWaitStep(alloc, run_row, step) catch |err| { - log.err("error polling wait step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "loop")) { - self.pollRunningLoopStep(alloc, run_row, step) catch |err| { - log.err("error polling loop step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "sub_workflow")) { - self.pollRunningSubWorkflowStep(alloc, run_row, step) catch |err| { - log.err("error polling sub_workflow step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "debate")) { - self.pollRunningDebateStep(alloc, run_row, step) catch |err| { - log.err("error polling debate step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "group_chat")) { - self.pollRunningGroupChatStep(alloc, run_row, step) catch |err| { - log.err("error polling group_chat step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "saga")) { - self.pollRunningSagaStep(alloc, run_row, step) catch |err| { - log.err("error polling saga step {s}: {}", .{ step.id, err }); - }; - } else if (std.mem.eql(u8, step.type, "task")) { - self.pollAsyncTaskStep(alloc, run_row, step) catch |err| { - log.err("error polling async task step {s}: {}", .{ step.id, err }); - }; + // 3b. 
Workflow version migration check + const wf_version = getWorkflowVersion(alloc, workflow_json); + if (latest_checkpoint) |cp| { + const cp_version = getCheckpointWorkflowVersion(alloc, cp.metadata_json); + if (cp_version != wf_version) { + log.warn("workflow version changed from {d} to {d}, attempting migration", .{ cp_version, wf_version }); + // Filter completed_nodes to only include nodes that still exist + _ = migrateCompletedNodes(alloc, &completed_nodes, workflow_json); } } - // 5. Check run completion - try self.checkRunCompletion(run_row.id, alloc); - } + // 4. Main execution loop: find ready nodes, execute, repeat + var running_state: []const u8 = try alloc.dupe(u8, current_state); + var max_iterations: u32 = max_nodes_per_tick; + var goto_ready: ?[]const []const u8 = null; // goto override from command primitive - // ── executeTaskStep ────────────────────────────────────────────── + while (max_iterations > 0) : (max_iterations -= 1) { + // Use goto override if set, otherwise find ready nodes normally + const all_ready_nodes = if (goto_ready) |gr| blk: { + goto_ready = null; + break :blk gr; + } else try findReadyNodesFromRoot(alloc, wf_root, &completed_nodes, &route_results); - fn executeTaskStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { - if (step.next_attempt_at_ms) |next_attempt| { - if (ids.nowMs() < next_attempt) return; - } + // Gap 6: Filter out deferred nodes from ready list (execute them later) + var ready_list: std.ArrayListUnmanaged([]const u8) = .empty; + for (all_ready_nodes) |name| { + if (!isInBreakpointList(name, deferred_nodes)) { + try ready_list.append(alloc, name); + } + } + const ready_nodes = ready_list.items; + if (ready_nodes.len == 0) { + // Check termination: if all paths reached __end__ + if (completed_nodes.get("__end__") != null) { + // Save final state if we made progress + if (version > initial_version) { + try self.store.updateRunState(run_row.id, running_state); + } + try 
self.store.updateRunStatus(run_row.id, "completed", null); + try self.store.insertEvent(run_row.id, null, "run.completed", "{}"); + callbacks.fireCallbacks(alloc, run_row.callbacks_json, "run.completed", run_row.id, null, "{}", self.metrics); + log.info("run {s} completed", .{run_row.id}); + return; + } + // Deadlock: no ready nodes and not done + if (completed_nodes.count() > 0) { + // Check if any step is still running asynchronously + const steps = try self.store.getStepsByRun(alloc, run_row.id); + var has_running = false; + for (steps) |step| { + if (std.mem.eql(u8, step.status, "running")) { + has_running = true; + break; + } + } + if (has_running) { + for (steps) |step| { + if (std.mem.eql(u8, step.status, "running")) { + self.pollAsyncTaskStep(alloc, run_row, step) catch |err| { + log.err("error polling async step {s}: {}", .{ step.id, err }); + }; + } + } + return; + } + log.err("run {s} deadlocked: no ready nodes, not completed", .{run_row.id}); + try self.store.updateRunStatus(run_row.id, "failed", "deadlock: no ready nodes"); + try self.store.insertEvent(run_row.id, null, "run.failed", "{\"reason\":\"deadlock\"}"); + callbacks.fireCallbacks(alloc, run_row.callbacks_json, "run.failed", run_row.id, null, "{}", self.metrics); + } + return; + } - // 1. Resolve prompt source for this task step. - const prompt_source = try self.resolveTaskPromptSource(alloc, run_row, step) orelse { - log.warn("no prompt_template for step {s}", .{step.def_step_id}); - return; - }; + // 5. 
Execute ready nodes sequentially + var made_progress = false; + var goto_override: ?[]const []const u8 = null; + + for (ready_nodes) |node_name| { + if (std.mem.eql(u8, node_name, "__end__")) { + // Gap 6: Execute deferred nodes before completing + for (deferred_nodes) |deferred_name| { + if (completed_nodes.get(deferred_name) != null) continue; + + const def_node_json = getNodeJsonFromRoot(alloc, wf_root, deferred_name) orelse continue; + const def_node_type = getNodeField(alloc, def_node_json, "type") orelse "task"; + + if (std.mem.eql(u8, def_node_type, "transform")) { + const def_updates = getNodeField(alloc, def_node_json, "updates") orelse "{}"; + const def_schema = cached_schema_json; + const def_new_state = state_mod.applyUpdates(alloc, running_state, def_updates, def_schema) catch running_state; + running_state = def_new_state; + } else if (std.mem.eql(u8, def_node_type, "task") or std.mem.eql(u8, def_node_type, "agent")) { + const def_result = self.executeTaskNode(alloc, run_row, runtime, deferred_name, def_node_json, running_state) catch continue; + switch (def_result) { + .completed => |cr| { + if (cr.state_updates) |updates| { + const def_schema = cached_schema_json; + const def_new_state = state_mod.applyUpdates(alloc, running_state, updates, def_schema) catch running_state; + running_state = def_new_state; + } + }, + else => {}, + } + } - // 2. Build final prompt. 
- const rendered_prompt = switch (prompt_source) { - .rendered => |prompt| prompt, - .template => |prompt_template| blk: { - const ctx = try buildTemplateContext(alloc, run_row, step, self.store); - break :blk templates.render(alloc, prompt_template, ctx) catch |err| { - log.err("template render failed for step {s}: {}", .{ step.id, err }); - try self.store.updateStepStatus(step.id, "failed", null, null, "template render failed", step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.failed", "{}"); + try completed_nodes.put(try alloc.dupe(u8, deferred_name), {}); + log.info("deferred node {s} completed for run {s}", .{ deferred_name, run_row.id }); + } + + // Mark __end__ as completed + try completed_nodes.put("__end__", {}); + version += 1; + + // Save checkpoint + const cp_id_buf = ids.generateId(); + const cp_id = try alloc.dupe(u8, &cp_id_buf); + const cn_json = try serializeCompletedNodes(alloc, &completed_nodes); + const parent_id: ?[]const u8 = latest_checkpoint_id; + const meta_json = try serializeRouteResultsWithVersion(alloc, &route_results, wf_version); + try self.store.createCheckpoint(cp_id, run_row.id, "__end__", parent_id, running_state, cn_json, version, meta_json); + try self.store.incrementCheckpointCount(run_row.id); + try self.store.updateRunState(run_row.id, running_state); + latest_checkpoint_id = cp_id; + + // Run is completed + try self.store.updateRunStatus(run_row.id, "completed", null); + try self.store.insertEvent(run_row.id, null, "run.completed", "{}"); + callbacks.fireCallbacks(alloc, run_row.callbacks_json, "run.completed", run_row.id, null, "{}", self.metrics); + log.info("run {s} completed", .{run_row.id}); + return; + } + + // Breakpoint: interrupt_before check + if (isInBreakpointList(node_name, interrupt_before)) { + log.info("breakpoint interrupt_before at node {s} for run {s}", .{ node_name, run_row.id }); + version += 1; + const cp_id_buf = ids.generateId(); + const cp_id = try alloc.dupe(u8, &cp_id_buf); + 
const cn_json = try serializeCompletedNodes(alloc, &completed_nodes); + const parent_id: ?[]const u8 = latest_checkpoint_id; + const meta_json = try serializeRouteResultsWithVersion(alloc, &route_results, wf_version); + try self.store.createCheckpoint(cp_id, run_row.id, node_name, parent_id, running_state, cn_json, version, meta_json); + try self.store.incrementCheckpointCount(run_row.id); + try self.store.updateRunState(run_row.id, running_state); + latest_checkpoint_id = cp_id; + + try self.store.updateRunStatus(run_row.id, "interrupted", null); + try self.store.insertEvent(run_row.id, null, "run.interrupted", "{}"); + callbacks.fireCallbacks(alloc, run_row.callbacks_json, "run.interrupted", run_row.id, null, "{}", self.metrics); + return; + } + + // Get node definition from workflow + const node_json = getNodeJsonFromRoot(alloc, wf_root, node_name) orelse { + log.err("node {s} not found in workflow for run {s}", .{ node_name, run_row.id }); + try self.store.updateRunStatus(run_row.id, "failed", "node not found in workflow"); return; }; - }, - }; - // 4. Get all workers and build WorkerInfo list - const workers = try self.store.listWorkers(alloc); - var worker_infos: std.ArrayListUnmanaged(dispatch.WorkerInfo) = .empty; - for (workers) |w| { - const current_tasks = self.store.countRunningStepsByWorker(w.id) catch 0; - try worker_infos.append(alloc, .{ - .id = w.id, - .url = w.url, - .token = w.token, - .protocol = w.protocol, - .model = w.model, - .tags_json = w.tags_json, - .max_concurrent = w.max_concurrent, - .status = w.status, - .current_tasks = current_tasks, - }); - } + // Get node type + const node_type = getNodeField(alloc, node_json, "type") orelse "task"; - // 5. 
Parse worker_tags from the step definition - const required_tags = try getStepTags(alloc, run_row.workflow_json, step.def_step_id); + // Execute based on type + if (std.mem.eql(u8, node_type, "route")) { + // Route: evaluate routing logic, no worker dispatch + const result = try executeRouteNode(alloc, node_json, running_state); + if (result.route_value) |rv| { + try route_results.put(try alloc.dupe(u8, node_name), rv); + } + try completed_nodes.put(try alloc.dupe(u8, node_name), {}); + + // Create step record + const step_id_buf = ids.generateId(); + const step_id = try alloc.dupe(u8, &step_id_buf); + try self.store.insertStep(step_id, run_row.id, node_name, "route", "completed", "{}", 1, null, null, null); + const route_output = try std.fmt.allocPrint(alloc, "{{\"route\":\"{s}\"}}", .{result.route_value orelse "default"}); + try self.store.updateStepStatus(step_id, "completed", null, route_output, null, 1); + try self.store.insertEvent(run_row.id, step_id, "step.completed", route_output); + + log.info("route node {s} -> {s}", .{ node_name, result.route_value orelse "default" }); + } else if (std.mem.eql(u8, node_type, "interrupt")) { + // Interrupt: save checkpoint, set run to interrupted + try completed_nodes.put(try alloc.dupe(u8, node_name), {}); + version += 1; + + const step_id_buf = ids.generateId(); + const step_id = try alloc.dupe(u8, &step_id_buf); + try self.store.insertStep(step_id, run_row.id, node_name, "interrupt", "completed", "{}", 1, null, null, null); + try self.store.updateStepStatus(step_id, "completed", null, "{\"interrupted\":true}", null, 1); + try self.store.insertEvent(run_row.id, step_id, "step.completed", "{}"); + + const cp_id_buf = ids.generateId(); + const cp_id = try alloc.dupe(u8, &cp_id_buf); + const cn_json = try serializeCompletedNodes(alloc, &completed_nodes); + const parent_id: ?[]const u8 = latest_checkpoint_id; + const meta_json = try serializeRouteResultsWithVersion(alloc, &route_results, wf_version); + try 
self.store.createCheckpoint(cp_id, run_row.id, node_name, parent_id, running_state, cn_json, version, meta_json); + try self.store.incrementCheckpointCount(run_row.id); + try self.store.updateRunState(run_row.id, running_state); + latest_checkpoint_id = cp_id; + + try self.store.updateRunStatus(run_row.id, "interrupted", null); + try self.store.insertEvent(run_row.id, null, "run.interrupted", "{}"); + callbacks.fireCallbacks(alloc, run_row.callbacks_json, "run.interrupted", run_row.id, null, "{}", self.metrics); + log.info("run {s} interrupted at node {s}", .{ run_row.id, node_name }); + return; + } else if (std.mem.eql(u8, node_type, "transform")) { + // Transform: apply static updates, no worker dispatch + const state_updates = getNodeField(alloc, node_json, "updates") orelse "{}"; - // 6. Select an available worker - const selected_worker = try dispatch.selectWorker(alloc, worker_infos.items, required_tags); - if (selected_worker == null) { - // No worker available — leave as "ready", will retry next tick - log.debug("no worker available for step {s}, will retry", .{step.id}); - return; - } - const worker = selected_worker.?; + // Get schema from workflow + const schema_json = cached_schema_json; - // 7. Atomically claim the step to avoid duplicate dispatch across instances. - const claim_ts = ids.nowMs(); - const claimed = try self.store.claimReadyStep(step.id, worker.id, claim_ts); - if (!claimed) { - return; - } - if (self.metrics) |m| { - metrics_mod.Metrics.incr(&m.steps_claimed_total); - } - try self.store.insertEvent(run_row.id, step.id, "step.running", "{}"); + // Apply updates via reducers + const new_state = state_mod.applyUpdates(alloc, running_state, state_updates, schema_json) catch |err| { + log.err("transform node {s} failed to apply updates: {}", .{ node_name, err }); + try self.store.updateRunStatus(run_row.id, "failed", "transform failed"); + return; + }; + running_state = new_state; - // 8. 
Dispatch to worker with handoff support - var current_worker = worker; - var current_prompt = rendered_prompt; - var handoff_count: u32 = 0; - const max_handoffs: u32 = 5; + if (getNodeField(alloc, node_json, "store_updates")) |store_updates_json| { + self.applyStoreUpdates(alloc, running_state, store_updates_json, runtime) catch |err| { + log.err("transform node {s} failed to write store updates: {}", .{ node_name, err }); + try self.store.updateRunStatus(run_row.id, "failed", "transform store update failed"); + return; + }; + } - var final_result: dispatch.DispatchResult = undefined; + try completed_nodes.put(try alloc.dupe(u8, node_name), {}); + + // Create step record + const step_id_buf = ids.generateId(); + const step_id = try alloc.dupe(u8, &step_id_buf); + try self.store.insertStep(step_id, run_row.id, node_name, "transform", "completed", "{}", 1, null, null, null); + try self.store.updateStepStatus(step_id, "completed", null, state_updates, null, 1); + try self.store.insertEvent(run_row.id, step_id, "step.completed", "{}"); + + log.info("transform node {s} completed", .{node_name}); + } else if (std.mem.eql(u8, node_type, "task") or std.mem.eql(u8, node_type, "agent")) { + // Gap 7: Inject __meta managed values + const state_with_meta = injectMeta(alloc, running_state, run_row.id, node_name, version, @as(i64, @intCast(max_iterations))) catch running_state; + + // Gap 3: Check cache before executing + const cache_ttl = parseCacheTtlMs(alloc, node_json); + if (cache_ttl != null) cache_check: { + const pt_c = getNodeField(alloc, node_json, "prompt_template") orelse break :cache_check; + const rnd_c = self.renderWorkflowTemplate(alloc, pt_c, state_with_meta, runtime, null) catch break :cache_check; + const ck_c = computeCacheKey(alloc, node_name, rnd_c) catch break :cache_check; + const cached = self.store.getCachedResult(alloc, ck_c) catch break :cache_check; + if (cached) |cached_upd| { + const cs = cached_schema_json; + running_state = 
state_mod.applyUpdates(alloc, running_state, cached_upd, cs) catch running_state; + try completed_nodes.put(try alloc.dupe(u8, node_name), {}); + log.info("task node {s} cache hit for run {s}", .{ node_name, run_row.id }); + made_progress = true; + version += 1; + const ccb = ids.generateId(); + const cci = try alloc.dupe(u8, &ccb); + const ccn = try serializeCompletedNodes(alloc, &completed_nodes); + const cpi: ?[]const u8 = latest_checkpoint_id; + const cmj = try serializeRouteResults(alloc, &route_results); + try self.store.createCheckpoint(cci, run_row.id, node_name, cpi, running_state, ccn, version, cmj); + try self.store.incrementCheckpointCount(run_row.id); + try self.store.updateRunState(run_row.id, running_state); + latest_checkpoint_id = cci; + continue; + } + } - while (true) { - final_result = try dispatch.dispatchStep( - alloc, - current_worker.url, - current_worker.token, - current_worker.protocol, - current_worker.model, - run_row.id, - step.id, - current_prompt, - ); + // Gap 2: Non-blocking retry — check for pending retry step + const max_attempts = parseRetryMaxAttempts(alloc, node_json) orelse 1; + const retry_init_ms = parseRetryInitialMs(alloc, node_json) orelse 500; + const retry_bf = parseRetryBackoff(alloc, node_json) orelse 2.0; + const retry_max_ms = parseRetryMaxMs(alloc, node_json) orelse 30000; + + // Check if there's a pending retry step for this node + const retrying_step = self.store.getRetryingStepForNode(alloc, run_row.id, node_name) catch null; + if (retrying_step) |rs| { + const now_ms = ids.nowMs(); + if (rs.next_attempt_at_ms) |next_at| { + if (now_ms < next_at) { + // Retry delay not elapsed yet — skip this node, let other runs process + return; + } + } + // Retry timer expired — clear the retrying step and re-execute below + // The attempt count is tracked on the step record + } - if (!final_result.success) break; + const current_attempt: u32 = if (retrying_step) |rs| @intCast(rs.attempt) else 0; + const result = try 
self.executeTaskNode(alloc, run_row, runtime, node_name, node_json, state_with_meta); + + // Handle retry scheduling for failed results (non-blocking) + const result_after_retry: TaskNodeResult = switch (result) { + .failed => |err_text| blk: { + if (current_attempt + 1 < max_attempts) { + // Calculate delay with exponential backoff + var dms: u64 = retry_init_ms; + var ei: u32 = 0; + while (ei < current_attempt) : (ei += 1) { + const nd = @as(f64, @floatFromInt(dms)) * retry_bf; + dms = @intFromFloat(@min(nd, @as(f64, @floatFromInt(retry_max_ms)))); + } + if (dms > retry_max_ms) dms = retry_max_ms; + log.info("task node {s} attempt {d}/{d} failed, scheduling retry in {d}ms", .{ node_name, current_attempt + 1, max_attempts, dms }); + self.emitEvent(alloc, .step_retrying, run_row.id, null, node_name, null); + + // Create or update step record with retry schedule + const next_retry_at = ids.nowMs() + @as(i64, @intCast(dms)); + if (retrying_step) |rs| { + // Update existing step with next retry time + self.store.scheduleStepRetry(rs.id, next_retry_at, @as(i64, @intCast(current_attempt + 1)), err_text) catch {}; + } else { + // Create new step record for retry tracking + const retry_step_id_buf = ids.generateId(); + const retry_step_id = alloc.dupe(u8, &retry_step_id_buf) catch { + break :blk result; + }; + self.store.insertStep(retry_step_id, run_row.id, node_name, node_type, "ready", "{}", @intCast(max_attempts), null, null, null) catch { + break :blk result; + }; + self.store.scheduleStepRetry(retry_step_id, next_retry_at, 1, err_text) catch {}; + } - // Check for handoff_to in the output - const handoff_target = extractHandoffTarget(alloc, final_result.output); - if (handoff_target == null) break; // Normal completion + // Save progress checkpoint before returning + if (version > initial_version) { + const cp_id_buf = ids.generateId(); + const cp_id = alloc.dupe(u8, &cp_id_buf) catch { + break :blk result; + }; + const cn_json = serializeCompletedNodes(alloc, 
&completed_nodes) catch { + break :blk result; + }; + const parent_id: ?[]const u8 = if (latest_checkpoint_id) |pid| pid else null; + const meta_json = serializeRouteResultsWithVersion(alloc, &route_results, wf_version) catch { + break :blk result; + }; + self.store.createCheckpoint(cp_id, run_row.id, node_name, parent_id, running_state, cn_json, version, meta_json) catch {}; + self.store.incrementCheckpointCount(run_row.id) catch {}; + self.store.updateRunState(run_row.id, running_state) catch {}; + latest_checkpoint_id = cp_id; + } - handoff_count += 1; - if (handoff_count >= max_handoffs) { - final_result = .{ - .output = "", - .success = false, - .error_text = "handoff chain limit exceeded (max 5)", - }; - break; - } + // Return without marking node as completed — next tick will retry + return; + } + break :blk result; + }, + else => result, + }; + + switch (result_after_retry) { + .completed => |cr| { + // Gap 7: Strip __meta (don't persist) + running_state = stripMeta(alloc, running_state) catch running_state; + + if (cr.state_updates) |updates| { + const schema_json = cached_schema_json; + const new_state = state_mod.applyUpdates(alloc, running_state, updates, schema_json) catch |err| { + log.err("task node {s} failed to apply updates: {}", .{ node_name, err }); + try self.store.updateRunStatus(run_row.id, "failed", "state update failed"); + return; + }; + running_state = new_state; + + // Gap 3: Store result in cache + if (cache_ttl) |ttl| cache_store: { + const pt_s = getNodeField(alloc, node_json, "prompt_template") orelse break :cache_store; + const rnd_s = self.renderWorkflowTemplate(alloc, pt_s, state_with_meta, runtime, null) catch break :cache_store; + const ck_s = computeCacheKey(alloc, node_name, rnd_s) catch break :cache_store; + self.store.setCachedResult(ck_s, node_name, updates, ttl) catch |cerr| { + log.warn("failed to cache result for node {s}: {}", .{ node_name, cerr }); + }; + } - // Log the handoff event - const handoff_event = try 
std.fmt.allocPrint(alloc, "{{\"handoff_from\":\"{s}\",\"handoff_to_tags\":\"{s}\"}}", .{ current_worker.id, handoff_target.?.tags_str }); - try self.store.insertEvent(run_row.id, step.id, "step.handoff", handoff_event); - log.info("step {s} handoff #{d} from worker {s}", .{ step.id, handoff_count, current_worker.id }); - - // Select new worker by handoff tags - const new_worker = try dispatch.selectWorker(alloc, worker_infos.items, handoff_target.?.tags); - if (new_worker == null) { - final_result = .{ - .output = "", - .success = false, - .error_text = "no worker available for handoff", - }; - break; - } - current_worker = new_worker.?; + // Gap 4: Save as pending write + self.store.savePendingWrite(run_row.id, node_name, node_name, updates) catch |perr| { + log.warn("failed to save pending write for node {s}: {}", .{ node_name, perr }); + }; + } - // Build handoff prompt with message - if (handoff_target.?.message) |msg| { - current_prompt = msg; - } - // Otherwise reuse current_prompt - } + // Apply UI messages to state (__ui_messages key) + if (cr.raw_output) |raw_out| { + running_state = applyUiMessagesToState(alloc, running_state, raw_out) catch running_state; + } - // 8.5. If async dispatch, save state and leave step running - if (final_result.async_pending) { - const async_state = try mergeAsyncState(alloc, step.input_json, final_result.correlation_id orelse ""); - try self.store.updateStepInputJson(step.id, async_state); - log.info("step {s} dispatched async, correlation_id={s}", .{ step.id, final_result.correlation_id orelse "?" 
}); - return; - } + // Consume pending injections + const injections = self.store.consumePendingInjections(alloc, run_row.id, node_name) catch &.{}; + for (injections) |injection| { + const schema_json = cached_schema_json; + const new_state = state_mod.applyUpdates(alloc, running_state, injection.updates_json, schema_json) catch |err| { + log.warn("failed to apply injection for run {s}: {}", .{ run_row.id, err }); + continue; + }; + running_state = new_state; + } - // 9. Handle result - if (final_result.success) { - // Mark step as completed, save output_json - const output_json = try wrapOutput(alloc, final_result.output); - try self.store.updateStepStatus(step.id, "completed", current_worker.id, output_json, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", "{}"); - try self.store.markWorkerSuccess(current_worker.id, ids.nowMs()); - if (self.metrics) |m| { - metrics_mod.Metrics.incr(&m.worker_dispatch_success_total); - } - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.completed", run_row.id, step.id, output_json, self.metrics); - log.info("step {s} completed", .{step.id}); - } else { - // On failure: retry or fail - const err_text = final_result.error_text orelse "dispatch failed"; - const now_ms = ids.nowMs(); - const circuit_until = now_ms + self.runtime_cfg.worker_circuit_breaker_ms; - try self.store.markWorkerFailure( - current_worker.id, - err_text, - now_ms, - self.runtime_cfg.worker_failure_threshold, - circuit_until, - ); - if (self.metrics) |m| { - metrics_mod.Metrics.incr(&m.worker_dispatch_failure_total); - } + try completed_nodes.put(try alloc.dupe(u8, node_name), {}); + + if (cr.goto_targets) |targets| { + var valid_targets: std.ArrayListUnmanaged([]const u8) = .empty; + for (targets) |target| { + if (std.mem.eql(u8, target, "__end__") or workflowHasNode(wf_root, target)) { + try valid_targets.append(alloc, target); + } else { + log.warn("goto target {s} not found in workflow, skipping", 
.{target}); + } + } + if (valid_targets.items.len > 0) { + goto_override = try valid_targets.toOwnedSlice(alloc); + log.info("task node {s} goto: {d} targets", .{ node_name, goto_override.?.len }); + } + } - if (step.attempt < step.max_attempts) { - const elapsed_ms = now_ms - step.created_at_ms; - if (elapsed_ms > self.runtime_cfg.retry_max_elapsed_ms) { - const elapsed_err = try std.fmt.allocPrint(alloc, "retry max elapsed exceeded ({d}ms)", .{self.runtime_cfg.retry_max_elapsed_ms}); - try self.store.updateStepStatus(step.id, "failed", current_worker.id, null, elapsed_err, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.failed", "{}"); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.failed", run_row.id, step.id, "{}", self.metrics); - log.err("step {s} failed: {s}", .{ step.id, elapsed_err }); + // Gap 4: Clear pending writes + self.store.clearPendingWrites(run_row.id) catch {}; + + log.info("task node {s} completed for run {s}", .{ node_name, run_row.id }); + }, + .async_pending => { + // Step is dispatched async, don't mark as completed yet + // Will be polled on next tick + log.info("task node {s} dispatched async for run {s}", .{ node_name, run_row.id }); + // Save checkpoint with current progress before returning + version += 1; + const cp_id_buf = ids.generateId(); + const cp_id = try alloc.dupe(u8, &cp_id_buf); + const cn_json = try serializeCompletedNodes(alloc, &completed_nodes); + const parent_id: ?[]const u8 = latest_checkpoint_id; + const meta_json = try serializeRouteResultsWithVersion(alloc, &route_results, wf_version); + try self.store.createCheckpoint(cp_id, run_row.id, node_name, parent_id, running_state, cn_json, version, meta_json); + try self.store.incrementCheckpointCount(run_row.id); + try self.store.updateRunState(run_row.id, running_state); + latest_checkpoint_id = cp_id; + return; + }, + .no_worker => { + // No worker available, will retry next tick + log.debug("no worker for task node {s}, will 
retry", .{node_name}); + // Save progress so far + if (version > initial_version) { + const cp_id_buf = ids.generateId(); + const cp_id = try alloc.dupe(u8, &cp_id_buf); + const cn_json = try serializeCompletedNodes(alloc, &completed_nodes); + const parent_id: ?[]const u8 = latest_checkpoint_id; + const meta_json = try serializeRouteResultsWithVersion(alloc, &route_results, wf_version); + try self.store.createCheckpoint(cp_id, run_row.id, node_name, parent_id, running_state, cn_json, version, meta_json); + try self.store.incrementCheckpointCount(run_row.id); + try self.store.updateRunState(run_row.id, running_state); + latest_checkpoint_id = cp_id; + } + return; + }, + .failed => |err_text| { + log.err("task node {s} failed: {s}", .{ node_name, err_text }); + try self.store.updateRunStatus(run_row.id, "failed", err_text); + try self.store.insertEvent(run_row.id, null, "run.failed", "{}"); + callbacks.fireCallbacks(alloc, run_row.callbacks_json, "run.failed", run_row.id, null, "{}", self.metrics); + return; + }, + } + } else if (std.mem.eql(u8, node_type, "subgraph")) { + // Subgraph: execute child workflow inline + const result = try self.executeSubgraphNode(alloc, run_row, node_name, node_json, running_state, recursion_depth); + + switch (result) { + .completed => |cr| { + if (cr.state_updates) |updates| { + const schema_json = cached_schema_json; + const new_state = state_mod.applyUpdates(alloc, running_state, updates, schema_json) catch |err| { + log.err("subgraph node {s} failed to apply updates: {}", .{ node_name, err }); + try self.store.updateRunStatus(run_row.id, "failed", "subgraph state update failed"); + return; + }; + running_state = new_state; + } + try completed_nodes.put(try alloc.dupe(u8, node_name), {}); + log.info("subgraph node {s} completed for run {s}", .{ node_name, run_row.id }); + }, + .failed => |err_text| { + log.err("subgraph node {s} failed: {s}", .{ node_name, err_text }); + try self.store.updateRunStatus(run_row.id, "failed", 
err_text); + try self.store.insertEvent(run_row.id, null, "run.failed", "{}"); + callbacks.fireCallbacks(alloc, run_row.callbacks_json, "run.failed", run_row.id, null, "{}", self.metrics); + return; + }, + else => {}, + } + } else if (std.mem.eql(u8, node_type, "send")) { + // Send: read items from state, dispatch target_node per item + const result = try self.executeSendNode(alloc, run_row, runtime, node_name, node_json, running_state); + if (result.state_updates) |updates| { + const schema_json = cached_schema_json; + const new_state = state_mod.applyUpdates(alloc, running_state, updates, schema_json) catch |err| { + log.err("send node {s} failed to apply updates: {}", .{ node_name, err }); + try self.store.updateRunStatus(run_row.id, "failed", "send state update failed"); + return; + }; + running_state = new_state; + } + try completed_nodes.put(try alloc.dupe(u8, node_name), {}); + log.info("send node {s} completed for run {s}", .{ node_name, run_row.id }); + } else { + log.warn("unknown node type {s} for node {s}", .{ node_type, node_name }); + try self.store.updateRunStatus(run_row.id, "failed", "unknown node type"); return; } - const delay_ms = computeRetryDelayMs(self.runtime_cfg, step, now_ms); - const next_attempt_ms = now_ms + delay_ms; - try self.store.scheduleStepRetry(step.id, next_attempt_ms, step.attempt + 1, err_text); - const retry_event = try std.fmt.allocPrint(alloc, "{{\"next_attempt_at_ms\":{d},\"delay_ms\":{d}}}", .{ next_attempt_ms, delay_ms }); - try self.store.insertEvent(run_row.id, step.id, "step.retry", retry_event); - if (self.metrics) |m| { - metrics_mod.Metrics.incr(&m.steps_retry_scheduled_total); + // Breakpoint: interrupt_after check + if (isInBreakpointList(node_name, interrupt_after)) { + log.info("breakpoint interrupt_after at node {s} for run {s}", .{ node_name, run_row.id }); + // Save checkpoint with updated state first + version += 1; + const bp_cp_id_buf = ids.generateId(); + const bp_cp_id = try alloc.dupe(u8, 
&bp_cp_id_buf); + const bp_cn_json = try serializeCompletedNodes(alloc, &completed_nodes); + const bp_parent_id: ?[]const u8 = latest_checkpoint_id; + const bp_meta_json = try serializeRouteResultsWithVersion(alloc, &route_results, wf_version); + try self.store.createCheckpoint(bp_cp_id, run_row.id, node_name, bp_parent_id, running_state, bp_cn_json, version, bp_meta_json); + try self.store.incrementCheckpointCount(run_row.id); + try self.store.updateRunState(run_row.id, running_state); + latest_checkpoint_id = bp_cp_id; + + try self.store.updateRunStatus(run_row.id, "interrupted", null); + try self.store.insertEvent(run_row.id, null, "run.interrupted", "{}"); + callbacks.fireCallbacks(alloc, run_row.callbacks_json, "run.interrupted", run_row.id, null, "{}", self.metrics); + return; } - log.info("step {s} will retry (attempt {d}/{d}, delay={d}ms)", .{ step.id, step.attempt + 1, step.max_attempts, delay_ms }); - } else { - try self.store.updateStepStatus(step.id, "failed", current_worker.id, null, err_text, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.failed", "{}"); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.failed", run_row.id, step.id, "{}", self.metrics); - log.err("step {s} failed: {s}", .{ step.id, err_text }); - } - } - } - - // ── async helpers ────────────────────────────────────────────── - - /// Merge async_pending + correlation_id into existing input_json, - /// preserving any existing fields (e.g. rendered_prompt for retries). 
- fn mergeAsyncState(alloc: std.mem.Allocator, existing_input: []const u8, correlation_id: []const u8) ![]const u8 { - var obj = std.json.ObjectMap.init(alloc); - // Parse and copy existing fields - if (existing_input.len > 0) { - const parsed = std.json.parseFromSlice(std.json.Value, alloc, existing_input, .{}) catch null; - if (parsed) |p| { - if (p.value == .object) { - var it = p.value.object.iterator(); - while (it.next()) |entry| { - try obj.put(entry.key_ptr.*, entry.value_ptr.*); + // Reconciliation: check tracker task status between steps + if (runtime.tracker) |tracker| { + if (task_id != null and !reconcileWithTracker(alloc, tracker.base_url, tracker.api_token, task_id.?)) { + log.info("run {s} cancelled by reconciliation", .{run_row.id}); + try self.store.updateRunStatus(run_row.id, "failed", "cancelled by tracker reconciliation"); + try self.store.insertEvent(run_row.id, null, "run.failed", "{\"reason\":\"tracker_cancelled\"}"); + callbacks.fireCallbacks(alloc, run_row.callbacks_json, "run.failed", run_row.id, null, "{}", self.metrics); + return; } } + + // Strip ephemeral keys before checkpoint persistence + const schema_for_eph = cached_schema_json; + running_state = state_mod.stripEphemeralKeys(alloc, running_state, schema_for_eph) catch running_state; + + // Save checkpoint after each node + made_progress = true; + version += 1; + const cp_id_buf = ids.generateId(); + const cp_id = try alloc.dupe(u8, &cp_id_buf); + const cn_json = try serializeCompletedNodes(alloc, &completed_nodes); + const parent_id: ?[]const u8 = latest_checkpoint_id; + const meta_json = try serializeRouteResultsWithVersion(alloc, &route_results, wf_version); + try self.store.createCheckpoint(cp_id, run_row.id, node_name, parent_id, running_state, cn_json, version, meta_json); + try self.store.incrementCheckpointCount(run_row.id); + try self.store.updateRunState(run_row.id, running_state); + latest_checkpoint_id = cp_id; + + // Emit structured checkpoint event + 
self.emitEvent(alloc, .checkpoint_created, run_row.id, null, node_name, null); + + // Broadcast rich SSE events for all modes + if (self.sse_hub) |hub| { + const node_json_for_sse = getNodeJsonFromRoot(alloc, wf_root, node_name); + const nt = if (node_json_for_sse) |nj| (getNodeField(alloc, nj, "type") orelse "task") else "task"; + broadcastNodeEvents(hub, alloc, run_row.id, node_name, nt, running_state, null, version, 0); + } } - } - // Add async fields - try obj.put("async_pending", .{ .bool = true }); - try obj.put("correlation_id", .{ .string = correlation_id }); + // If goto override is set, use it for next iteration instead of findReadyNodes + if (goto_override) |targets| { + goto_ready = targets; + } - return std.json.Stringify.valueAlloc(alloc, std.json.Value{ .object = obj }, .{}); + // If no progress was made in this iteration, break + if (!made_progress) break; + } // end while loop } - fn pollAsyncTaskStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { - // Only handle steps that are async (have async_pending in input_json) - const input_json = step.input_json; - if (input_json.len == 0) return; + // ── Node Execution Results ─────────────────────────────────────── - // Parse input_json to check for async_pending flag - const parsed = std.json.parseFromSlice(std.json.Value, alloc, input_json, .{}) catch return; - defer parsed.deinit(); - if (parsed.value != .object) return; + const TaskNodeResult = union(enum) { + completed: struct { + state_updates: ?[]const u8, + goto_targets: ?[]const []const u8 = null, + raw_output: ?[]const u8 = null, + }, + async_pending: void, + no_worker: void, + failed: []const u8, + }; - const async_flag = parsed.value.object.get("async_pending") orelse return; - if (async_flag != .bool or !async_flag.bool) return; + const SendNodeResult = struct { + state_updates: ?[]const u8, + }; - const corr_val = parsed.value.object.get("correlation_id") orelse return; - if (corr_val != 
.string) return; - const correlation_id = corr_val.string; + const RouteNodeResult = struct { + route_value: ?[]const u8, + }; - // Check response queue - const queue = self.response_queue orelse return; - const response = queue.take(correlation_id) orelse { - // Check timeout - if (step.timeout_ms) |timeout_ms| { - if (step.started_at_ms) |started_at| { - const elapsed = ids.nowMs() - started_at; - if (elapsed > timeout_ms) { - const err_text = try std.fmt.allocPrint(alloc, "async step timed out after {d}ms", .{timeout_ms}); - try self.store.updateStepStatus(step.id, "failed", step.worker_id, null, err_text, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.failed", "{}"); - if (self.metrics) |m| { - metrics_mod.Metrics.incr(&m.worker_dispatch_failure_total); - } - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.failed", run_row.id, step.id, "{}", self.metrics); - log.err("async step {s} timed out", .{step.id}); - } - } - } - return; - }; - - // Got a response — complete or fail the step - if (response.success) { - const output_json = try wrapOutput(alloc, response.output); - try self.store.updateStepStatus(step.id, "completed", step.worker_id, output_json, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", "{}"); - if (step.worker_id) |wid| { - try self.store.markWorkerSuccess(wid, ids.nowMs()); - } - if (self.metrics) |m| { - metrics_mod.Metrics.incr(&m.worker_dispatch_success_total); - } - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.completed", run_row.id, step.id, output_json, self.metrics); - log.info("async step {s} completed", .{step.id}); - } else { - const err_text = response.error_text orelse "async dispatch failed"; - try self.store.updateStepStatus(step.id, "failed", step.worker_id, null, err_text, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.failed", "{}"); - if (step.worker_id) |wid| { - const now_ms = ids.nowMs(); - const circuit_until 
= now_ms + self.runtime_cfg.worker_circuit_breaker_ms; - try self.store.markWorkerFailure(wid, err_text, now_ms, self.runtime_cfg.worker_failure_threshold, circuit_until); - } - if (self.metrics) |m| { - metrics_mod.Metrics.incr(&m.worker_dispatch_failure_total); - } - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.failed", run_row.id, step.id, "{}", self.metrics); - log.err("async step {s} failed: {s}", .{ step.id, err_text }); - } - } - - fn resolveTaskPromptSource(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !?TaskPromptSource { - // Explicit rendered_prompt is highest priority for generated children - // (for example debate judge prompts). - if (extractRenderedPromptFromInput(alloc, step.input_json)) |rendered_prompt| { - return .{ .rendered = rendered_prompt }; - } + // ── executeRouteNode ───────────────────────────────────────────── - // Normal task step definition prompt. - if (try getStepField(alloc, run_row.workflow_json, step.def_step_id, "prompt_template")) |tpl| { - return .{ .template = tpl }; - } + fn executeRouteNode(alloc: std.mem.Allocator, node_json: []const u8, state_json: []const u8) !RouteNodeResult { + // Get the input path to read from state + const input_path = getNodeField(alloc, node_json, "input") orelse "state.route_input"; + const default_route = getNodeField(alloc, node_json, "default"); - // Fallback for generated child tasks that should reuse parent prompt template. 
- if (step.parent_step_id) |parent_id| { - if (try self.store.getStep(alloc, parent_id)) |parent_step| { - if (try getStepField(alloc, run_row.workflow_json, parent_step.def_step_id, "prompt_template")) |parent_tpl| { - return .{ .template = parent_tpl }; - } - } + // Read value from state + const value_json = state_mod.getStateValue(alloc, state_json, input_path) catch null; + if (value_json == null) { + return RouteNodeResult{ .route_value = resolveDeclaredRouteValue(alloc, node_json, default_route) }; } - return null; - } - - // ── executeFanOutStep ──────────────────────────────────────────── - - fn executeFanOutStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { - // 1. Parse step definition from workflow_json, get "count" - const count_val = try getStepFieldInt(alloc, run_row.workflow_json, step.def_step_id, "count") orelse { - log.warn("no count for fan_out step {s}", .{step.def_step_id}); - try self.store.updateStepStatus(step.id, "failed", null, null, "missing count in fan_out definition", step.attempt); - return; + // Stringify value for route matching + const route_key = state_mod.stringifyForRoute(alloc, value_json.?) catch { + return RouteNodeResult{ .route_value = resolveDeclaredRouteValue(alloc, node_json, default_route) }; }; - const count: usize = @intCast(count_val); - // 2. 
Create N child steps - for (0..count) |i| { - const child_id_buf = ids.generateId(); - const child_id = try alloc.dupe(u8, &child_id_buf); - const child_def_id = try std.fmt.allocPrint(alloc, "{s}_{d}", .{ step.def_step_id, i }); - const idx: i64 = @intCast(i); + return RouteNodeResult{ .route_value = resolveDeclaredRouteValue(alloc, node_json, route_key) }; + } - try self.store.insertStep( - child_id, - run_row.id, - child_def_id, - "task", - "ready", - step.input_json, - step.max_attempts, - step.timeout_ms, - step.id, // parent_step_id - idx, - ); - log.info("created fan_out child step {s} (index {d})", .{ child_id, i }); + fn buildWorkerInfos(self: *Engine, alloc: std.mem.Allocator) ![]dispatch.WorkerInfo { + const workers = try self.store.listWorkers(alloc); + var worker_infos: std.ArrayListUnmanaged(dispatch.WorkerInfo) = .empty; + for (workers) |worker| { + const current_tasks = self.store.countRunningStepsByWorker(worker.id) catch 0; + try worker_infos.append(alloc, .{ + .id = worker.id, + .url = worker.url, + .token = worker.token, + .protocol = worker.protocol, + .model = worker.model, + .tags_json = worker.tags_json, + .max_concurrent = worker.max_concurrent, + .status = worker.status, + .current_tasks = current_tasks, + }); } - - // 3. Mark fan_out step as "completed" - try self.store.updateStepStatus(step.id, "completed", null, null, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", "{}"); - log.info("fan_out step {s} completed, created {d} children", .{ step.id, count }); + return worker_infos.toOwnedSlice(alloc); } - // ── executeMapStep ─────────────────────────────────────────────── + // ── executeTaskNode ────────────────────────────────────────────── - fn executeMapStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { - // 1. Parse step definition, get "items_from" (e.g. 
"$.topics") - const items_from = try getStepField(alloc, run_row.workflow_json, step.def_step_id, "items_from") orelse { - log.warn("no items_from for map step {s}", .{step.def_step_id}); - try self.store.updateStepStatus(step.id, "failed", null, null, "missing items_from in map definition", step.attempt); - return; + fn executeTaskNode(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, runtime: RuntimeBindings, node_name: []const u8, node_json: []const u8, state_json: []const u8) !TaskNodeResult { + // 1. Get prompt template from node definition + const prompt_template = getNodeField(alloc, node_json, "prompt_template") orelse { + // No prompt template — mark as completed with no state updates + return TaskNodeResult{ .completed = .{ .state_updates = null } }; }; - // 2. Resolve items_from against run.input_json — extract the array - // items_from format: "$.field_name" - const field_name = if (std.mem.startsWith(u8, items_from, "$.")) - items_from[2..] - else - items_from; - - const items = try extractJsonArray(alloc, run_row.input_json, field_name) orelse { - log.warn("items_from field '{s}' not found or not an array in input", .{field_name}); - try self.store.updateStepStatus(step.id, "failed", null, null, "items_from field not found or not an array", step.attempt); - return; + // 2. Render prompt with graph template interpolation and optional store access. + const rendered_prompt = self.renderWorkflowTemplate(alloc, prompt_template, state_json, runtime, null) catch |err| { + log.err("template render failed for node {s}: {}", .{ node_name, err }); + return TaskNodeResult{ .failed = "template render failed" }; }; - // 3. For each item in the array, create a child step - for (items, 0..) 
|item, i| { - const child_id_buf = ids.generateId(); - const child_id = try alloc.dupe(u8, &child_id_buf); - const child_def_id = try std.fmt.allocPrint(alloc, "{s}_{d}", .{ step.def_step_id, i }); - const idx: i64 = @intCast(i); - - // Store the item as input_json for the child - const item_json = try wrapItemJson(alloc, item); - - try self.store.insertStep( - child_id, - run_row.id, - child_def_id, - "task", - "ready", - item_json, - step.max_attempts, - step.timeout_ms, - step.id, // parent_step_id - idx, - ); - log.info("created map child step {s} for item {d}", .{ child_id, i }); - } - - // 4. Mark map step as "completed" - try self.store.updateStepStatus(step.id, "completed", null, null, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", "{}"); - log.info("map step {s} completed, created {d} children", .{ step.id, items.len }); - } - - // ── executeReduceStep ──────────────────────────────────────────── - - fn executeReduceStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow, all_steps: []const types.StepRow) !void { - // 1. Find the dependency step (the fan_out or map step this depends on) - const dep_ids = try self.store.getStepDeps(alloc, step.id); - if (dep_ids.len == 0) { - log.warn("reduce step {s} has no dependencies", .{step.id}); - try self.store.updateStepStatus(step.id, "failed", null, null, "reduce step has no dependencies", step.attempt); - return; - } - - // The reduce depends on a fan_out/map step; find it - const dep_step_id = dep_ids[0]; - - // 2. 
Get all child steps of that dependency - const children = try self.store.getChildSteps(alloc, dep_step_id); - - if (children.len == 0) { - // If the dep is a fan_out/map that hasn't spawned children yet, wait - // Check if dep step itself is completed - const dep_status = findStepStatus(all_steps, dep_step_id); - if (dep_status == null or !std.mem.eql(u8, dep_status.?, "completed")) { - // Dep not completed yet, stay ready - return; + // 3. Get workers and select one + const worker_infos = try self.buildWorkerInfos(alloc); + + const required_tags = getNodeTags(alloc, node_json); + const node_type = getNodeField(alloc, node_json, "type") orelse "task"; + const is_agent_node = std.mem.eql(u8, node_type, "agent"); + + // For agent nodes, prefer A2A-protocol workers first, then fall back to any worker + var selected_worker: ?dispatch.WorkerInfo = null; + if (is_agent_node) { + // Filter to A2A workers only + var a2a_workers: std.ArrayListUnmanaged(dispatch.WorkerInfo) = .empty; + for (worker_infos) |w| { + if (std.mem.eql(u8, w.protocol, "a2a")) { + try a2a_workers.append(alloc, w); + } } - // Dep completed but no children? Odd, proceed with empty outputs - } - - // 3. Check if ALL children are completed - var all_done = true; - for (children) |child| { - if (!std.mem.eql(u8, child.status, "completed") and !std.mem.eql(u8, child.status, "skipped")) { - all_done = false; - break; + if (a2a_workers.items.len > 0) { + selected_worker = try dispatch.selectWorker(alloc, a2a_workers.items, required_tags); } } - if (!all_done) { - // Not all children done, leave reduce as "ready", try next tick - return; + // Fall back to any protocol if no A2A worker found (or not an agent node) + if (selected_worker == null) { + selected_worker = try dispatch.selectWorker(alloc, worker_infos, required_tags); } - - // 4. 
Collect all child outputs into an array - var child_outputs: std.ArrayListUnmanaged([]const u8) = .empty; - for (children) |child| { - if (child.output_json) |oj| { - // Extract "output" field from JSON, or use the raw JSON - const extracted = extractOutputField(alloc, oj) catch oj; - try child_outputs.append(alloc, extracted); - } else { - try child_outputs.append(alloc, ""); - } + if (selected_worker == null) { + return TaskNodeResult{ .no_worker = {} }; } + const worker = selected_worker.?; - // 5. Build template context with outputs array - // Find the dep step's def_step_id for template referencing - const dep_def_step_id = findStepDefId(all_steps, dep_step_id) orelse step.def_step_id; - - const step_output = templates.Context.StepOutput{ - .step_id = dep_def_step_id, - .output = null, - .outputs = child_outputs.items, - }; - - const prompt_template = try getStepField(alloc, run_row.workflow_json, step.def_step_id, "prompt_template") orelse { - // No template — just collect outputs and mark completed - const outputs_json = try serializeStringArray(alloc, child_outputs.items); - try self.store.updateStepStatus(step.id, "completed", null, outputs_json, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", "{}"); - return; - }; - - const ctx = templates.Context{ - .input_json = run_row.input_json, - .step_outputs = &.{step_output}, - .item = null, - }; - - // 6. Render template - const rendered_prompt = templates.render(alloc, prompt_template, ctx) catch |err| { - log.err("template render failed for reduce step {s}: {}", .{ step.id, err }); - try self.store.updateStepStatus(step.id, "failed", null, null, "template render failed", step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.failed", "{}"); - return; - }; + // 4. 
Create step record + const step_id_buf = ids.generateId(); + const step_id = try alloc.dupe(u8, &step_id_buf); + try self.store.insertStep(step_id, run_row.id, node_name, node_type, "running", state_json, 1, null, null, null); + try self.store.insertEvent(run_row.id, step_id, "step.running", "{}"); + self.emitEvent(alloc, .step_started, run_row.id, step_id, node_name, null); - // 7. Get workers and dispatch - const workers = try self.store.listWorkers(alloc); - var worker_infos: std.ArrayListUnmanaged(dispatch.WorkerInfo) = .empty; - for (workers) |w| { - const current_tasks = self.store.countRunningStepsByWorker(w.id) catch 0; - try worker_infos.append(alloc, .{ - .id = w.id, - .url = w.url, - .token = w.token, - .protocol = w.protocol, - .model = w.model, - .tags_json = w.tags_json, - .max_concurrent = w.max_concurrent, - .status = w.status, - .current_tasks = current_tasks, - }); + if (self.metrics) |m| { + metrics_mod.Metrics.incr(&m.steps_claimed_total); } - const required_tags = try getStepTags(alloc, run_row.workflow_json, step.def_step_id); - const selected_worker = try dispatch.selectWorker(alloc, worker_infos.items, required_tags); - if (selected_worker == null) { - log.debug("no worker available for reduce step {s}, will retry", .{step.id}); - return; + // 5. 
Dispatch to worker (A2A protocol for agent nodes with A2A workers, + // or standard protocol dispatch for task nodes / fallback) + if (is_agent_node and std.mem.eql(u8, worker.protocol, "a2a")) { + log.info("agent node {s} dispatching via A2A to worker {s}", .{ node_name, worker.id }); } - const worker = selected_worker.?; - - try self.store.updateStepStatus(step.id, "running", worker.id, null, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.running", "{}"); - const result = try dispatch.dispatchStep( alloc, worker.url, @@ -831,3471 +1202,1734 @@ pub const Engine = struct { worker.protocol, worker.model, run_row.id, - step.id, + step_id, rendered_prompt, ); - if (result.success) { - const output_json = try wrapOutput(alloc, result.output); - try self.store.updateStepStatus(step.id, "completed", worker.id, output_json, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", "{}"); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.completed", run_row.id, step.id, output_json, self.metrics); - log.info("reduce step {s} completed", .{step.id}); - } else { - const err_text = result.error_text orelse "dispatch failed"; - if (step.attempt < step.max_attempts) { - try self.store.updateStepStatus(step.id, "ready", null, null, err_text, step.attempt + 1); - try self.store.insertEvent(run_row.id, step.id, "step.retry", "{}"); - } else { - try self.store.updateStepStatus(step.id, "failed", worker.id, null, err_text, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.failed", "{}"); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.failed", run_row.id, step.id, "{}", self.metrics); - } - } - } - - // ── executeConditionStep ───────────────────────────────────────── - - fn executeConditionStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow, all_steps: []const types.StepRow) !void { - // 1. 
Get the dependency step's output - const dep_ids = try self.store.getStepDeps(alloc, step.id); - if (dep_ids.len == 0) { - log.warn("condition step {s} has no dependencies", .{step.id}); - try self.store.updateStepStatus(step.id, "failed", null, null, "condition step has no dependencies", step.attempt); - return; + // 6. Handle async dispatch + if (result.async_pending) { + const async_state = try mergeAsyncState(alloc, state_json, result.correlation_id orelse ""); + try self.store.updateStepInputJson(step_id, async_state); + log.info("step {s} dispatched async, correlation_id={s}", .{ step_id, result.correlation_id orelse "?" }); + return TaskNodeResult{ .async_pending = {} }; } - const dep_step_id = dep_ids[0]; - const dep_output = findStepOutput(all_steps, dep_step_id) orelse ""; - - // 2. Parse the "expression" from step definition - const expression = try getStepField(alloc, run_row.workflow_json, step.def_step_id, "expression") orelse "true"; - - // 3. Evaluate: for MVP, support simple "contains" check - // Expression format: check if the dependency output contains a certain substring - // If expression is "true", always take true branch - // Otherwise, check if dep output contains the expression text - const condition_met = if (std.mem.eql(u8, expression, "true")) - true - else if (std.mem.eql(u8, expression, "false")) - false - else - std.mem.indexOf(u8, dep_output, expression) != null; - - // 4. Determine branch - const true_target = try getStepField(alloc, run_row.workflow_json, step.def_step_id, "true_target"); - const false_target = try getStepField(alloc, run_row.workflow_json, step.def_step_id, "false_target"); + // 7. Handle result + if (result.success) { + var final_output = result.output; + + // Track cumulative token usage (Gap 2) + var total_input_tokens: i64 = 0; + var total_output_tokens: i64 = 0; + if (result.usage) |usage| { + total_input_tokens += usage.input_tokens; + total_output_tokens += usage.output_tokens; + } + + // 7a. 
Multi-turn continuation for agent nodes + if (is_agent_node) { + const max_turns_val = getNodeFieldInt(alloc, node_json, "max_turns"); + const continuation_prompt = getNodeField(alloc, node_json, "continuation_prompt"); + const turn_timeout_ms_val = getNodeFieldInt(alloc, node_json, "turn_timeout_ms"); + const turn_start_ms = ids.nowMs(); + + if (max_turns_val != null and continuation_prompt != null) { + const mt = max_turns_val.?; + const max_turns: u32 = @intCast(@min(@max(mt, 1), 100)); + if (max_turns > 1) { + var turn: u32 = 1; + while (turn < max_turns) : (turn += 1) { + // Check turn timeout (Gap 4) + if (turn_timeout_ms_val) |timeout_ms| { + const elapsed = ids.nowMs() - turn_start_ms; + if (elapsed > timeout_ms) { + log.info("agent node {s} turn timeout after {d}ms (limit={d}ms)", .{ node_name, elapsed, timeout_ms }); + break; + } + } - // 5. Determine the winning target and check for graph cycles - const winning_target: ?[]const u8 = if (condition_met) true_target else false_target; + // Consume pending injections between turns — these are + // queued but cannot be applied mid-node. Re-save them so + // they are applied after the full node completes. 
+ const mid_injections = self.store.consumePendingInjections(alloc, run_row.id, node_name) catch &.{}; + for (mid_injections) |inj| { + self.store.createPendingInjection(run_row.id, inj.updates_json, node_name) catch {}; + } - // Check if the winning target is a backward edge (cycle) - if (winning_target) |target| { - const cycle_handled = try self.handleCycleBack(alloc, run_row, step, target, all_steps); - if (cycle_handled) return; // Cycle was handled, step is already completed - } + // Render continuation prompt + const cont_rendered = self.renderWorkflowTemplate(alloc, continuation_prompt.?, state_json, runtime, null) catch break; + + const cont_result = try dispatch.dispatchStep( + alloc, + worker.url, + worker.token, + worker.protocol, + worker.model, + run_row.id, + step_id, + cont_rendered, + ); + + if (!cont_result.success) break; + final_output = cont_result.output; + + // Accumulate token usage from continuation turns + if (cont_result.usage) |usage| { + total_input_tokens += usage.input_tokens; + total_output_tokens += usage.output_tokens; + } + } + log.info("agent node {s} completed {d} turns", .{ node_name, turn }); + } + } + } - // 6. 
For the losing branch target: mark steps as "skipped" - if (condition_met) { - // Skip the false branch target - if (false_target) |target_def_id| { - try self.skipStepByDefId(alloc, all_steps, run_row.id, target_def_id); + // Record token usage (Gap 2) + if (total_input_tokens > 0 or total_output_tokens > 0) { + self.store.updateStepTokens(step_id, total_input_tokens, total_output_tokens) catch |err| { + log.warn("failed to update step tokens: {}", .{err}); + }; + self.store.updateRunTokens(run_row.id, total_input_tokens, total_output_tokens) catch |err| { + log.warn("failed to update run tokens: {}", .{err}); + }; } - } else { - // Skip the true branch target - if (true_target) |target_def_id| { - try self.skipStepByDefId(alloc, all_steps, run_row.id, target_def_id); + + // Store rate limit info (Gap 3) + if (result.rate_limit) |rl| { + self.rate_limits.put(worker.id, RateLimitInfo{ + .worker_id = worker.id, + .remaining = rl.remaining, + .limit = rl.limit, + .reset_ms = rl.reset_ms, + .updated_at_ms = ids.nowMs(), + }) catch {}; } - } - // 7. 
Mark condition step as "completed" - const branch_result = if (condition_met) "true" else "false"; - const output_json = try std.fmt.allocPrint(alloc, "{{\"branch\":\"{s}\"}}", .{branch_result}); - try self.store.updateStepStatus(step.id, "completed", null, output_json, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", "{}"); - log.info("condition step {s} evaluated to {s}", .{ step.id, branch_result }); - } + const output_json = try wrapOutput(alloc, final_output); + try self.store.updateStepStatus(step_id, "completed", worker.id, output_json, null, 1); + try self.store.insertEvent(run_row.id, step_id, "step.completed", "{}"); + self.emitEvent(alloc, .step_completed, run_row.id, step_id, node_name, null); + try self.store.markWorkerSuccess(worker.id, ids.nowMs()); - // ── executeApprovalStep ────────────────────────────────────────── + if (self.metrics) |m| { + metrics_mod.Metrics.incr(&m.worker_dispatch_success_total); + } + callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.completed", run_row.id, step_id, output_json, self.metrics); - fn executeApprovalStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { - _ = alloc; - // 1. Mark step as "waiting_approval" - try self.store.updateStepStatus(step.id, "waiting_approval", null, null, null, step.attempt); - // 2. Insert event - try self.store.insertEvent(run_row.id, step.id, "step.waiting_approval", "{}"); - log.info("approval step {s} waiting for approval", .{step.id}); - } + // Process UI messages and stream messages from worker response + if (self.sse_hub) |hub| { + processUiMessages(hub, alloc, run_row.id, step_id, final_output); + processStreamMessages(hub, alloc, run_row.id, step_id, node_type, final_output); + } - // ── executeTransformStep ──────────────────────────────────────── + // Build state_updates from output. 
Prefer explicit state_updates + // from the worker, otherwise honor node-level output_key / + // output_mapping before falling back to the legacy "output" key. + const state_updates = try buildTaskStateUpdates(alloc, node_json, final_output); - fn executeTransformStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { - // 1. Get output_template from workflow_json - const output_template = try getStepField(alloc, run_row.workflow_json, step.def_step_id, "output_template") orelse { - log.warn("no output_template for transform step {s}", .{step.def_step_id}); - try self.store.updateStepStatus(step.id, "failed", null, null, "missing output_template", step.attempt); - return; - }; + // Extract goto targets from output (command primitive) + const goto_targets = extractGotoTargets(alloc, final_output); - // 2. Build template context (same as task step) - const ctx = try buildTemplateContext(alloc, run_row, step, self.store); + return TaskNodeResult{ .completed = .{ .state_updates = state_updates, .goto_targets = goto_targets, .raw_output = final_output } }; + } else { + const err_text = result.error_text orelse "dispatch failed"; + try self.store.updateStepStatus(step_id, "failed", worker.id, null, err_text, 1); + try self.store.insertEvent(run_row.id, step_id, "step.failed", "{}"); + self.emitEvent(alloc, .step_failed, run_row.id, step_id, node_name, null); - // 3. Render template - const rendered = templates.render(alloc, output_template, ctx) catch |err| { - const err_msg = std.fmt.allocPrint(alloc, "template render error: {}", .{err}) catch "template render error"; - try self.store.updateStepStatus(step.id, "failed", null, null, err_msg, step.attempt); - return; - }; + const now_ms = ids.nowMs(); + const circuit_until = now_ms + self.runtime_cfg.worker_circuit_breaker_ms; + try self.store.markWorkerFailure( + worker.id, + err_text, + now_ms, + self.runtime_cfg.worker_failure_threshold, + circuit_until, + ); - // 4. 
Wrap as output and mark completed - const output = try wrapOutput(alloc, rendered); - try self.store.updateStepStatus(step.id, "completed", null, output, null, step.attempt); + if (self.metrics) |m| { + metrics_mod.Metrics.incr(&m.worker_dispatch_failure_total); + } + callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.failed", run_row.id, step_id, "{}", self.metrics); - // 5. Fire callback + event - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.completed", run_row.id, step.id, output, self.metrics); - try self.store.insertEvent(run_row.id, step.id, "step.completed", output); - log.info("transform step {s} completed", .{step.id}); + return TaskNodeResult{ .failed = err_text }; + } } - // ── executeWaitStep ────────────────────────────────────────────── + // ── executeSubgraphNode ───────────────────────────────────────── - fn executeWaitStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { - const now = ids.nowMs(); - - // Check signal mode first - if (try getStepField(alloc, run_row.workflow_json, step.def_step_id, "signal")) |_| { - // Signal mode: set to waiting_approval and wait for external POST /signal - try self.store.updateStepStatus(step.id, "waiting_approval", null, null, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.waiting_signal", "{}"); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.waiting_signal", run_row.id, step.id, "{}", self.metrics); - log.info("wait step {s} waiting for signal", .{step.id}); - return; + fn executeSubgraphNode(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, node_name: []const u8, node_json: []const u8, state_json: []const u8, recursion_depth: u32) !TaskNodeResult { + if (recursion_depth >= max_subgraph_depth) { + log.err("subgraph node {s}: max recursion depth ({d}) exceeded", .{ node_name, max_subgraph_depth }); + return TaskNodeResult{ .failed = "subgraph max recursion depth exceeded" }; } - // 
Duration mode - const duration_opt: ?i64 = blk: { - const duration_raw = try getStepFieldRaw(alloc, run_row.workflow_json, step.def_step_id, "duration_ms"); - if (duration_raw != null) { - const dur_int = (try getStepFieldInt(alloc, run_row.workflow_json, step.def_step_id, "duration_ms")) orelse { - try self.failStepWithError(alloc, run_row, step, "duration_ms must be an integer"); - return; - }; - if (dur_int < 0) { - try self.failStepWithError(alloc, run_row, step, "duration_ms must be >= 0"); - return; - } - break :blk dur_int; - } - break :blk null; + // Get workflow_id + const workflow_id = getNodeField(alloc, node_json, "workflow_id") orelse { + log.err("subgraph node {s}: missing workflow_id", .{node_name}); + return TaskNodeResult{ .failed = "subgraph missing workflow_id" }; }; - if (duration_opt) |duration| { - if (step.started_at_ms) |started| { - // Already running -- check if duration elapsed - if (now - started >= duration) { - const waited = now - started; - const output = try std.fmt.allocPrint(alloc, "{{\"output\":\"waited\",\"waited_ms\":{d}}}", .{waited}); - try self.store.updateStepStatus(step.id, "completed", null, output, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", output); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.completed", run_row.id, step.id, output, self.metrics); - log.info("wait step {s} completed after {d}ms", .{ step.id, waited }); - return; - } - // Not yet -- stay running (do nothing, will be checked next tick) - return; - } - // First time -- mark running and set started_at_ms - try self.store.updateStepStatus(step.id, "running", null, null, null, step.attempt); - try self.store.setStepStartedAt(step.id, now); - return; - } - // Until_ms mode (check integer field) - if (try getStepFieldInt(alloc, run_row.workflow_json, step.def_step_id, "until_ms")) |until| { - if (until < 0) { - try self.failStepWithError(alloc, run_row, step, "until_ms must be >= 0"); - return; - } 
- if (now >= until) { - const output = try std.fmt.allocPrint(alloc, "{{\"output\":\"waited\",\"waited_ms\":{d}}}", .{now - (step.started_at_ms orelse now)}); - try self.store.updateStepStatus(step.id, "completed", null, output, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", output); - log.info("wait step {s} completed (until_ms reached)", .{step.id}); - return; - } - if (step.started_at_ms == null) { - try self.store.updateStepStatus(step.id, "running", null, null, null, step.attempt); - try self.store.setStepStartedAt(step.id, now); - } - return; - } + // Load workflow definition from store + const workflow_row = try self.store.getWorkflow(alloc, workflow_id); + if (workflow_row == null) { + log.err("subgraph node {s}: workflow {s} not found", .{ node_name, workflow_id }); + return TaskNodeResult{ .failed = "subgraph workflow not found" }; + } + const definition = workflow_row.?.definition_json; + + // Build input state from parent state using input_mapping + const input_mapping_json = getNodeField(alloc, node_json, "input_mapping") orelse "{}"; + const child_input = buildSubgraphInput(alloc, state_json, input_mapping_json) catch "{}"; + + // Get schema from child workflow for initState + const child_schema = getSchemaJson(alloc, definition); + const child_state = state_mod.initState(alloc, child_input, child_schema) catch try alloc.dupe(u8, child_input); + + // Create child run + const child_id_buf = ids.generateId(); + const child_id = try alloc.dupe(u8, &child_id_buf); + try self.store.createRunWithState(child_id, workflow_id, definition, child_input, child_state); + try self.store.setParentRunId(child_id, run_row.id); + try self.store.updateRunStatus(child_id, "running", null); + + // Create step record for the subgraph node + const step_id_buf = ids.generateId(); + const step_id = try alloc.dupe(u8, &step_id_buf); + try self.store.insertStep(step_id, run_row.id, node_name, "subgraph", "running", "{}", 1, null, null, 
null); + try self.store.insertEvent(run_row.id, step_id, "step.running", "{}"); + + // Execute child run inline (recursive call to processRunWithDepth) + const child_run = (try self.store.getRun(alloc, child_id)).?; + self.processRunInline(alloc, child_run, recursion_depth + 1); + + // Check child run result + const completed_child = (try self.store.getRun(alloc, child_id)).?; + if (!std.mem.eql(u8, completed_child.status, "completed")) { + const child_error = completed_child.error_text orelse "subgraph did not complete"; + try self.store.updateStepStatus(step_id, "failed", null, null, child_error, 1); + return TaskNodeResult{ .failed = child_error }; + } + + // Extract output_key from child's final state + const output_key = getNodeField(alloc, node_json, "output_key") orelse "output"; + const child_final_state = completed_child.state_json orelse "{}"; + + // Get the value at output_key from child state + const output_path = try std.fmt.allocPrint(alloc, "state.{s}", .{output_key}); + const output_value = state_mod.getStateValue(alloc, child_final_state, output_path) catch null; + + // Build state_updates: {output_key: value} + const state_updates = if (output_value) |val| + try std.fmt.allocPrint(alloc, "{{\"{s}\":{s}}}", .{ output_key, val }) + else + try std.fmt.allocPrint(alloc, "{{\"{s}\":null}}", .{output_key}); - // No wait configuration -- fail - try self.failStepWithError(alloc, run_row, step, "wait step missing duration_ms, until_ms, or signal"); - } + try self.store.updateStepStatus(step_id, "completed", null, state_updates, null, 1); + try self.store.insertEvent(run_row.id, step_id, "step.completed", "{}"); - // ── executeRouterStep ──────────────────────────────────────────── + log.info("subgraph node {s} completed (child run {s})", .{ node_name, child_id }); + return TaskNodeResult{ .completed = .{ .state_updates = state_updates } }; + } - fn executeRouterStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow, 
all_steps: []const types.StepRow) !void { - // 1. Get dependency output - const deps = try self.store.getStepDeps(alloc, step.id); - if (deps.len == 0) { - try self.store.updateStepStatus(step.id, "failed", null, null, "router has no dependencies", step.attempt); - return; - } + // ── executeSendNode ────────────────────────────────────────────── - const dep_step = (try self.store.getStep(alloc, deps[0])) orelse { - try self.store.updateStepStatus(step.id, "failed", null, null, "dependency step not found", step.attempt); - return; + fn executeSendNode(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, runtime: RuntimeBindings, node_name: []const u8, node_json: []const u8, state_json: []const u8) !SendNodeResult { + // Read items_key state path, with items_from kept as a legacy alias. + const items_path = getSendItemsPath(alloc, node_json) orelse { + log.warn("send node {s} missing items_key/items_from", .{node_name}); + return SendNodeResult{ .state_updates = null }; }; - const dep_output = extractOutputField(alloc, dep_step.output_json orelse "") catch ""; - // 2. Parse routes from workflow definition (routes is a JSON object, not a string) - const routes_str = try getStepFieldRaw(alloc, run_row.workflow_json, step.def_step_id, "routes") orelse { - try self.store.updateStepStatus(step.id, "failed", null, null, "router missing routes", step.attempt); - return; + // Get the target_node + const target_node = getNodeField(alloc, node_json, "target_node") orelse { + log.warn("send node {s} missing target_node", .{node_name}); + return SendNodeResult{ .state_updates = null }; }; - const default_target = try getStepField(alloc, run_row.workflow_json, step.def_step_id, "default"); - - // 3. 
Parse routes JSON object and find match - var matched_target: ?[]const u8 = null; - var all_targets: std.ArrayListUnmanaged([]const u8) = .empty; - - const parsed = std.json.parseFromSlice(std.json.Value, alloc, routes_str, .{}) catch { - try self.store.updateStepStatus(step.id, "failed", null, null, "invalid routes JSON", step.attempt); - return; + // Get target node definition from workflow + const target_json = getNodeJson(alloc, run_row.workflow_json, target_node) orelse { + log.warn("send node {s} target {s} not found", .{ node_name, target_node }); + return SendNodeResult{ .state_updates = null }; }; - if (parsed.value == .object) { - var it = parsed.value.object.iterator(); - while (it.next()) |entry| { - const target = switch (entry.value_ptr.*) { - .string => |s| s, - else => continue, - }; - try all_targets.append(alloc, target); - - if (matched_target == null) { - // Check if dep_output contains the route key - if (std.mem.indexOf(u8, dep_output, entry.key_ptr.*) != null) { - matched_target = target; - } - } - } + // Read items from state + const items_json = state_mod.getStateValue(alloc, state_json, items_path) catch null; + if (items_json == null) { + log.warn("send node {s}: no items at path {s}", .{ node_name, items_path }); + return SendNodeResult{ .state_updates = null }; } - // 4. 
Use default if no match - if (matched_target == null) { - matched_target = default_target; + // Parse items as array + const items_parsed = json.parseFromSlice(json.Value, alloc, items_json.?, .{}) catch { + log.warn("send node {s}: items not valid JSON", .{node_name}); + return SendNodeResult{ .state_updates = null }; + }; + if (items_parsed.value != .array) { + log.warn("send node {s}: items not an array", .{node_name}); + return SendNodeResult{ .state_updates = null }; } - if (matched_target == null) { - try self.store.updateStepStatus(step.id, "failed", null, null, "no matching route and no default", step.attempt); - return; - } + // Build worker list once before iterating items + const worker_infos = try self.buildWorkerInfos(alloc); + const required_tags = getNodeTags(alloc, target_json); - // 5. Check if matched target is a backward edge (cycle) - const cycle_handled = try self.handleCycleBack(alloc, run_row, step, matched_target.?, all_steps); - if (cycle_handled) return; // Cycle was handled, step is already completed + // For each item, execute the target node + var results: std.ArrayListUnmanaged([]const u8) = .empty; + for (items_parsed.value.array.items, 0..) |item, idx| { + // Serialize item + const item_str = serializeJsonValue(alloc, item) catch continue; - // 6. Skip all non-matched targets - for (all_targets.items) |target| { - if (!std.mem.eql(u8, target, matched_target.?)) { - self.skipStepByDefId(alloc, all_steps, run_row.id, target) catch {}; - } - } + // Get prompt template from target node + const prompt_template = getNodeField(alloc, target_json, "prompt_template") orelse continue; - // 7. 
Mark router completed - const output = try std.fmt.allocPrint(alloc, "{{\"output\":\"routed\",\"routed_to\":\"{s}\"}}", .{matched_target.?}); - try self.store.updateStepStatus(step.id, "completed", null, output, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", output); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.completed", run_row.id, step.id, output, self.metrics); - log.info("router step {s} routed to {s}", .{ step.id, matched_target.? }); - } + // Render with item + const rendered = self.renderWorkflowTemplate(alloc, prompt_template, state_json, runtime, item_str) catch continue; - // ── executeLoopStep ───────────────────────────────────────────── - // - // First tick (step is "ready", no children exist): - // - Parse body array from workflow definition - // - Create child step instances for iteration 0 - // - Chain body steps sequentially within the iteration - // - Mark loop step as "running" - - fn executeLoopStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { - // Parse body array from step definition - const body_raw = try getStepFieldRaw(alloc, run_row.workflow_json, step.def_step_id, "body") orelse { - log.warn("no body for loop step {s}", .{step.def_step_id}); - try self.store.updateStepStatus(step.id, "failed", null, null, "missing body in loop definition", step.attempt); - return; - }; + const selected_worker = try dispatch.selectWorker(alloc, worker_infos, required_tags); + if (selected_worker == null) { + try results.append(alloc, "null"); + continue; + } + const worker = selected_worker.?; - const body_parsed = std.json.parseFromSlice(std.json.Value, alloc, body_raw, .{}) catch { - try self.store.updateStepStatus(step.id, "failed", null, null, "invalid body JSON in loop definition", step.attempt); - return; - }; + // Create child step + const child_step_id_buf = ids.generateId(); + const child_step_id = try alloc.dupe(u8, &child_step_id_buf); + 
const child_def_id = try std.fmt.allocPrint(alloc, "{s}_{d}", .{ node_name, idx }); + try self.store.insertStep(child_step_id, run_row.id, child_def_id, "task", "running", item_str, 1, null, null, @as(?i64, @intCast(idx))); + try self.store.insertEvent(run_row.id, child_step_id, "step.running", "{}"); - if (body_parsed.value != .array or body_parsed.value.array.items.len == 0) { - try self.store.updateStepStatus(step.id, "failed", null, null, "body must be a non-empty array", step.attempt); - return; + const dr = try dispatch.dispatchStep( + alloc, + worker.url, + worker.token, + worker.protocol, + worker.model, + run_row.id, + child_step_id, + rendered, + ); + + if (dr.success) { + const output_json = try wrapOutput(alloc, dr.output); + try self.store.updateStepStatus(child_step_id, "completed", worker.id, output_json, null, 1); + try self.store.insertEvent(run_row.id, child_step_id, "step.completed", "{}"); + try results.append(alloc, try jsonStringify(alloc, dr.output)); + } else { + try self.store.updateStepStatus(child_step_id, "failed", worker.id, null, dr.error_text, 1); + try results.append(alloc, "null"); + } } - const body_items = body_parsed.value.array.items; + // Build state_updates from collected results + const results_json = try serializeStringArray(alloc, results.items); + const output_key = getNodeField(alloc, node_json, "output_key") orelse "send_results"; + const state_updates = try std.fmt.allocPrint(alloc, "{{\"{s}\":{s}}}", .{ output_key, results_json }); - // Create child steps for iteration 0 - try self.createLoopIterationChildren(alloc, run_row, step, body_items, 0); + // Create parent step record + const step_id_buf = ids.generateId(); + const step_id = try alloc.dupe(u8, &step_id_buf); + try self.store.insertStep(step_id, run_row.id, node_name, "send", "completed", "{}", 1, null, null, null); + try self.store.updateStepStatus(step_id, "completed", null, state_updates, null, 1); + try self.store.insertEvent(run_row.id, step_id, 
"step.completed", "{}"); - // Mark loop step as "running" - try self.store.updateStepStatus(step.id, "running", null, null, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.running", "{}"); - log.info("loop step {s} started iteration 0", .{step.id}); + return SendNodeResult{ .state_updates = state_updates }; } - // ── pollRunningLoopStep ───────────────────────────────────────── - // - // Checks progress of a running loop step each tick: - // - Find current iteration (max iteration_index) - // - Check if all children in current iteration are done - // - If any failed -> loop fails - // - If all done: evaluate exit_condition - // - If met -> loop completes - // - If max_iterations reached -> loop completes - // - Else -> create next iteration - - fn pollRunningLoopStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { - // Get all children of this loop step - const children = try self.store.getChildSteps(alloc, step.id); - if (children.len == 0) return; // No children yet, wait - - // Find the current (max) iteration_index - var max_iter: i64 = 0; - for (children) |child| { - if (child.iteration_index > max_iter) { - max_iter = child.iteration_index; - } - } + fn renderWorkflowTemplate( + self: *Engine, + alloc: std.mem.Allocator, + template: []const u8, + state_json: []const u8, + runtime: RuntimeBindings, + item_json: ?[]const u8, + ) ![]const u8 { + return templates.renderTemplateWithStore(alloc, template, state_json, runtime.input_json, item_json, runtime.storeAccess(self.store_fetcher)); + } - // Check if all children in the current iteration are in terminal states - var all_done = true; - var any_failed = false; - var last_child_output: ?[]const u8 = null; + fn buildRuntimeBindings(self: *Engine, alloc: std.mem.Allocator, workflow_json: []const u8, state_json: []const u8, input_json: ?[]const u8) RuntimeBindings { + return .{ + .input_json = input_json, + .task_id = 
getRuntimeStringSetting(alloc, state_json, workflow_json, &.{"task_id"}), + .tracker = if (self.trusted_tracker_url) |base_url| + .{ + .base_url = base_url, + .api_token = self.trusted_tracker_api_token, + } + else + null, + }; + } - for (children) |child| { - if (child.iteration_index != max_iter) continue; + fn applyStoreUpdates(self: *Engine, alloc: std.mem.Allocator, state_json: []const u8, store_updates_json: []const u8, runtime: RuntimeBindings) !void { + const access = runtime.storeAccess(self.store_fetcher) orelse return error.StoreNotConfigured; + const parsed = try json.parseFromSlice(json.Value, alloc, store_updates_json, .{}); - if (std.mem.eql(u8, child.status, "failed")) { - any_failed = true; - continue; - } - if (std.mem.eql(u8, child.status, "completed") or std.mem.eql(u8, child.status, "skipped")) { - // Track the last completed child's output (by item_index order) - if (child.output_json != null) { - last_child_output = child.output_json; + switch (parsed.value) { + .object => try self.applySingleStoreUpdate(alloc, access, state_json, parsed.value.object), + .array => |arr| { + for (arr.items) |item| { + if (item != .object) return error.InvalidStoreUpdates; + try self.applySingleStoreUpdate(alloc, access, state_json, item.object); } - continue; - } - // Still pending/ready/running - all_done = false; + }, + else => return error.InvalidStoreUpdates, } + } - if (!all_done) return; // Not done yet, wait - - if (any_failed) { - // Loop fails if any child fails - try self.store.updateStepStatus(step.id, "failed", null, null, "loop child step failed", step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.failed", "{}"); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.failed", run_row.id, step.id, "{}", self.metrics); - log.info("loop step {s} failed (child failed)", .{step.id}); - return; - } + fn applySingleStoreUpdate(self: *Engine, alloc: std.mem.Allocator, access: templates.StoreAccess, state_json: []const u8, obj: 
json.ObjectMap) !void { + const namespace_val = obj.get("namespace") orelse return error.InvalidStoreUpdates; + const key_val = obj.get("key") orelse return error.InvalidStoreUpdates; + const value_val = obj.get("value") orelse return error.InvalidStoreUpdates; - // All children in current iteration are done. Evaluate exit_condition. - const exit_condition = try getStepField(alloc, run_row.workflow_json, step.def_step_id, "exit_condition"); - const max_iterations = try getStepFieldInt(alloc, run_row.workflow_json, step.def_step_id, "max_iterations") orelse 10; + if (namespace_val != .string or key_val != .string) return error.InvalidStoreUpdates; - // Extract output text from last child for condition matching - const last_output_text = if (last_child_output) |oj| - (extractOutputField(alloc, oj) catch oj) - else - ""; + const value_json = try resolveStoreUpdateValue(alloc, state_json, value_val); + try self.store_writer(alloc, access.base_url, access.api_token, namespace_val.string, key_val.string, value_json); + } - // Check exit condition (substring match, same as condition step) - const condition_met = if (exit_condition) |cond| - std.mem.indexOf(u8, last_output_text, cond) != null - else - false; - - if (condition_met) { - // Exit condition met -- loop completes with last child's output - const output = last_child_output orelse try wrapOutput(alloc, "loop completed"); - try self.store.updateStepStatus(step.id, "completed", null, output, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", output); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.completed", run_row.id, step.id, output, self.metrics); - log.info("loop step {s} completed (exit condition met at iteration {d})", .{ step.id, max_iter }); - return; - } + // ── Async polling ──────────────────────────────────────────────── - // Check if max_iterations reached - if (max_iter + 1 >= max_iterations) { - // Max iterations reached -- loop completes with 
last child's output - const output = last_child_output orelse try wrapOutput(alloc, "loop completed (max iterations)"); - try self.store.updateStepStatus(step.id, "completed", null, output, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", output); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.completed", run_row.id, step.id, output, self.metrics); - log.info("loop step {s} completed (max iterations {d} reached)", .{ step.id, max_iterations }); - return; - } + fn pollAsyncTaskStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { + const input_json = step.input_json; + if (input_json.len == 0) return; - // Create next iteration - const next_iter = max_iter + 1; + const parsed = json.parseFromSlice(json.Value, alloc, input_json, .{}) catch return; + if (parsed.value != .object) return; - // Re-parse body to get the body step def IDs - const body_raw = try getStepFieldRaw(alloc, run_row.workflow_json, step.def_step_id, "body") orelse return; - const body_parsed = std.json.parseFromSlice(std.json.Value, alloc, body_raw, .{}) catch return; - if (body_parsed.value != .array) return; - const body_items = body_parsed.value.array.items; - - try self.createLoopIterationChildren(alloc, run_row, step, body_items, next_iter); - log.info("loop step {s} started iteration {d}", .{ step.id, next_iter }); - } - - /// Create child steps for one iteration of a loop. - fn createLoopIterationChildren(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, loop_step: types.StepRow, body_items: []const std.json.Value, iteration: i64) !void { - var prev_child_id: ?[]const u8 = null; - - for (body_items, 0..) 
|body_item, i| { - // Each body_item should be a string (step def ID) - const body_def_id = switch (body_item) { - .string => |s| s, - else => continue, - }; - - // Look up the body step's type from the workflow definition - const body_step_type = try getStepField(alloc, run_row.workflow_json, body_def_id, "type") orelse "task"; - - // Generate unique child step ID - const child_id_buf = ids.generateId(); - const child_id = try alloc.dupe(u8, &child_id_buf); - - // First step in chain is "ready", rest are "pending" - const initial_status: []const u8 = if (i == 0) "ready" else "pending"; - const idx: i64 = @intCast(i); + const async_flag = parsed.value.object.get("async_pending") orelse return; + if (async_flag != .bool or !async_flag.bool) return; - try self.store.insertStepWithIteration( - child_id, - run_row.id, - body_def_id, // original def_step_id for template/tag lookup - body_step_type, - initial_status, - "{}", // input_json - 1, // max_attempts - null, // timeout_ms - loop_step.id, // parent_step_id - idx, // item_index (position in body) - iteration, // iteration_index - ); + const corr_val = parsed.value.object.get("correlation_id") orelse return; + if (corr_val != .string) return; + const correlation_id = corr_val.string; - // Chain: this step depends on previous step in the body - if (prev_child_id) |prev_id| { - try self.store.insertStepDep(child_id, prev_id); + const queue = self.response_queue orelse return; + const response = queue.take(correlation_id) orelse { + if (step.timeout_ms) |timeout_ms| { + if (step.started_at_ms) |started_at| { + const elapsed = ids.nowMs() - started_at; + if (elapsed > timeout_ms) { + const err_text = try std.fmt.allocPrint(alloc, "async step timed out after {d}ms", .{timeout_ms}); + try self.store.updateStepStatus(step.id, "failed", step.worker_id, null, err_text, step.attempt); + try self.store.insertEvent(run_row.id, step.id, "step.failed", "{}"); + if (self.metrics) |m| { + 
metrics_mod.Metrics.incr(&m.worker_dispatch_failure_total); + } + callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.failed", run_row.id, step.id, "{}", self.metrics); + log.err("async step {s} timed out", .{step.id}); + } + } } - - prev_child_id = child_id; - } - } - - // ── executeSubWorkflowStep ────────────────────────────────────── - // - // First tick (step is "ready", child_run_id is null): - // - Get nested workflow definition - // - Create a child run with the nested workflow - // - Create child run's steps - // - Store child_run_id on the parent step - // - Mark step as "running" - - fn executeSubWorkflowStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { - // 1. Get nested workflow definition from the step def - const workflow_raw = try getStepFieldRaw(alloc, run_row.workflow_json, step.def_step_id, "workflow") orelse { - log.warn("no workflow for sub_workflow step {s}", .{step.def_step_id}); - try self.store.updateStepStatus(step.id, "failed", null, null, "missing workflow in sub_workflow definition", step.attempt); - return; - }; - - // 2. 
Parse the nested workflow to extract steps - const nested_parsed = std.json.parseFromSlice(std.json.Value, alloc, workflow_raw, .{}) catch { - try self.store.updateStepStatus(step.id, "failed", null, null, "invalid workflow JSON in sub_workflow definition", step.attempt); return; }; - if (nested_parsed.value != .object) { - try self.store.updateStepStatus(step.id, "failed", null, null, "workflow must be a JSON object", step.attempt); - return; + if (response.success) { + const output_json = try wrapOutput(alloc, response.output); + try self.store.updateStepStatus(step.id, "completed", step.worker_id, output_json, null, step.attempt); + try self.store.insertEvent(run_row.id, step.id, "step.completed", "{}"); + if (step.worker_id) |wid| { + try self.store.markWorkerSuccess(wid, ids.nowMs()); + } + if (self.metrics) |m| { + metrics_mod.Metrics.incr(&m.worker_dispatch_success_total); + } + callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.completed", run_row.id, step.id, output_json, self.metrics); + log.info("async step {s} completed", .{step.id}); + } else { + const err_text = response.error_text orelse "async dispatch failed"; + try self.store.updateStepStatus(step.id, "failed", step.worker_id, null, err_text, step.attempt); + try self.store.insertEvent(run_row.id, step.id, "step.failed", "{}"); + if (step.worker_id) |wid| { + const now_ms = ids.nowMs(); + const circuit_until = now_ms + self.runtime_cfg.worker_circuit_breaker_ms; + try self.store.markWorkerFailure(wid, err_text, now_ms, self.runtime_cfg.worker_failure_threshold, circuit_until); + } + if (self.metrics) |m| { + metrics_mod.Metrics.incr(&m.worker_dispatch_failure_total); + } + callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.failed", run_row.id, step.id, "{}", self.metrics); + log.err("async step {s} failed: {s}", .{ step.id, err_text }); } + } - const nested_steps_val = nested_parsed.value.object.get("steps") orelse { - try self.store.updateStepStatus(step.id, "failed", null, 
null, "workflow missing steps array", step.attempt); - return; - }; - if (nested_steps_val != .array or nested_steps_val.array.items.len == 0) { - try self.store.updateStepStatus(step.id, "failed", null, null, "workflow steps must be a non-empty array", step.attempt); - return; - } + /// Merge async_pending + correlation_id into existing input_json. + fn mergeAsyncState(alloc: std.mem.Allocator, existing_input: []const u8, correlation_id: []const u8) ![]const u8 { + var obj = json.ObjectMap.init(alloc); - // 3. Build input for child run from input_mapping (optional) - var child_input_json: []const u8 = run_row.input_json; - if (try getStepFieldRaw(alloc, run_row.workflow_json, step.def_step_id, "input_mapping")) |mapping_raw| { - const mapping_parsed = std.json.parseFromSlice(std.json.Value, alloc, mapping_raw, .{}) catch null; - if (mapping_parsed) |mp| { - if (mp.value == .object) { - // Render each value in the mapping using template context - const ctx = try buildTemplateContext(alloc, run_row, step, self.store); - var result_buf: std.ArrayListUnmanaged(u8) = .empty; - try result_buf.append(alloc, '{'); - var first = true; - var it = mp.value.object.iterator(); + if (existing_input.len > 0) { + const p = json.parseFromSlice(json.Value, alloc, existing_input, .{}) catch null; + if (p) |parsed| { + if (parsed.value == .object) { + var it = parsed.value.object.iterator(); while (it.next()) |entry| { - if (!first) try result_buf.append(alloc, ','); - first = false; - // Write key - try result_buf.append(alloc, '"'); - try result_buf.appendSlice(alloc, entry.key_ptr.*); - try result_buf.appendSlice(alloc, "\":"); - // Render value as template if it's a string - if (entry.value_ptr.* == .string) { - const rendered = templates.render(alloc, entry.value_ptr.string, ctx) catch entry.value_ptr.string; - try result_buf.append(alloc, '"'); - for (rendered) |ch| { - switch (ch) { - '"' => try result_buf.appendSlice(alloc, "\\\""), - '\\' => try result_buf.appendSlice(alloc, 
"\\\\"), - '\n' => try result_buf.appendSlice(alloc, "\\n"), - '\r' => try result_buf.appendSlice(alloc, "\\r"), - '\t' => try result_buf.appendSlice(alloc, "\\t"), - else => try result_buf.append(alloc, ch), - } - } - try result_buf.append(alloc, '"'); - } else { - // Non-string values: serialize as-is - var out: std.io.Writer.Allocating = .init(alloc); - var jw: std.json.Stringify = .{ .writer = &out.writer }; - jw.write(entry.value_ptr.*) catch {}; - const serialized = out.toOwnedSlice() catch "null"; - try result_buf.appendSlice(alloc, serialized); - } + try obj.put(entry.key_ptr.*, entry.value_ptr.*); } - try result_buf.append(alloc, '}'); - child_input_json = try result_buf.toOwnedSlice(alloc); } } } - // 4. Create child run - const child_run_id_buf = ids.generateId(); - const child_run_id = try alloc.dupe(u8, &child_run_id_buf); - - // Build the child workflow_json: wrap the nested workflow with its steps - // The child run's workflow_json should be the workflow_raw itself - try self.store.insertRun(child_run_id, null, "running", workflow_raw, child_input_json, run_row.callbacks_json); - - // 5. 
Create child run's steps from the nested workflow definition - const nested_steps = nested_steps_val.array.items; - - // Build mapping from def_step_id -> generated step_id - var def_ids: std.ArrayListUnmanaged([]const u8) = .empty; - var gen_ids: std.ArrayListUnmanaged([]const u8) = .empty; - - // First pass: create all steps - for (nested_steps) |step_val| { - if (step_val != .object) continue; - const step_obj = step_val.object; - - const def_step_id = if (step_obj.get("id")) |id_val| blk: { - if (id_val == .string) break :blk id_val.string; - break :blk null; - } else null; - if (def_step_id == null) continue; - - const step_type_str = if (step_obj.get("type")) |t| blk: { - if (t == .string) break :blk t.string; - break :blk "task"; - } else "task"; - - const child_step_id_buf = ids.generateId(); - const child_step_id = try alloc.dupe(u8, &child_step_id_buf); + try obj.put("async_pending", .{ .bool = true }); + try obj.put("correlation_id", .{ .string = correlation_id }); - // Determine initial status - const has_deps = if (step_obj.get("depends_on")) |deps| blk: { - if (deps == .array and deps.array.items.len > 0) break :blk true; - break :blk false; - } else false; - const initial_status: []const u8 = if (has_deps) "pending" else "ready"; + return json.Stringify.valueAlloc(alloc, json.Value{ .object = obj }, .{}); + } +}; - try self.store.insertStep( - child_step_id, - child_run_id, - def_step_id.?, - step_type_str, - initial_status, - "{}", - 1, // max_attempts - null, // timeout_ms - null, // parent_step_id - null, // item_index - ); +// ── findReadyNodes ────────────────────────────────────────────────── + +/// Find nodes that are ready to execute. +/// A node is ready when ALL its inbound edges have their source in completed_nodes. +/// __start__ is always "completed" (synthetic). +/// For conditional edges "source:value", the source is just "source" (strip after `:`) +/// and the edge is only satisfied if route_results[source] == value. 
+pub fn findReadyNodes( + alloc: std.mem.Allocator, + workflow_json: []const u8, + completed_nodes: *std.StringHashMap(void), + route_results: *std.StringHashMap([]const u8), +) ![]const []const u8 { + const parsed = json.parseFromSlice(json.Value, alloc, workflow_json, .{}) catch { + return &.{}; + }; + return findReadyNodesFromRoot(alloc, parsed.value, completed_nodes, route_results); +} - try def_ids.append(alloc, def_step_id.?); - try gen_ids.append(alloc, child_step_id); - } +fn findReadyNodesFromRoot( + alloc: std.mem.Allocator, + root: json.Value, + completed_nodes: *std.StringHashMap(void), + route_results: *std.StringHashMap([]const u8), +) ![]const []const u8 { + if (root != .object) return &.{}; - // Second pass: insert step dependencies - for (nested_steps) |step_val| { - if (step_val != .object) continue; - const step_obj = step_val.object; + // Get edges array + const edges_val = root.object.get("edges") orelse return &.{}; + if (edges_val != .array) return &.{}; - const def_step_id = if (step_obj.get("id")) |id_val| blk: { - if (id_val == .string) break :blk id_val.string; - break :blk null; - } else null; - if (def_step_id == null) continue; + // Get all node names from "nodes" object + const nodes_val = root.object.get("nodes") orelse return &.{}; + if (nodes_val != .object) return &.{}; - // Find generated step_id - const gen_step_id = lookupId(def_ids.items, gen_ids.items, def_step_id.?) orelse continue; + // Build inbound edge map: target -> list of (source, condition_value?) 
+ const EdgeInfo = struct { + source: []const u8, + condition: ?[]const u8, // null for unconditional, "value" for conditional + }; - const deps_val = step_obj.get("depends_on") orelse continue; - if (deps_val != .array) continue; + var inbound = std.StringHashMap(std.ArrayListUnmanaged(EdgeInfo)).init(alloc); + + // Also collect all target nodes mentioned in edges + for (edges_val.array.items) |edge_item| { + if (edge_item != .array) continue; + if (edge_item.array.items.len < 2) continue; + + const source_raw = if (edge_item.array.items[0] == .string) edge_item.array.items[0].string else continue; + const target = if (edge_item.array.items[1] == .string) edge_item.array.items[1].string else continue; + + // Parse source: might be "node:value" for conditional edges + var source: []const u8 = source_raw; + var condition: ?[]const u8 = null; + if (std.mem.indexOfScalar(u8, source_raw, ':')) |colon_pos| { + source = source_raw[0..colon_pos]; + condition = source_raw[colon_pos + 1 ..]; + } + + var entry = inbound.getPtr(target); + if (entry == null) { + try inbound.put(target, std.ArrayListUnmanaged(EdgeInfo){}); + entry = inbound.getPtr(target); + } + try entry.?.append(alloc, .{ + .source = source, + .condition = condition, + }); + } + + // Detect dead nodes: nodes that are unreachable because a conditional + // edge was not taken. A node is dead if ALL its inbound edges are + // conditional and none match the route result. Dead nodes propagate: + // any node whose only inbound edges come from dead nodes is also dead. 
+ var dead_nodes = std.StringHashMap(void).init(alloc); + + // Iterative dead node detection (propagate through the graph) + var changed = true; + while (changed) { + changed = false; + var dead_it = inbound.iterator(); + while (dead_it.next()) |kv| { + const target = kv.key_ptr.*; + const edges = kv.value_ptr.items; + + if (dead_nodes.get(target) != null) continue; + if (completed_nodes.get(target) != null) continue; + + var all_dead_or_unsat = true; + for (edges) |edge| { + if (std.mem.eql(u8, edge.source, "__start__")) { + // __start__ is never dead + all_dead_or_unsat = false; + break; + } - for (deps_val.array.items) |dep_item| { - if (dep_item != .string) continue; - const dep_gen_id = lookupId(def_ids.items, gen_ids.items, dep_item.string) orelse continue; - try self.store.insertStepDep(gen_step_id, dep_gen_id); + // If source is dead, this edge is dead + if (dead_nodes.get(edge.source) != null) continue; + + if (edge.condition) |cond| { + // Conditional edge: check if source completed and condition matched + if (completed_nodes.get(edge.source) != null) { + if (route_results.get(edge.source)) |actual| { + if (std.mem.eql(u8, actual, cond)) { + // This edge IS satisfied + all_dead_or_unsat = false; + break; + } + } + // Source completed but condition didn't match -> dead edge + } else { + // Source not completed yet and not dead -> not dead yet + all_dead_or_unsat = false; + break; + } + } else { + // Non-conditional edge from a live, non-dead source + all_dead_or_unsat = false; + break; + } } - } - - // 6. Store child_run_id on the parent step - try self.store.updateStepChildRunId(step.id, child_run_id); - - // 7. 
Mark sub_workflow step as "running" - try self.store.updateStepStatus(step.id, "running", null, null, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.running", "{}"); - log.info("sub_workflow step {s} created child run {s}", .{ step.id, child_run_id }); - } - - // ── pollRunningSubWorkflowStep ────────────────────────────────── - // - // Checks the child run's status each tick: - // - If completed -> mark parent step completed with child's output - // - If failed -> mark parent step failed - // - Otherwise -> wait - - fn pollRunningSubWorkflowStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { - const child_run_id = step.child_run_id orelse return; // No child run yet - // Get child run - const child_run = (try self.store.getRun(alloc, child_run_id)) orelse { - try self.store.updateStepStatus(step.id, "failed", null, null, "child run not found", step.attempt); - return; - }; - - if (std.mem.eql(u8, child_run.status, "completed")) { - // Get the child run's last completed step output - const child_steps = try self.store.getStepsByRun(alloc, child_run_id); - var last_output: ?[]const u8 = null; - for (child_steps) |cs| { - if (std.mem.eql(u8, cs.status, "completed") and cs.output_json != null) { - last_output = cs.output_json; - } + if (all_dead_or_unsat) { + try dead_nodes.put(target, {}); + changed = true; } - const output = last_output orelse try wrapOutput(alloc, "sub_workflow completed"); - try self.store.updateStepStatus(step.id, "completed", null, output, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", output); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.completed", run_row.id, step.id, output, self.metrics); - log.info("sub_workflow step {s} completed (child run {s})", .{ step.id, child_run_id }); - } else if (std.mem.eql(u8, child_run.status, "failed")) { - const err_text = child_run.error_text orelse "child run failed"; - 
try self.store.updateStepStatus(step.id, "failed", null, null, err_text, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.failed", "{}"); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.failed", run_row.id, step.id, "{}", self.metrics); - log.info("sub_workflow step {s} failed (child run {s})", .{ step.id, child_run_id }); } - // Otherwise: child run still in progress, wait } - // ── executeDebateStep ────────────────────────────────────────── - // - // Phase 1 (step is "ready"): Create N participant child steps - // Phase 2 (step is "running"): polled by pollRunningDebateStep - - fn executeDebateStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { - // 1. Parse count from workflow_json - const count_val = try getStepFieldInt(alloc, run_row.workflow_json, step.def_step_id, "count") orelse { - log.warn("no count for debate step {s}", .{step.def_step_id}); - try self.store.updateStepStatus(step.id, "failed", null, null, "missing count in debate definition", step.attempt); - return; - }; - const count: usize = @intCast(count_val); - - // 2. Get prompt_template and render it - const prompt_template = try getStepField(alloc, run_row.workflow_json, step.def_step_id, "prompt_template") orelse { - log.warn("no prompt_template for debate step {s}", .{step.def_step_id}); - try self.store.updateStepStatus(step.id, "failed", null, null, "missing prompt_template in debate definition", step.attempt); - return; - }; - - const ctx = try buildTemplateContext(alloc, run_row, step, self.store); - const rendered_prompt = templates.render(alloc, prompt_template, ctx) catch |err| { - log.err("template render failed for debate step {s}: {}", .{ step.id, err }); - try self.store.updateStepStatus(step.id, "failed", null, null, "template render failed", step.attempt); - return; - }; - - // 3. 
Create N participant child steps - for (0..count) |i| { - const child_id_buf = ids.generateId(); - const child_id = try alloc.dupe(u8, &child_id_buf); - const child_def_id = try std.fmt.allocPrint(alloc, "{s}_participant_{d}", .{ step.def_step_id, i }); - const idx: i64 = @intCast(i); + // Find ready nodes: for each node, check if all inbound edges are satisfied + // (treating dead source nodes as satisfied) + var ready: std.ArrayListUnmanaged([]const u8) = .empty; - // Store rendered prompt in input_json so participant children can be dispatched. - const input_json = try buildRenderedPromptInputJson(alloc, rendered_prompt); + var inbound_it = inbound.iterator(); + while (inbound_it.next()) |kv| { + const target = kv.key_ptr.*; + const edges = kv.value_ptr.items; - try self.store.insertStep( - child_id, - run_row.id, - child_def_id, - "task", - "ready", - input_json, - step.max_attempts, - step.timeout_ms, - step.id, // parent_step_id - idx, - ); - log.info("created debate participant child step {s} (index {d})", .{ child_id, i }); - } + // Skip if already completed or dead + if (completed_nodes.get(target) != null) continue; + if (dead_nodes.get(target) != null) continue; - // 4. Mark debate step as "running" - try self.store.updateStepStatus(step.id, "running", null, null, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.running", "{}"); - log.info("debate step {s} started with {d} participants", .{ step.id, count }); - } + var all_satisfied = true; + var any_conditional_edge = false; + var any_conditional_satisfied = false; - // ── pollRunningDebateStep ──────────────────────────────────────── - // - // Checks if all participant children are done, then dispatches judge. 
+ for (edges) |edge| { + // __start__ is always satisfied + if (std.mem.eql(u8, edge.source, "__start__")) continue; - fn pollRunningDebateStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { - const children = try self.store.getChildSteps(alloc, step.id); - if (children.len == 0) return; + // Dead sources are considered satisfied (their branch was skipped) + if (dead_nodes.get(edge.source) != null) continue; - // Separate participants from judge child - var participants: std.ArrayListUnmanaged(types.StepRow) = .empty; - var judge_child: ?types.StepRow = null; + const source_completed = completed_nodes.get(edge.source) != null; - for (children) |child| { - if (std.mem.indexOf(u8, child.def_step_id, "_judge") != null) { - judge_child = child; - } else { - try participants.append(alloc, child); + if (!source_completed) { + all_satisfied = false; + break; } - } - // Check if judge child exists and is terminal - if (judge_child) |judge| { - if (std.mem.eql(u8, judge.status, "completed")) { - // Debate completes with judge output - const output = judge.output_json orelse try wrapOutput(alloc, "debate completed"); - try self.store.updateStepStatus(step.id, "completed", null, output, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", output); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.completed", run_row.id, step.id, output, self.metrics); - log.info("debate step {s} completed (judge decided)", .{step.id}); - return; - } else if (std.mem.eql(u8, judge.status, "failed")) { - const err_text = judge.error_text orelse "judge failed"; - try self.store.updateStepStatus(step.id, "failed", null, null, err_text, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.failed", "{}"); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.failed", run_row.id, step.id, "{}", self.metrics); - log.info("debate step {s} failed (judge failed)", .{step.id}); - 
return; + if (edge.condition) |cond| { + any_conditional_edge = true; + if (route_results.get(edge.source)) |actual| { + if (std.mem.eql(u8, actual, cond)) { + any_conditional_satisfied = true; + } + } } - // Judge still in progress, wait - return; } - // No judge child yet — check if all participants are done - var all_done = true; - var any_failed = false; - for (participants.items) |child| { - if (std.mem.eql(u8, child.status, "failed")) { - any_failed = true; - continue; - } - if (!std.mem.eql(u8, child.status, "completed") and !std.mem.eql(u8, child.status, "skipped")) { - all_done = false; - } - } + if (!all_satisfied) continue; - if (!all_done) return; // Still waiting for participants + // If there are conditional edges, at least one must be satisfied + if (any_conditional_edge and !any_conditional_satisfied) continue; - if (any_failed) { - try self.store.updateStepStatus(step.id, "failed", null, null, "debate participant failed", step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.failed", "{}"); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.failed", run_row.id, step.id, "{}", self.metrics); - return; - } + try ready.append(alloc, target); + } - // All participants done — collect outputs and create judge child - var response_items: std.ArrayListUnmanaged([]const u8) = .empty; - for (participants.items) |child| { - if (child.output_json) |oj| { - const extracted = extractOutputField(alloc, oj) catch oj; - try response_items.append(alloc, extracted); - } else { - try response_items.append(alloc, ""); - } - } + return ready.toOwnedSlice(alloc); +} - // Build debate_responses as JSON array - const debate_responses = try serializeStringArray(alloc, response_items.items); - - // Get judge_template - const judge_template = try getStepField(alloc, run_row.workflow_json, step.def_step_id, "judge_template") orelse { - // No judge template — complete with collected responses - const output = try wrapOutput(alloc, debate_responses); 
- try self.store.updateStepStatus(step.id, "completed", null, output, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", output); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.completed", run_row.id, step.id, output, self.metrics); - log.info("debate step {s} completed (no judge template, returning responses)", .{step.id}); - return; - }; +// ── Workflow JSON Helpers ──────────────────────────────────────────── - // Render judge_template: replace {{debate_responses}} with actual responses - // Simple string replacement since it's a special variable - var rendered_judge_prompt: []const u8 = judge_template; - if (std.mem.indexOf(u8, judge_template, "{{debate_responses}}")) |_| { - rendered_judge_prompt = try std.mem.replaceOwned(u8, alloc, judge_template, "{{debate_responses}}", debate_responses); - } +/// Get the JSON string for a specific node from workflow_json. +/// Workflow format: {"nodes": {"node_name": {...}}, "edges": [...]} +fn getNodeJson(alloc: std.mem.Allocator, workflow_json: []const u8, node_name: []const u8) ?[]const u8 { + const parsed = json.parseFromSlice(json.Value, alloc, workflow_json, .{}) catch return null; + return getNodeJsonFromRoot(alloc, parsed.value, node_name); +} - // Create judge child step with rendered prompt in input_json - const judge_id_buf = ids.generateId(); - const judge_id = try alloc.dupe(u8, &judge_id_buf); - const judge_def_id = try std.fmt.allocPrint(alloc, "{s}_judge", .{step.def_step_id}); +fn getNodeJsonFromRoot(alloc: std.mem.Allocator, root: json.Value, node_name: []const u8) ?[]const u8 { + if (root != .object) return null; - const judge_input = try buildRenderedPromptInputJson(alloc, rendered_judge_prompt); - const judge_idx: i64 = @intCast(participants.items.len); + const nodes = root.object.get("nodes") orelse return null; + if (nodes != .object) return null; - try self.store.insertStep( - judge_id, - run_row.id, - judge_def_id, - "task", - "ready", - 
judge_input, - step.max_attempts, - step.timeout_ms, - step.id, // parent_step_id - judge_idx, - ); + const node = nodes.object.get(node_name) orelse return null; + return serializeJsonValue(alloc, node) catch null; +} - log.info("debate step {s} created judge child {s}", .{ step.id, judge_id }); - } +fn workflowHasNode(root: json.Value, node_name: []const u8) bool { + if (root != .object) return false; + const nodes = root.object.get("nodes") orelse return false; + if (nodes != .object) return false; + return nodes.object.get(node_name) != null; +} - // ── executeGroupChatStep ───────────────────────────────────────── - // - // First tick: parse participants, mark as running, start round 1. - // Dispatch is attempted but may fail (no workers in test). +/// Get a string field from a node's JSON. +fn getNodeField(alloc: std.mem.Allocator, node_json: []const u8, field: []const u8) ?[]const u8 { + const parsed = json.parseFromSlice(json.Value, alloc, node_json, .{}) catch return null; + if (parsed.value != .object) return null; + const val = parsed.value.object.get(field) orelse return null; + if (val == .string) return alloc.dupe(u8, val.string) catch null; + return serializeJsonValue(alloc, val) catch null; +} - fn executeGroupChatStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { - // 1. Parse participants from workflow_json - const participants_raw = try getStepFieldRaw(alloc, run_row.workflow_json, step.def_step_id, "participants") orelse { - log.warn("no participants for group_chat step {s}", .{step.def_step_id}); - try self.store.updateStepStatus(step.id, "failed", null, null, "missing participants in group_chat definition", step.attempt); - return; - }; +/// Get the state schema JSON from a workflow definition. +/// Looks up "state_schema" first (canonical key used by API/validation), +/// then falls back to "schema" for inline workflow definitions in tests. 
+fn getSchemaJson(alloc: std.mem.Allocator, workflow_json: []const u8) []const u8 { + return getWorkflowField(alloc, workflow_json, "state_schema") orelse + getWorkflowField(alloc, workflow_json, "schema") orelse + "{}"; +} - const parsed_participants = std.json.parseFromSlice(std.json.Value, alloc, participants_raw, .{}) catch { - try self.store.updateStepStatus(step.id, "failed", null, null, "invalid participants JSON", step.attempt); - return; - }; +/// Get a top-level field from workflow_json. +fn getWorkflowField(alloc: std.mem.Allocator, workflow_json: []const u8, field: []const u8) ?[]const u8 { + const parsed = json.parseFromSlice(json.Value, alloc, workflow_json, .{}) catch return null; + if (parsed.value != .object) return null; + const val = parsed.value.object.get(field) orelse return null; + if (val == .string) return alloc.dupe(u8, val.string) catch null; + return serializeJsonValue(alloc, val) catch null; +} - if (parsed_participants.value != .array or parsed_participants.value.array.items.len == 0) { - try self.store.updateStepStatus(step.id, "failed", null, null, "participants must be a non-empty array", step.attempt); - return; - } +fn getRuntimeStringSetting( + alloc: std.mem.Allocator, + state_json: []const u8, + workflow_json: []const u8, + field_names: []const []const u8, +) ?[]const u8 { + for (field_names) |field_name| { + if (getConfigString(alloc, state_json, field_name)) |value| return value; + } + for (field_names) |field_name| { + if (getWorkflowField(alloc, workflow_json, field_name)) |value| return value; + } + return null; +} - // 2. 
Get prompt_template for round 1 - const prompt_template = try getStepField(alloc, run_row.workflow_json, step.def_step_id, "prompt_template") orelse { - try self.store.updateStepStatus(step.id, "failed", null, null, "missing prompt_template in group_chat definition", step.attempt); - return; - }; +fn getConfigString(alloc: std.mem.Allocator, state_json: []const u8, field_name: []const u8) ?[]const u8 { + const path = std.fmt.allocPrint(alloc, "state.__config.{s}", .{field_name}) catch return null; + defer alloc.free(path); - // 3. Render prompt template - const ctx = try buildTemplateContext(alloc, run_row, step, self.store); - const rendered_prompt = templates.render(alloc, prompt_template, ctx) catch |err| { - log.err("template render failed for group_chat step {s}: {}", .{ step.id, err }); - try self.store.updateStepStatus(step.id, "failed", null, null, "template render failed", step.attempt); - return; - }; + const raw = state_mod.getStateValue(alloc, state_json, path) catch return null; + const raw_value = raw orelse return null; + defer alloc.free(raw_value); - // 4. Mark step as "running" - try self.store.updateStepStatus(step.id, "running", null, null, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.running", "{}"); - - // 5. 
Dispatch round 1 to each participant (best-effort, failures logged) - const participant_items = parsed_participants.value.array.items; - for (participant_items) |p_val| { - if (p_val != .object) continue; - const p_obj = p_val.object; - - const role = if (p_obj.get("role")) |r| blk: { - if (r == .string) break :blk r.string; - break :blk "participant"; - } else "participant"; - - // Try to dispatch to a worker matching participant tags - const tags_val = p_obj.get("tags"); - var tag_list: std.ArrayListUnmanaged([]const u8) = .empty; - if (tags_val) |tv| { - if (tv == .array) { - for (tv.array.items) |tag_item| { - if (tag_item == .string) { - try tag_list.append(alloc, tag_item.string); - } - } - } - } + const parsed = json.parseFromSlice(json.Value, alloc, raw_value, .{}) catch return null; + defer parsed.deinit(); + if (parsed.value != .string) return null; + return alloc.dupe(u8, parsed.value.string) catch null; +} - // Get workers - const workers = try self.store.listWorkers(alloc); - var worker_infos: std.ArrayListUnmanaged(dispatch.WorkerInfo) = .empty; - for (workers) |w| { - const current_tasks = self.store.countRunningStepsByWorker(w.id) catch 0; - try worker_infos.append(alloc, .{ - .id = w.id, - .url = w.url, - .token = w.token, - .protocol = w.protocol, - .model = w.model, - .tags_json = w.tags_json, - .max_concurrent = w.max_concurrent, - .status = w.status, - .current_tasks = current_tasks, - }); - } +fn resolveStoreUpdateValue(alloc: std.mem.Allocator, state_json: []const u8, value: json.Value) ![]const u8 { + if (value == .string and std.mem.startsWith(u8, value.string, "state.")) { + const raw = try state_mod.getStateValue(alloc, state_json, value.string); + return raw orelse try alloc.dupe(u8, "null"); + } + return serializeJsonValue(alloc, value); +} - const selected = try dispatch.selectWorker(alloc, worker_infos.items, tag_list.items); - if (selected) |worker| { - const result = try dispatch.dispatchStep( - alloc, - worker.url, - worker.token, 
- worker.protocol, - worker.model, - run_row.id, - step.id, - rendered_prompt, - ); - if (result.success) { - try self.store.insertChatMessage(run_row.id, step.id, 1, role, worker.id, result.output); - } else { - log.warn("group_chat dispatch failed for role {s}: {s}", .{ role, result.error_text orelse "unknown" }); - } - } else { - log.debug("no worker available for group_chat participant role {s}", .{role}); - } - } +fn putStoreValueViaHttp( + alloc: std.mem.Allocator, + base_url: []const u8, + api_token: ?[]const u8, + namespace: []const u8, + key: []const u8, + value_json: []const u8, +) !void { + var client = tracker_client.TrackerClient.init(alloc, base_url, api_token); + const ok = try client.storePutValue(namespace, key, value_json); + if (!ok) return error.StoreWriteFailed; +} - log.info("group_chat step {s} started round 1 with {d} participants", .{ step.id, participant_items.len }); +fn encodePathSegment(allocator: std.mem.Allocator, value: []const u8) ![]const u8 { + var buf: std.ArrayListUnmanaged(u8) = .empty; + errdefer buf.deinit(allocator); + + for (value) |byte| { + if ((byte >= 'A' and byte <= 'Z') or + (byte >= 'a' and byte <= 'z') or + (byte >= '0' and byte <= '9') or + byte == '-' or + byte == '_' or + byte == '.' or + byte == '~') + { + try buf.append(allocator, byte); + } else { + try buf.writer(allocator).print("%{X:0>2}", .{byte}); + } } - // ── pollRunningGroupChatStep ───────────────────────────────────── - // - // Each tick: check current round, dispatch next round or complete. - - fn pollRunningGroupChatStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { - // 1. Get all chat messages for this step - const messages = try self.store.getChatMessages(alloc, step.id); - - // 2. 
Parse configuration - const max_rounds = try getStepFieldInt(alloc, run_row.workflow_json, step.def_step_id, "max_rounds") orelse 5; - const exit_condition = try getStepField(alloc, run_row.workflow_json, step.def_step_id, "exit_condition"); - - // 3. Parse participants to know expected count per round - const participants_raw = try getStepFieldRaw(alloc, run_row.workflow_json, step.def_step_id, "participants") orelse return; - const parsed_participants = std.json.parseFromSlice(std.json.Value, alloc, participants_raw, .{}) catch return; - if (parsed_participants.value != .array) return; - const num_participants: i64 = @intCast(parsed_participants.value.array.items.len); - - // 4. Determine current round from messages - var current_round: i64 = 0; - var current_round_count: i64 = 0; - for (messages) |msg| { - if (msg.round > current_round) { - current_round = msg.round; - current_round_count = 1; - } else if (msg.round == current_round) { - current_round_count += 1; - } - } + return buf.toOwnedSlice(allocator); +} - if (current_round == 0) return; // No messages yet, wait for initial dispatch +var test_store_write_base_url: []const u8 = ""; +var test_store_write_api_token: ?[]const u8 = null; +var test_store_write_namespace: []const u8 = ""; +var test_store_write_key: []const u8 = ""; +var test_store_write_value_json: []const u8 = ""; - // 5. Check if current round is complete (all participants responded) - if (current_round_count < num_participants) { - // Round not complete, wait - return; - } +fn mockStoreWriter( + alloc: std.mem.Allocator, + base_url: []const u8, + api_token: ?[]const u8, + namespace: []const u8, + key: []const u8, + value_json: []const u8, +) !void { + _ = alloc; + test_store_write_base_url = base_url; + test_store_write_api_token = api_token; + test_store_write_namespace = namespace; + test_store_write_key = key; + test_store_write_value_json = value_json; +} - // 6. 
Check exit condition in latest round's messages - if (exit_condition) |cond| { - for (messages) |msg| { - if (msg.round == current_round) { - if (std.mem.indexOf(u8, msg.message, cond) != null) { - // Exit condition met — complete with transcript - const transcript = try buildChatTranscript(alloc, messages); - const output = try wrapOutput(alloc, transcript); - try self.store.updateStepStatus(step.id, "completed", null, output, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", output); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.completed", run_row.id, step.id, output, self.metrics); - log.info("group_chat step {s} completed (exit condition met at round {d})", .{ step.id, current_round }); - return; - } - } - } - } +/// Get worker tags from node definition. +fn getNodeTags(alloc: std.mem.Allocator, node_json: []const u8) []const []const u8 { + const parsed = json.parseFromSlice(json.Value, alloc, node_json, .{}) catch return &.{}; + if (parsed.value != .object) return &.{}; + const tags = parsed.value.object.get("worker_tags") orelse return &.{}; + if (tags != .array) return &.{}; - // 7. Check if max rounds reached - if (current_round >= max_rounds) { - const transcript = try buildChatTranscript(alloc, messages); - const output = try wrapOutput(alloc, transcript); - try self.store.updateStepStatus(step.id, "completed", null, output, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", output); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.completed", run_row.id, step.id, output, self.metrics); - log.info("group_chat step {s} completed (max rounds {d} reached)", .{ step.id, max_rounds }); - return; + var result: std.ArrayListUnmanaged([]const u8) = .empty; + for (tags.array.items) |item| { + if (item == .string) { + result.append(alloc, item.string) catch continue; } + } + return result.toOwnedSlice(alloc) catch &.{}; +} - // 8. 
Start next round — build chat history and dispatch - const next_round = current_round + 1; - const chat_history = try buildChatTranscript(alloc, messages); +// ── JSON / Serialization Helpers ──────────────────────────────────── - const round_template = try getStepField(alloc, run_row.workflow_json, step.def_step_id, "round_template") orelse { - // No round_template — complete with what we have - const output = try wrapOutput(alloc, chat_history); - try self.store.updateStepStatus(step.id, "completed", null, output, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", output); - return; - }; +fn serializeJsonValue(alloc: std.mem.Allocator, value: json.Value) ![]const u8 { + var out: std.io.Writer.Allocating = .init(alloc); + var jw: json.Stringify = .{ .writer = &out.writer }; + try jw.write(value); + return try out.toOwnedSlice(); +} - // Dispatch to each participant with round_template - const participant_items = parsed_participants.value.array.items; - for (participant_items) |p_val| { - if (p_val != .object) continue; - const p_obj = p_val.object; - - const role = if (p_obj.get("role")) |r| blk: { - if (r == .string) break :blk r.string; - break :blk "participant"; - } else "participant"; - - // Render round_template with {{chat_history}} and {{role}} - var rendered = try std.mem.replaceOwned(u8, alloc, round_template, "{{chat_history}}", chat_history); - rendered = try std.mem.replaceOwned(u8, alloc, rendered, "{{role}}", role); - - // Get participant tags - const tags_val = p_obj.get("tags"); - var tag_list: std.ArrayListUnmanaged([]const u8) = .empty; - if (tags_val) |tv| { - if (tv == .array) { - for (tv.array.items) |tag_item| { - if (tag_item == .string) { - try tag_list.append(alloc, tag_item.string); - } - } - } - } +/// Wrap a raw output string as {"output": "..."} JSON. 
+fn wrapOutput(alloc: std.mem.Allocator, output: []const u8) ![]const u8 { + return json.Stringify.valueAlloc(alloc, .{ + .output = output, + }, .{}); +} - // Select worker and dispatch - const workers = try self.store.listWorkers(alloc); - var worker_infos: std.ArrayListUnmanaged(dispatch.WorkerInfo) = .empty; - for (workers) |w| { - const current_tasks = self.store.countRunningStepsByWorker(w.id) catch 0; - try worker_infos.append(alloc, .{ - .id = w.id, - .url = w.url, - .token = w.token, - .protocol = w.protocol, - .model = w.model, - .tags_json = w.tags_json, - .max_concurrent = w.max_concurrent, - .status = w.status, - .current_tasks = current_tasks, - }); - } +/// Escape a string as a JSON string literal (with quotes). +fn jsonStringify(alloc: std.mem.Allocator, s: []const u8) ![]const u8 { + return json.Stringify.valueAlloc(alloc, s, .{}); +} - const selected = try dispatch.selectWorker(alloc, worker_infos.items, tag_list.items); - if (selected) |worker| { - const result = try dispatch.dispatchStep( - alloc, - worker.url, - worker.token, - worker.protocol, - worker.model, - run_row.id, - step.id, - rendered, - ); - if (result.success) { - try self.store.insertChatMessage(run_row.id, step.id, next_round, role, worker.id, result.output); - } else { - log.warn("group_chat round {d} dispatch failed for role {s}", .{ next_round, role }); - } - } else { - log.debug("no worker for group_chat round {d} participant role {s}", .{ next_round, role }); - } - } +/// Resolve the state path used by a send node. `items_key` is the canonical +/// field; `items_from` is accepted as a compatibility alias. +fn getSendItemsPath(alloc: std.mem.Allocator, node_json: []const u8) ?[]const u8 { + return getNodeField(alloc, node_json, "items_key") orelse + getNodeField(alloc, node_json, "items_from"); +} - log.info("group_chat step {s} dispatched round {d}", .{ step.id, next_round }); +/// Build the state update payload for a task/agent node result. +/// +/// Precedence: +/// 1. 
explicit worker-provided `state_updates` +/// 2. node `output_key` / `output_mapping` +/// 3. legacy fallback to `{"output": "..."}` +fn buildTaskStateUpdates(alloc: std.mem.Allocator, node_json: []const u8, output: []const u8) ![]const u8 { + if (extractStateUpdates(alloc, output)) |updates| { + return updates; } - // ── executeSagaStep ───────────────────────────────────────────── - // - // First tick (step is "ready"): - // - Parse body array and compensations map from workflow definition - // - Create first body step as child (status="ready") - // - Initialize saga_state entries for all body steps - // - Mark saga step as "running" - - fn executeSagaStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { - // 1. Parse body array from step definition - const body_raw = try getStepFieldRaw(alloc, run_row.workflow_json, step.def_step_id, "body") orelse { - log.warn("no body for saga step {s}", .{step.def_step_id}); - try self.store.updateStepStatus(step.id, "failed", null, null, "missing body in saga definition", step.attempt); - return; - }; - - const body_parsed = std.json.parseFromSlice(std.json.Value, alloc, body_raw, .{}) catch { - try self.store.updateStepStatus(step.id, "failed", null, null, "invalid body JSON in saga definition", step.attempt); - return; - }; - - if (body_parsed.value != .array or body_parsed.value.array.items.len == 0) { - try self.store.updateStepStatus(step.id, "failed", null, null, "body must be a non-empty array", step.attempt); - return; - } - - const body_items = body_parsed.value.array.items; + const output_key = getNodeField(alloc, node_json, "output_key"); + const output_mapping_json = getNodeObjectField(alloc, node_json, "output_mapping"); + if (output_key == null and output_mapping_json == null) { + return std.fmt.allocPrint(alloc, "{{\"output\":{s}}}", .{try jsonStringify(alloc, output)}); + } - // 2. 
Parse compensations map (optional) - const comp_raw = try getStepFieldRaw(alloc, run_row.workflow_json, step.def_step_id, "compensations"); - var comp_map: ?std.json.ObjectMap = null; - if (comp_raw) |cr| { - const comp_parsed = std.json.parseFromSlice(std.json.Value, alloc, cr, .{}) catch null; - if (comp_parsed) |cp| { - if (cp.value == .object) { - comp_map = cp.value.object; - } - } - } + var arena = std.heap.ArenaAllocator.init(alloc); + defer arena.deinit(); + const arena_alloc = arena.allocator(); - // 3. Initialize saga_state for all body steps and create first child - for (body_items, 0..) |body_item, i| { - const body_def_id = switch (body_item) { - .string => |s| s, - else => continue, - }; + var result = json.ObjectMap.init(arena_alloc); + const parsed_output = json.parseFromSlice(json.Value, arena_alloc, output, .{}) catch null; - // Look up compensation for this body step - var comp_def_id: ?[]const u8 = null; - if (comp_map) |cm| { - if (cm.get(body_def_id)) |cv| { - if (cv == .string) { - comp_def_id = cv.string; + if (output_key) |key| { + if (parsed_output) |parsed| { + try result.put(key, parsed.value); + } else { + try result.put(key, .{ .string = output }); + } + } + + if (output_mapping_json) |mapping_json| { + const parsed_mapping = json.parseFromSlice(json.Value, arena_alloc, mapping_json, .{}) catch null; + if (parsed_mapping) |mapping| { + if (mapping.value == .object and parsed_output != null) { + var it = mapping.value.object.iterator(); + while (it.next()) |entry| { + if (entry.value_ptr.* != .string) continue; + const source_path = entry.value_ptr.string; + const raw_val = state_mod.getStateValue(arena_alloc, output, source_path) catch null; + if (raw_val) |value_json| { + const parsed_value = json.parseFromSlice(json.Value, arena_alloc, value_json, .{}) catch continue; + try result.put(entry.key_ptr.*, parsed_value.value); } } } - - // Insert saga_state entry - try self.store.insertSagaState(run_row.id, step.id, body_def_id, 
comp_def_id); - - // Create child step for first body step only (rest created sequentially) - if (i == 0) { - const body_step_type = try getStepField(alloc, run_row.workflow_json, body_def_id, "type") orelse "task"; - const child_id_buf = ids.generateId(); - const child_id = try alloc.dupe(u8, &child_id_buf); - - try self.store.insertStep( - child_id, - run_row.id, - body_def_id, - body_step_type, - "ready", - step.input_json, - step.max_attempts, - step.timeout_ms, - step.id, // parent_step_id - 0, // item_index - ); - log.info("saga step {s} created first body child {s} (def: {s})", .{ step.id, child_id, body_def_id }); - } } - - // 4. Mark saga step as "running" - try self.store.updateStepStatus(step.id, "running", null, null, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.running", "{}"); - log.info("saga step {s} started with {d} body steps", .{ step.id, body_items.len }); } - // ── pollRunningSagaStep ────────────────────────────────────────── - // - // Each tick: - // - Get saga_state entries to understand progress - // - Find current body step child and check its status - // - If completed: update saga_state, create next body step - // - If all body steps completed: mark saga completed - // - If body step failed: enter compensation mode - // - Track compensation progress - - fn pollRunningSagaStep(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow) !void { - const children = try self.store.getChildSteps(alloc, step.id); - if (children.len == 0) return; - - const saga_states = try self.store.getSagaStates(alloc, run_row.id, step.id); - if (saga_states.len == 0) return; - - // Parse body array to know the order - const body_raw = try getStepFieldRaw(alloc, run_row.workflow_json, step.def_step_id, "body") orelse return; - const body_parsed = std.json.parseFromSlice(std.json.Value, alloc, body_raw, .{}) catch return; - if (body_parsed.value != .array) return; - const body_items = 
body_parsed.value.array.items; - - // Build body def IDs list in order - var body_def_ids: std.ArrayListUnmanaged([]const u8) = .empty; - for (body_items) |bi| { - if (bi == .string) { - try body_def_ids.append(alloc, bi.string); - } - } - - // Check if we're in compensation mode (any saga_state has status "compensating") - var in_compensation = false; - for (saga_states) |ss| { - if (std.mem.eql(u8, ss.status, "compensating")) { - in_compensation = true; - break; - } - } - - if (in_compensation) { - // In compensation mode: check if current compensation child is done - try self.pollSagaCompensation(alloc, run_row, step, children, saga_states, body_def_ids.items); - return; - } - - // Forward mode: check the current body step child - // Find which body step we're on by looking at saga_states - var current_body_idx: ?usize = null; - var failed_body_def_id: ?[]const u8 = null; - - for (saga_states, 0..) |ss, i| { - if (std.mem.eql(u8, ss.status, "pending")) { - // This is the next body step to process or the current one - // Check if there's a child for this body step - var has_child = false; - for (children) |child| { - if (std.mem.eql(u8, child.def_step_id, ss.body_step_id)) { - has_child = true; - if (std.mem.eql(u8, child.status, "completed")) { - // Body step completed — update saga_state - try self.store.updateSagaState(run_row.id, step.id, ss.body_step_id, "completed"); - log.info("saga body step {s} completed", .{ss.body_step_id}); - // Create next body step if there is one - if (i + 1 < saga_states.len) { - const next_def_id = saga_states[i + 1].body_step_id; - const next_type = try getStepField(alloc, run_row.workflow_json, next_def_id, "type") orelse "task"; - const next_id_buf = ids.generateId(); - const next_id = try alloc.dupe(u8, &next_id_buf); - const next_idx: i64 = @intCast(i + 1); - - try self.store.insertStep( - next_id, - run_row.id, - next_def_id, - next_type, - "ready", - step.input_json, - step.max_attempts, - step.timeout_ms, - step.id, - 
next_idx, - ); - log.info("saga step {s} created body child {s} (def: {s})", .{ step.id, next_id, next_def_id }); - } - // Don't process further this tick - return; - } else if (std.mem.eql(u8, child.status, "failed")) { - // Body step failed — enter compensation mode - failed_body_def_id = ss.body_step_id; - current_body_idx = i; - break; - } - // Still running/ready — wait - return; - } - } - if (!has_child) { - // First pending step without a child — this shouldn't happen normally - // since executeSagaStep creates the first and we create subsequent ones - return; - } - break; - } - } - - // Check if ALL body steps are completed - var all_completed = true; - for (saga_states) |ss| { - if (!std.mem.eql(u8, ss.status, "completed")) { - all_completed = false; - break; - } - } - - if (all_completed) { - // Saga completed successfully — output is last body step's output - var last_output: ?[]const u8 = null; - for (children) |child| { - if (std.mem.eql(u8, child.status, "completed") and child.output_json != null) { - // Check if this child is the last body step - if (body_def_ids.items.len > 0 and - std.mem.eql(u8, child.def_step_id, body_def_ids.items[body_def_ids.items.len - 1])) - { - last_output = child.output_json; - } - } - } - const output = last_output orelse try wrapOutput(alloc, "saga completed"); - try self.store.updateStepStatus(step.id, "completed", null, output, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.completed", output); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.completed", run_row.id, step.id, output, self.metrics); - log.info("saga step {s} completed successfully", .{step.id}); - return; - } - - // Check if compensation has fully completed (all compensating states - // have become "compensated" and at least one is "failed") - { - var has_failed_state = false; - var has_unfinished_compensation = false; - for (saga_states) |ss| { - if (std.mem.eql(u8, ss.status, "failed")) { - has_failed_state = 
true; - } else if (std.mem.eql(u8, ss.status, "compensating")) { - has_unfinished_compensation = true; - } - } - if (has_failed_state and !has_unfinished_compensation) { - try self.finishSagaCompensation(alloc, run_row, step, saga_states); - return; - } - } - - // If a body step failed, start compensation - if (failed_body_def_id) |failed_def| { - log.info("saga step {s} body step {s} failed, starting compensation", .{ step.id, failed_def }); - - // Mark the failed body step in saga_state - try self.store.updateSagaState(run_row.id, step.id, failed_def, "failed"); - - // Find completed body steps and start compensating in reverse - // Mark all completed body steps as "compensating" - var completed_steps: std.ArrayListUnmanaged([]const u8) = .empty; - for (saga_states) |ss| { - if (std.mem.eql(u8, ss.status, "completed")) { - try completed_steps.append(alloc, ss.body_step_id); - try self.store.updateSagaState(run_row.id, step.id, ss.body_step_id, "compensating"); - } - } - - if (completed_steps.items.len == 0) { - // No completed steps to compensate — saga fails immediately - const output = try std.fmt.allocPrint(alloc, "{{\"failed_at\":\"{s}\",\"compensated\":[]}}", .{failed_def}); - try self.store.updateStepStatus(step.id, "failed", null, output, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.failed", "{}"); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.failed", run_row.id, step.id, "{}", self.metrics); - log.info("saga step {s} failed at {s}, no compensations needed", .{ step.id, failed_def }); - return; - } - - // Create the last completed step's compensation child (reverse order) - // Start from the last completed body step - const last_completed = completed_steps.items[completed_steps.items.len - 1]; - try self.createCompensationChild(alloc, run_row, step, saga_states, last_completed); - } - } - - /// Create a compensation child step for a given body step. 
- fn createCompensationChild(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, saga_step: types.StepRow, saga_states: []const types.SagaStateRow, body_def_id: []const u8) !void { - // Find the compensation def_id for this body step - var comp_def_id: ?[]const u8 = null; - for (saga_states) |ss| { - if (std.mem.eql(u8, ss.body_step_id, body_def_id)) { - comp_def_id = ss.compensation_step_id; - break; - } - } - - if (comp_def_id == null) { - // No compensation for this step — mark as compensated immediately - try self.store.updateSagaState(run_row.id, saga_step.id, body_def_id, "compensated"); - log.info("saga body step {s} has no compensation, marking compensated", .{body_def_id}); - return; - } - - const comp_type = try getStepField(alloc, run_row.workflow_json, comp_def_id.?, "type") orelse "task"; - const comp_child_id_buf = ids.generateId(); - const comp_child_id = try alloc.dupe(u8, &comp_child_id_buf); - - try self.store.insertStep( - comp_child_id, - run_row.id, - comp_def_id.?, - comp_type, - "ready", - "{}", - 1, // max_attempts - null, // timeout_ms - saga_step.id, // parent_step_id - null, // item_index - ); - log.info("saga step {s} created compensation child {s} for body {s}", .{ saga_step.id, comp_child_id, body_def_id }); - } - - /// Poll compensation progress in a saga step. 
- fn pollSagaCompensation(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow, children: []const types.StepRow, saga_states: []const types.SagaStateRow, body_def_ids: []const []const u8) !void { - // Find the body step currently being compensated (has a running/ready compensation child) - // Work backwards through body_def_ids to find the current compensating step - var compensating_body: ?[]const u8 = null; - var compensating_idx: ?usize = null; - - // Find compensating steps in reverse order (last completed first) - var i: usize = body_def_ids.len; - while (i > 0) { - i -= 1; - for (saga_states) |ss| { - if (std.mem.eql(u8, ss.body_step_id, body_def_ids[i]) and - std.mem.eql(u8, ss.status, "compensating")) - { - compensating_body = body_def_ids[i]; - compensating_idx = i; - break; - } - } - if (compensating_body != null) break; - } - - if (compensating_body == null) { - // All compensations done — build failure output and fail saga - try self.finishSagaCompensation(alloc, run_row, step, saga_states); - return; - } - - // Check if there's a compensation child for this body step - var comp_def_id: ?[]const u8 = null; - for (saga_states) |ss| { - if (std.mem.eql(u8, ss.body_step_id, compensating_body.?)) { - comp_def_id = ss.compensation_step_id; - break; - } - } - - if (comp_def_id == null) { - // No compensation defined — mark as compensated and move on - try self.store.updateSagaState(run_row.id, step.id, compensating_body.?, "compensated"); - return; - } - - // Find the compensation child step - var comp_child: ?types.StepRow = null; - for (children) |child| { - if (std.mem.eql(u8, child.def_step_id, comp_def_id.?)) { - comp_child = child; - } - } - - if (comp_child == null) { - // Compensation child not created yet — create it - try self.createCompensationChild(alloc, run_row, step, saga_states, compensating_body.?); - return; - } - - const comp = comp_child.?; - if (std.mem.eql(u8, comp.status, "completed")) { - // Compensation 
completed — mark this body step as compensated - try self.store.updateSagaState(run_row.id, step.id, compensating_body.?, "compensated"); - log.info("saga compensation for body step {s} completed", .{compensating_body.?}); - - // Find next compensating step (earlier in the list) - if (compensating_idx.? > 0) { - var next_idx: ?usize = null; - var j: usize = compensating_idx.?; - while (j > 0) { - j -= 1; - for (saga_states) |ss| { - if (std.mem.eql(u8, ss.body_step_id, body_def_ids[j]) and - std.mem.eql(u8, ss.status, "compensating")) - { - next_idx = j; - break; - } - } - if (next_idx != null) break; - } - - // Check if any compensating steps remain. We may have already - // updated some to compensated in previous iterations, so re-check. - // The next tick will pick them up via pollSagaCompensation. - } - } else if (std.mem.eql(u8, comp.status, "failed")) { - // Compensation itself failed — saga fails with compensation error - const err_msg = try std.fmt.allocPrint(alloc, "compensation step {s} failed", .{comp_def_id.?}); - try self.store.updateStepStatus(step.id, "failed", null, null, err_msg, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.failed", "{}"); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.failed", run_row.id, step.id, "{}", self.metrics); - log.info("saga step {s} failed during compensation", .{step.id}); - } - // Otherwise compensation child still running/ready — wait - } - - /// Finish saga compensation and mark saga as failed with output. 
- fn finishSagaCompensation(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow, saga_states: []const types.SagaStateRow) !void { - // Build list of compensated steps and find failed_at step - var failed_at: []const u8 = "unknown"; - var compensated: std.ArrayListUnmanaged([]const u8) = .empty; - - for (saga_states) |ss| { - if (std.mem.eql(u8, ss.status, "failed")) { - failed_at = ss.body_step_id; - } else if (std.mem.eql(u8, ss.status, "compensated")) { - try compensated.append(alloc, ss.body_step_id); - } - } - - // Build output JSON - var comp_json: std.ArrayListUnmanaged(u8) = .empty; - try comp_json.append(alloc, '['); - for (compensated.items, 0..) |c, ci| { - if (ci > 0) try comp_json.append(alloc, ','); - try comp_json.append(alloc, '"'); - try comp_json.appendSlice(alloc, c); - try comp_json.append(alloc, '"'); - } - try comp_json.append(alloc, ']'); - const comp_str = try comp_json.toOwnedSlice(alloc); - - const output = try std.fmt.allocPrint(alloc, "{{\"failed_at\":\"{s}\",\"compensated\":{s}}}", .{ failed_at, comp_str }); - - try self.store.updateStepStatus(step.id, "failed", null, output, null, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.failed", output); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.failed", run_row.id, step.id, output, self.metrics); - log.info("saga step {s} failed at {s}, compensated {d} steps", .{ step.id, failed_at, compensated.items.len }); - } - - // ── handleCycleBack ───────────────────────────────────────────── - // - // When a condition/router routes to an already-completed step, - // detect the cycle and create new step instances for the cycle body. - - fn handleCycleBack(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, routing_step: types.StepRow, target_def_id: []const u8, all_steps: []const types.StepRow) !bool { - // 1. 
Check if target step is already completed/skipped - var target_completed = false; - for (all_steps) |s| { - if (std.mem.eql(u8, s.def_step_id, target_def_id) and - (std.mem.eql(u8, s.status, "completed") or std.mem.eql(u8, s.status, "skipped"))) - { - target_completed = true; - break; - } - } - - if (!target_completed) return false; // Not a backward edge - - // 2. Build cycle_key from routing step's def_step_id - const cycle_key = try std.fmt.allocPrint(alloc, "cycle_{s}", .{routing_step.def_step_id}); - - // 3. Get or initialize cycle state - const cycle_state = try self.store.getCycleState(run_row.id, cycle_key); - var iteration_count: i64 = 0; - var max_iterations: i64 = 10; - - if (cycle_state) |cs| { - iteration_count = cs.iteration_count; - max_iterations = cs.max_iterations; - } - - // Check max_cycle_iterations from workflow config - const wf_max = try getStepFieldInt(alloc, run_row.workflow_json, routing_step.def_step_id, "max_cycle_iterations"); - if (wf_max) |m| { - max_iterations = m; - } - - // 4. Check if limit exceeded - if (iteration_count >= max_iterations) { - const err_msg = try std.fmt.allocPrint(alloc, "cycle iteration limit ({d}) exceeded for {s}", .{ max_iterations, cycle_key }); - try self.store.updateStepStatus(routing_step.id, "failed", null, null, err_msg, routing_step.attempt); - try self.store.insertEvent(run_row.id, routing_step.id, "step.failed", "{}"); - try self.store.updateRunStatus(run_row.id, "failed", err_msg); - log.warn("cycle limit exceeded for {s}", .{cycle_key}); - return true; - } - - // 5. Increment cycle iteration - iteration_count += 1; - try self.store.upsertCycleState(run_row.id, cycle_key, iteration_count, max_iterations); - - // 6. 
Walk workflow_json steps to find the cycle body - // (from target_def_id through routing step's def_step_id) - const parsed = std.json.parseFromSlice(std.json.Value, alloc, run_row.workflow_json, .{}) catch return false; - if (parsed.value != .object) return false; - const steps_val = parsed.value.object.get("steps") orelse return false; - if (steps_val != .array) return false; - - // Build ordered list of step def IDs and their types + depends_on - const StepInfo = struct { - def_id: []const u8, - step_type: []const u8, - depends_on: []const []const u8, - }; - - var step_infos: std.ArrayListUnmanaged(StepInfo) = .empty; - for (steps_val.array.items) |step_val| { - if (step_val != .object) continue; - const step_obj = step_val.object; - const id_val = step_obj.get("id") orelse continue; - if (id_val != .string) continue; - - const stype = if (step_obj.get("type")) |t| blk: { - if (t == .string) break :blk t.string; - break :blk "task"; - } else "task"; - - var deps_list: std.ArrayListUnmanaged([]const u8) = .empty; - if (step_obj.get("depends_on")) |deps_val| { - if (deps_val == .array) { - for (deps_val.array.items) |dep_item| { - if (dep_item == .string) { - try deps_list.append(alloc, dep_item.string); - } - } - } - } - - try step_infos.append(alloc, .{ - .def_id = id_val.string, - .step_type = stype, - .depends_on = try deps_list.toOwnedSlice(alloc), - }); - } - - // Find indices of target and routing step in the workflow - var target_idx: ?usize = null; - var routing_idx: ?usize = null; - for (step_infos.items, 0..) |si, idx| { - if (std.mem.eql(u8, si.def_id, target_def_id)) target_idx = idx; - if (std.mem.eql(u8, si.def_id, routing_step.def_step_id)) routing_idx = idx; - } - - if (target_idx == null or routing_idx == null) return false; - if (target_idx.? >= routing_idx.?) return false; // Not a backward edge - - // 7. 
Create new step instances for target through routing step - var new_step_ids: std.ArrayListUnmanaged([]const u8) = .empty; - var new_def_ids: std.ArrayListUnmanaged([]const u8) = .empty; - - var idx: usize = target_idx.?; - while (idx <= routing_idx.?) : (idx += 1) { - const si = step_infos.items[idx]; - const new_id_buf = ids.generateId(); - const new_id = try alloc.dupe(u8, &new_id_buf); - - // First step in cycle is "ready", rest are "pending" - const initial_status: []const u8 = if (idx == target_idx.?) "ready" else "pending"; - - try self.store.insertStepWithIteration( - new_id, - run_row.id, - si.def_id, - si.step_type, - initial_status, - "{}", - 1, - null, - null, - null, - iteration_count, - ); - - try new_step_ids.append(alloc, new_id); - try new_def_ids.append(alloc, si.def_id); - } - - // 8. Chain new instances with deps among themselves - for (step_infos.items[target_idx.? .. routing_idx.? + 1], 0..) |si, si_idx| { - const new_id = new_step_ids.items[si_idx]; - for (si.depends_on) |dep_def_id| { - // Check if dep is within the cycle body - const dep_new_id = lookupId(new_def_ids.items, new_step_ids.items, dep_def_id); - if (dep_new_id) |did| { - try self.store.insertStepDep(new_id, did); - } - } - } - - // 9. For any step outside the cycle that depended on the routing step, - // add a dep to the new routing step instance - const new_routing_id = new_step_ids.items[new_step_ids.items.len - 1]; - for (all_steps) |s| { - // Skip steps inside the cycle body - var in_cycle = false; - for (new_def_ids.items) |cd| { - if (std.mem.eql(u8, s.def_step_id, cd)) { - in_cycle = true; - break; - } - } - if (in_cycle) continue; - - // Check if this step depends on the old routing step - const deps = try self.store.getStepDeps(alloc, s.id); - for (deps) |dep_id| { - if (std.mem.eql(u8, dep_id, routing_step.id)) { - // Add new dep to the new routing step instance - try self.store.insertStepDep(s.id, new_routing_id); - break; - } - } - } - - // 10. 
Mark the routing step as completed (the current instance) - const output = try std.fmt.allocPrint(alloc, "{{\"output\":\"cycle_back\",\"target\":\"{s}\",\"iteration\":{d}}}", .{ target_def_id, iteration_count }); - try self.store.updateStepStatus(routing_step.id, "completed", null, output, null, routing_step.attempt); - try self.store.insertEvent(run_row.id, routing_step.id, "step.completed", output); - log.info("cycle back from {s} to {s} (iteration {d})", .{ routing_step.def_step_id, target_def_id, iteration_count }); - - return true; - } - - // ── checkRunCompletion ─────────────────────────────────────────── - - fn checkRunCompletion(self: *Engine, run_id: []const u8, alloc: std.mem.Allocator) !void { - const steps = try self.store.getStepsByRun(alloc, run_id); - var all_terminal = true; - var any_failed = false; - for (steps) |step| { - if (std.mem.eql(u8, step.status, "completed") or std.mem.eql(u8, step.status, "skipped")) continue; - if (std.mem.eql(u8, step.status, "failed")) { - any_failed = true; - continue; - } - if (std.mem.eql(u8, step.status, "waiting_approval")) { - all_terminal = false; - continue; - } - all_terminal = false; // pending, ready, running - } - if (all_terminal and !any_failed) { - try self.store.updateRunStatus(run_id, "completed", null); - try self.store.insertEvent(run_id, null, "run.completed", "{}"); - // Fire run.completed callbacks - if (try self.store.getRun(alloc, run_id)) |run_row| { - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "run.completed", run_id, null, "{}", self.metrics); - } - log.info("run {s} completed", .{run_id}); - } else if (all_terminal and any_failed) { - try self.store.updateRunStatus(run_id, "failed", "one or more steps failed"); - try self.store.insertEvent(run_id, null, "run.failed", "{}"); - // Fire run.failed callbacks - if (try self.store.getRun(alloc, run_id)) |run_row| { - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "run.failed", run_id, null, "{}", self.metrics); - } - 
log.info("run {s} failed", .{run_id}); - } - } - - // ── Helpers ────────────────────────────────────────────────────── - - fn skipStepByDefId(self: *Engine, alloc: std.mem.Allocator, all_steps: []const types.StepRow, run_id: []const u8, target_def_id: []const u8) !void { - for (all_steps) |s| { - if (std.mem.eql(u8, s.def_step_id, target_def_id)) { - try self.store.updateStepStatus(s.id, "skipped", null, null, null, s.attempt); - try self.store.insertEvent(run_id, s.id, "step.skipped", "{}"); - log.info("skipped step {s} (def: {s})", .{ s.id, target_def_id }); - break; - } - } - _ = alloc; - } - - fn failStepWithError(self: *Engine, alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow, err_text: []const u8) !void { - try self.store.updateStepStatus(step.id, "failed", null, null, err_text, step.attempt); - try self.store.insertEvent(run_row.id, step.id, "step.failed", "{}"); - callbacks.fireCallbacks(alloc, run_row.callbacks_json, "step.failed", run_row.id, step.id, "{}", self.metrics); - } -}; - -fn computeRetryDelayMs(cfg: RuntimeConfig, step: types.StepRow, now_ms: i64) i64 { - var delay = cfg.retry_base_delay_ms; - var remaining_exp = step.attempt - 1; - while (remaining_exp > 0) : (remaining_exp -= 1) { - if (delay >= cfg.retry_max_delay_ms) break; - const doubled = delay * 2; - delay = if (doubled > cfg.retry_max_delay_ms) cfg.retry_max_delay_ms else doubled; - } - - const jitter_cap = if (cfg.retry_jitter_ms > 0) cfg.retry_jitter_ms else 0; - var jitter: i64 = 0; - if (jitter_cap > 0) { - const seed = std.hash.Wyhash.hash(0, step.id); - const mixed = seed ^ @as(u64, @intCast(now_ms)); - jitter = @as(i64, @intCast(mixed % @as(u64, @intCast(jitter_cap + 1)))); - } - return delay + jitter; -} - -// ── Free functions (workflow JSON helpers) ──────────────────────────── - -/// Parse workflow_json to find a step definition by def_step_id and return a string field. 
-fn getStepField(alloc: std.mem.Allocator, workflow_json: []const u8, def_step_id: []const u8, field: []const u8) !?[]const u8 { - const parsed = std.json.parseFromSlice(std.json.Value, alloc, workflow_json, .{}) catch { - return null; - }; - // Note: do not deinit here — the alloc is an arena - - const root = parsed.value; - if (root != .object) return null; - - const steps_val = root.object.get("steps") orelse return null; - if (steps_val != .array) return null; - - for (steps_val.array.items) |step_val| { - if (step_val != .object) continue; - const step_obj = step_val.object; - - const id_val = step_obj.get("id") orelse continue; - if (id_val != .string) continue; - if (!std.mem.eql(u8, id_val.string, def_step_id)) continue; - - const field_val = step_obj.get(field) orelse return null; - if (field_val == .string) { - return try alloc.dupe(u8, field_val.string); - } - return null; - } - return null; -} - -/// Parse workflow_json to find a step definition by def_step_id and return a field as raw JSON. -/// Unlike getStepField which only returns strings, this serializes any JSON value type. 
-fn getStepFieldRaw(alloc: std.mem.Allocator, workflow_json: []const u8, def_step_id: []const u8, field: []const u8) !?[]const u8 { - const parsed = std.json.parseFromSlice(std.json.Value, alloc, workflow_json, .{}) catch { - return null; - }; - - const root = parsed.value; - if (root != .object) return null; - - const steps_val = root.object.get("steps") orelse return null; - if (steps_val != .array) return null; - - for (steps_val.array.items) |step_val| { - if (step_val != .object) continue; - const step_obj = step_val.object; - - const id_val = step_obj.get("id") orelse continue; - if (id_val != .string) continue; - if (!std.mem.eql(u8, id_val.string, def_step_id)) continue; - - const field_val = step_obj.get(field) orelse return null; - if (field_val == .string) { - return try alloc.dupe(u8, field_val.string); - } - // Serialize non-string values as JSON - var out: std.io.Writer.Allocating = .init(alloc); - var jw: std.json.Stringify = .{ .writer = &out.writer }; - jw.write(field_val) catch return null; - return out.toOwnedSlice() catch return null; - } - return null; -} - -/// Parse workflow_json to find a step definition by def_step_id and return an integer field. 
-fn getStepFieldInt(alloc: std.mem.Allocator, workflow_json: []const u8, def_step_id: []const u8, field: []const u8) !?i64 { - const parsed = std.json.parseFromSlice(std.json.Value, alloc, workflow_json, .{}) catch { - return null; - }; - - const root = parsed.value; - if (root != .object) return null; - - const steps_val = root.object.get("steps") orelse return null; - if (steps_val != .array) return null; - - for (steps_val.array.items) |step_val| { - if (step_val != .object) continue; - const step_obj = step_val.object; - - const id_val = step_obj.get("id") orelse continue; - if (id_val != .string) continue; - if (!std.mem.eql(u8, id_val.string, def_step_id)) continue; - - const field_val = step_obj.get(field) orelse return null; - if (field_val == .integer) return field_val.integer; - return null; - } - return null; -} - -/// Parse workflow_json to find a step definition and get its worker_tags. -fn getStepTags(alloc: std.mem.Allocator, workflow_json: []const u8, def_step_id: []const u8) ![]const []const u8 { - const parsed = std.json.parseFromSlice(std.json.Value, alloc, workflow_json, .{}) catch { - return &.{}; - }; - - const root = parsed.value; - if (root != .object) return &.{}; - - const steps_val = root.object.get("steps") orelse return &.{}; - if (steps_val != .array) return &.{}; - - for (steps_val.array.items) |step_val| { - if (step_val != .object) continue; - const step_obj = step_val.object; - - const id_val = step_obj.get("id") orelse continue; - if (id_val != .string) continue; - if (!std.mem.eql(u8, id_val.string, def_step_id)) continue; - - const tags_val = step_obj.get("worker_tags") orelse return &.{}; - if (tags_val != .array) return &.{}; - - var tags: std.ArrayListUnmanaged([]const u8) = .empty; - for (tags_val.array.items) |tag_item| { - if (tag_item == .string) { - try tags.append(alloc, try alloc.dupe(u8, tag_item.string)); - } - } - return tags.toOwnedSlice(alloc); - } - return &.{}; -} - -/// Build a template Context from a run's 
input and completed step outputs. -fn buildTemplateContext(alloc: std.mem.Allocator, run_row: types.RunRow, step: types.StepRow, store: *Store) !templates.Context { - // Get all steps for this run to collect outputs - const all_steps = try store.getStepsByRun(alloc, run_row.id); - - var step_outputs: std.ArrayListUnmanaged(templates.Context.StepOutput) = .empty; - for (all_steps) |s| { - if (std.mem.eql(u8, s.status, "completed")) { - // Check if this step has children (fan_out/map) - if (std.mem.eql(u8, s.type, "fan_out") or std.mem.eql(u8, s.type, "map")) { - // Collect child outputs - const children = try store.getChildSteps(alloc, s.id); - var child_outputs: std.ArrayListUnmanaged([]const u8) = .empty; - for (children) |child| { - if (child.output_json) |oj| { - const extracted = extractOutputField(alloc, oj) catch oj; - try child_outputs.append(alloc, extracted); - } - } - try step_outputs.append(alloc, .{ - .step_id = s.def_step_id, - .output = null, - .outputs = child_outputs.items, - }); - } else { - // Regular step — single output - const output = if (s.output_json) |oj| - (extractOutputField(alloc, oj) catch oj) - else - null; - try step_outputs.append(alloc, .{ - .step_id = s.def_step_id, - .output = output, - .outputs = null, - }); - } - } - } - - // Determine item context (for map child steps) - const item: ?[]const u8 = if (step.parent_step_id != null) blk: { - // This is a child step of a map/fan_out — extract item from input_json - break :blk extractItemFromInput(alloc, step.input_json) catch null; - } else null; - - return templates.Context{ - .input_json = run_row.input_json, - .step_outputs = step_outputs.items, - .item = item, - }; -} - -/// Look up a generated ID by definition ID from parallel arrays. -fn lookupId(def_ids: []const []const u8, gen_ids: []const []const u8, target: []const u8) ?[]const u8 { - for (def_ids, 0..) 
|did, i| { - if (std.mem.eql(u8, did, target)) return gen_ids[i]; - } - return null; -} - -/// Find a step's status by ID from a list of steps. -fn findStepStatus(steps: []const types.StepRow, step_id: []const u8) ?[]const u8 { - for (steps) |s| { - if (std.mem.eql(u8, s.id, step_id)) return s.status; - } - return null; -} - -/// Find a step's def_step_id by step ID from a list of steps. -fn findStepDefId(steps: []const types.StepRow, step_id: []const u8) ?[]const u8 { - for (steps) |s| { - if (std.mem.eql(u8, s.id, step_id)) return s.def_step_id; - } - return null; -} - -/// Find a step's output_json by step ID from a list of steps. -fn findStepOutput(steps: []const types.StepRow, step_id: []const u8) ?[]const u8 { - for (steps) |s| { - if (std.mem.eql(u8, s.id, step_id)) { - if (s.output_json) |oj| { - return oj; - } - return null; - } - } - return null; -} - -/// Wrap a raw output string in a JSON object: {"output": "..."} -fn wrapOutput(alloc: std.mem.Allocator, output: []const u8) ![]const u8 { - // Use JSON serializer for proper escaping - var out: std.ArrayListUnmanaged(u8) = .empty; - try out.appendSlice(alloc, "{\"output\":"); - - // JSON-encode the output string - try out.append(alloc, '"'); - for (output) |ch| { - switch (ch) { - '"' => try out.appendSlice(alloc, "\\\""), - '\\' => try out.appendSlice(alloc, "\\\\"), - '\n' => try out.appendSlice(alloc, "\\n"), - '\r' => try out.appendSlice(alloc, "\\r"), - '\t' => try out.appendSlice(alloc, "\\t"), - else => try out.append(alloc, ch), - } - } - try out.append(alloc, '"'); - try out.append(alloc, '}'); - return try out.toOwnedSlice(alloc); -} - -/// Wrap an item value in a JSON object: {"item": "..."} -fn wrapItemJson(alloc: std.mem.Allocator, item: []const u8) ![]const u8 { - var out: std.ArrayListUnmanaged(u8) = .empty; - try out.appendSlice(alloc, "{\"item\":"); - - try out.append(alloc, '"'); - for (item) |ch| { - switch (ch) { - '"' => try out.appendSlice(alloc, "\\\""), - '\\' => try 
out.appendSlice(alloc, "\\\\"), - '\n' => try out.appendSlice(alloc, "\\n"), - '\r' => try out.appendSlice(alloc, "\\r"), - '\t' => try out.appendSlice(alloc, "\\t"), - else => try out.append(alloc, ch), - } - } - try out.append(alloc, '"'); - try out.append(alloc, '}'); - return try out.toOwnedSlice(alloc); -} - -/// Extract the "output" field from a JSON string like {"output": "..."}. -fn extractOutputField(alloc: std.mem.Allocator, json_str: []const u8) ![]const u8 { - const parsed = try std.json.parseFromSlice(std.json.Value, alloc, json_str, .{}); - const root = parsed.value; - if (root != .object) return json_str; - const output_val = root.object.get("output") orelse return json_str; - if (output_val == .string) return try alloc.dupe(u8, output_val.string); - return json_str; -} - -/// Extract an array of strings from a JSON field. -fn extractJsonArray(alloc: std.mem.Allocator, json_str: []const u8, field_name: []const u8) !?[][]const u8 { - const parsed = std.json.parseFromSlice(std.json.Value, alloc, json_str, .{}) catch { - return null; - }; - const root = parsed.value; - if (root != .object) return null; - - const arr_val = root.object.get(field_name) orelse return null; - if (arr_val != .array) return null; - - var items: std.ArrayListUnmanaged([]const u8) = .empty; - for (arr_val.array.items) |item| { - switch (item) { - .string => |s| try items.append(alloc, try alloc.dupe(u8, s)), - else => { - // Serialize non-string values as JSON - var json_out: std.io.Writer.Allocating = .init(alloc); - var jw: std.json.Stringify = .{ .writer = &json_out.writer }; - jw.write(item) catch continue; - const slice = json_out.toOwnedSlice() catch continue; - try items.append(alloc, slice); - }, - } - } - const result = try items.toOwnedSlice(alloc); - return result; -} - -/// Serialize an array of strings to a JSON array string. 
-fn serializeStringArray(alloc: std.mem.Allocator, items: []const []const u8) ![]const u8 { - var buf: std.ArrayListUnmanaged(u8) = .empty; - try buf.append(alloc, '['); - for (items, 0..) |item, i| { - if (i > 0) try buf.append(alloc, ','); - try buf.append(alloc, '"'); - for (item) |ch| { - switch (ch) { - '"' => try buf.appendSlice(alloc, "\\\""), - '\\' => try buf.appendSlice(alloc, "\\\\"), - '\n' => try buf.appendSlice(alloc, "\\n"), - '\r' => try buf.appendSlice(alloc, "\\r"), - '\t' => try buf.appendSlice(alloc, "\\t"), - else => try buf.append(alloc, ch), - } - } - try buf.append(alloc, '"'); - } - try buf.append(alloc, ']'); - return try buf.toOwnedSlice(alloc); -} - -/// Parsed handoff target information. -const HandoffTarget = struct { - tags: []const []const u8, - tags_str: []const u8, - message: ?[]const u8, -}; - -/// Extract handoff_to target from a worker output string. -/// Worker output may be raw text or JSON like: {"output": "...", "handoff_to": {"tags": [...], "message": "..."}} -fn extractHandoffTarget(alloc: std.mem.Allocator, output: []const u8) ?HandoffTarget { - // Try to parse the output as JSON - const parsed = std.json.parseFromSlice(std.json.Value, alloc, output, .{}) catch return null; - const root = parsed.value; - if (root != .object) return null; - - const handoff_val = root.object.get("handoff_to") orelse return null; - if (handoff_val != .object) return null; - - // Extract tags - const tags_val = handoff_val.object.get("tags") orelse return null; - if (tags_val != .array) return null; - - var tag_list: std.ArrayListUnmanaged([]const u8) = .empty; - var tags_str_buf: std.ArrayListUnmanaged(u8) = .empty; - - for (tags_val.array.items, 0..) 
|tag_item, i| { - if (tag_item == .string) { - tag_list.append(alloc, alloc.dupe(u8, tag_item.string) catch return null) catch return null; - if (i > 0) tags_str_buf.append(alloc, ',') catch return null; - tags_str_buf.appendSlice(alloc, tag_item.string) catch return null; - } - } - - if (tag_list.items.len == 0) return null; - - // Extract message (optional) - var message: ?[]const u8 = null; - if (handoff_val.object.get("message")) |msg_val| { - if (msg_val == .string) { - message = alloc.dupe(u8, msg_val.string) catch null; - } - } - - return HandoffTarget{ - .tags = tag_list.toOwnedSlice(alloc) catch return null, - .tags_str = tags_str_buf.toOwnedSlice(alloc) catch return null, - .message = message, - }; -} - -/// Build a formatted chat transcript from chat messages. -fn buildChatTranscript(alloc: std.mem.Allocator, messages: []const types.ChatMessageRow) ![]const u8 { - var buf: std.ArrayListUnmanaged(u8) = .empty; - for (messages, 0..) |msg, i| { - if (i > 0) try buf.appendSlice(alloc, "\\n"); - const line = try std.fmt.allocPrint(alloc, "[Round {d}] {s}: {s}", .{ msg.round, msg.role, msg.message }); - try buf.appendSlice(alloc, line); - } - return try buf.toOwnedSlice(alloc); -} - -/// Build input_json payload that carries an already rendered prompt for child task steps. -fn buildRenderedPromptInputJson(alloc: std.mem.Allocator, rendered_prompt: []const u8) ![]const u8 { - return std.json.Stringify.valueAlloc(alloc, .{ - .rendered_prompt = rendered_prompt, - }, .{}); -} - -/// Extract optional input_json.rendered_prompt for dynamic child task execution. 
-fn extractRenderedPromptFromInput(alloc: std.mem.Allocator, input_json: []const u8) ?[]const u8 { - const parsed = std.json.parseFromSlice(std.json.Value, alloc, input_json, .{}) catch { - return null; - }; - const root = parsed.value; - if (root != .object) return null; - const rendered = root.object.get("rendered_prompt") orelse return null; - if (rendered != .string) return null; - return alloc.dupe(u8, rendered.string) catch null; -} - -/// Extract the "item" field from input_json, or return the whole input_json -/// as item text if it's a simple value. -fn extractItemFromInput(alloc: std.mem.Allocator, input_json: []const u8) ![]const u8 { - const parsed = std.json.parseFromSlice(std.json.Value, alloc, input_json, .{}) catch { - return input_json; - }; - const root = parsed.value; - if (root != .object) return input_json; - const item_val = root.object.get("item") orelse return input_json; - if (item_val == .string) return try alloc.dupe(u8, item_val.string); - return input_json; -} - -// ── Tests ───────────────────────────────────────────────────────────── - -test "Engine: init and stop" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - var engine = Engine.init(&store, allocator, 500); - try std.testing.expect(engine.running.load(.acquire)); - engine.stop(); - try std.testing.expect(!engine.running.load(.acquire)); -} - -test "Engine: tick with no active runs" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - var engine = Engine.init(&store, allocator, 500); - // Should not error — no active runs - try engine.tick(); -} - -test "Engine: checkRunCompletion marks run completed" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - // Insert a run - try store.insertRun("r1", null, "running", "{\"steps\":[]}", "{}", "[]"); - - // Insert a 
completed step - try store.insertStep("s1", "r1", "step1", "task", "completed", "{}", 1, null, null, null); - - var engine = Engine.init(&store, allocator, 500); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - try engine.checkRunCompletion("r1", arena.allocator()); - - // Verify run status is "completed" - const run = (try store.getRun(arena.allocator(), "r1")).?; - try std.testing.expectEqualStrings("completed", run.status); -} - -test "Engine: checkRunCompletion marks run failed" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - try store.insertRun("r1", null, "running", "{\"steps\":[]}", "{}", "[]"); - try store.insertStep("s1", "r1", "step1", "task", "completed", "{}", 1, null, null, null); - try store.insertStep("s2", "r1", "step2", "task", "failed", "{}", 1, null, null, null); - - var engine = Engine.init(&store, allocator, 500); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - try engine.checkRunCompletion("r1", arena.allocator()); - - const run = (try store.getRun(arena.allocator(), "r1")).?; - try std.testing.expectEqualStrings("failed", run.status); -} - -test "Engine: checkRunCompletion does not complete with pending steps" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - try store.insertRun("r1", null, "running", "{\"steps\":[]}", "{}", "[]"); - try store.insertStep("s1", "r1", "step1", "task", "completed", "{}", 1, null, null, null); - try store.insertStep("s2", "r1", "step2", "task", "pending", "{}", 1, null, null, null); - - var engine = Engine.init(&store, allocator, 500); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - try engine.checkRunCompletion("r1", arena.allocator()); - - // Run should still be "running" - const run = (try store.getRun(arena.allocator(), "r1")).?; - try 
std.testing.expectEqualStrings("running", run.status); -} - -test "Engine: pending to ready promotion" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - const wf = - \\{"steps":[{"id":"s1","type":"task","prompt_template":"hello"},{"id":"s2","type":"task","prompt_template":"world","depends_on":["s1"]}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - - // s1 is completed, s2 is pending and depends on s1 - try store.insertStep("step1", "r1", "s1", "task", "completed", "{}", 1, null, null, null); - try store.insertStep("step2", "r1", "s2", "task", "pending", "{}", 1, null, null, null); - try store.insertStepDep("step2", "step1"); - - var engine = Engine.init(&store, allocator, 500); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - // Get run row - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - - // processRun should promote step2 from pending to ready - try engine.processRun(arena.allocator(), run_row); - - // Re-fetch step2 - const step2 = (try store.getStep(arena.allocator(), "step2")).?; - // It should be promoted to "ready" (not "pending") - // Note: since there are no workers, the task step won't actually execute, - // so it stays at "ready" - try std.testing.expectEqualStrings("ready", step2.status); -} - -test "Engine: approval step sets waiting_approval" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - const wf = - \\{"steps":[{"id":"approve1","type":"approval"}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step1", "r1", "approve1", "approval", "ready", "{}", 1, null, null, null); - - var engine = Engine.init(&store, allocator, 500); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try 
engine.processRun(arena.allocator(), run_row); - - const step = (try store.getStep(arena.allocator(), "step1")).?; - try std.testing.expectEqualStrings("waiting_approval", step.status); -} - -test "Engine: fan_out creates child steps" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - const wf = - \\{"steps":[{"id":"fan1","type":"fan_out","count":3}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step1", "r1", "fan1", "fan_out", "ready", "{}", 1, null, null, null); - - var engine = Engine.init(&store, allocator, 500); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - - // fan_out step should be completed - const step = (try store.getStep(arena.allocator(), "step1")).?; - try std.testing.expectEqualStrings("completed", step.status); - - // Should have created 3 child steps - const children = try store.getChildSteps(arena.allocator(), "step1"); - try std.testing.expectEqual(@as(usize, 3), children.len); - - // Each child should be "ready" and type "task" - for (children) |child| { - try std.testing.expectEqualStrings("ready", child.status); - try std.testing.expectEqualStrings("task", child.type); - } -} - -test "Engine: map creates child steps from input array" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - const wf = - \\{"steps":[{"id":"map1","type":"map","items_from":"$.topics"}]} - ; - const input = - \\{"topics":["AI","ML","DL"]} - ; - try store.insertRun("r1", null, "running", wf, input, "[]"); - try store.insertStep("step1", "r1", "map1", "map", "ready", "{}", 1, null, null, null); - - var engine = Engine.init(&store, allocator, 500); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); 
- - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - - // map step should be completed - const step = (try store.getStep(arena.allocator(), "step1")).?; - try std.testing.expectEqualStrings("completed", step.status); - - // Should have created 3 child steps - const children = try store.getChildSteps(arena.allocator(), "step1"); - try std.testing.expectEqual(@as(usize, 3), children.len); -} - -test "getStepField extracts prompt_template" { - const allocator = std.testing.allocator; - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - const wf = - \\{"steps":[{"id":"research","type":"task","prompt_template":"Research {{input.topic}}"}]} - ; - const result = try getStepField(arena.allocator(), wf, "research", "prompt_template"); - try std.testing.expect(result != null); - try std.testing.expectEqualStrings("Research {{input.topic}}", result.?); -} - -test "getStepField returns null for missing step" { - const allocator = std.testing.allocator; - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - const wf = - \\{"steps":[{"id":"research","type":"task"}]} - ; - const result = try getStepField(arena.allocator(), wf, "nonexistent", "prompt_template"); - try std.testing.expect(result == null); -} - -test "getStepFieldInt extracts count" { - const allocator = std.testing.allocator; - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - const wf = - \\{"steps":[{"id":"fan1","type":"fan_out","count":5}]} - ; - const result = try getStepFieldInt(arena.allocator(), wf, "fan1", "count"); - try std.testing.expect(result != null); - try std.testing.expectEqual(@as(i64, 5), result.?); -} - -test "extractJsonArray extracts string array" { - const allocator = std.testing.allocator; - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - const json = - \\{"topics":["AI","ML","DL"]} - ; - const result = 
try extractJsonArray(arena.allocator(), json, "topics"); - try std.testing.expect(result != null); - try std.testing.expectEqual(@as(usize, 3), result.?.len); - try std.testing.expectEqualStrings("AI", result.?[0]); - try std.testing.expectEqualStrings("ML", result.?[1]); - try std.testing.expectEqualStrings("DL", result.?[2]); -} - -test "wrapOutput creates valid JSON" { - const allocator = std.testing.allocator; - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - const result = try wrapOutput(arena.allocator(), "hello world"); - try std.testing.expectEqualStrings("{\"output\":\"hello world\"}", result); -} - -test "wrapOutput escapes special characters" { - const allocator = std.testing.allocator; - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - const result = try wrapOutput(arena.allocator(), "line1\nline2"); - try std.testing.expectEqualStrings("{\"output\":\"line1\\nline2\"}", result); -} - -test "build/extract rendered_prompt input JSON round-trip" { - const allocator = std.testing.allocator; - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - const input_json = try buildRenderedPromptInputJson(arena.allocator(), "say \"hi\"\\nnext"); - const prompt = extractRenderedPromptFromInput(arena.allocator(), input_json); - try std.testing.expect(prompt != null); - try std.testing.expectEqualStrings("say \"hi\"\\nnext", prompt.?); -} - -test "Engine: task step fallback uses input_json.rendered_prompt" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - try store.insertRun("r-rendered", null, "running", "{\"steps\":[]}", "{}", "[]"); - try store.insertWorker("w-rendered", "http://127.0.0.1:1", "", "webhook", null, "[]", 1, "registered"); - try store.insertStep("parent-step", "r-rendered", "missing-parent-def", "task", "completed", "{}", 1, null, null, null); - - var arena = 
std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const rendered_input = try buildRenderedPromptInputJson(arena.allocator(), "child fallback prompt"); - try store.insertStep( - "child-step", - "r-rendered", - "missing-child-def", - "task", - "ready", - rendered_input, - 2, - null, - "parent-step", - 0, - ); - - var engine = Engine.init(&store, allocator, 500); - const run_row = (try store.getRun(arena.allocator(), "r-rendered")).?; - try engine.processRun(arena.allocator(), run_row); - - const child = (try store.getStep(arena.allocator(), "child-step")).?; - try std.testing.expectEqualStrings("ready", child.status); - try std.testing.expectEqual(@as(i64, 2), child.attempt); -} - -test "Engine: rendered_prompt has priority over parent prompt_template" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - const wf = - \\{"steps":[{"id":"parent","type":"debate","prompt_template":"parent template"},{"id":"child","type":"task","prompt_template":"child template"}]} - ; - try store.insertRun("r-priority", null, "running", wf, "{}", "[]"); - try store.insertStep("parent-step", "r-priority", "parent", "debate", "running", "{}", 1, null, null, null); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - const rendered_input = try buildRenderedPromptInputJson(arena.allocator(), "rendered prompt"); - try store.insertStep( - "child-step", - "r-priority", - "child", - "task", - "ready", - rendered_input, - 1, - null, - "parent-step", - 0, - ); - - var engine = Engine.init(&store, allocator, 500); - const run_row = (try store.getRun(arena.allocator(), "r-priority")).?; - const child_step = (try store.getStep(arena.allocator(), "child-step")).?; - const source = (try engine.resolveTaskPromptSource(arena.allocator(), run_row, child_step)).?; - - switch (source) { - .rendered => |prompt| try std.testing.expectEqualStrings("rendered prompt", prompt), - .template => 
try std.testing.expect(false), - } -} - -test "findStepStatus finds matching step" { - const steps = [_]types.StepRow{ - makeTestStepRow("s1", "completed"), - makeTestStepRow("s2", "pending"), - }; - const status = findStepStatus(&steps, "s2"); - try std.testing.expect(status != null); - try std.testing.expectEqualStrings("pending", status.?); -} - -test "findStepStatus returns null for missing step" { - const steps = [_]types.StepRow{ - makeTestStepRow("s1", "completed"), - }; - const status = findStepStatus(&steps, "s999"); - try std.testing.expect(status == null); -} - -fn makeTestStepRow(id: []const u8, status: []const u8) types.StepRow { - return .{ - .id = id, - .run_id = "r1", - .def_step_id = id, - .type = "task", - .status = status, - .worker_id = null, - .input_json = "{}", - .output_json = null, - .error_text = null, - .attempt = 1, - .max_attempts = 1, - .timeout_ms = null, - .next_attempt_at_ms = null, - .parent_step_id = null, - .item_index = null, - .created_at_ms = 0, - .updated_at_ms = 0, - .started_at_ms = null, - .ended_at_ms = null, - .child_run_id = null, - .iteration_index = 0, - }; -} - -// ── Transform step tests ───────────────────────────────────────────── - -test "Engine: transform step renders output_template" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - const wf = - \\{"steps":[{"id":"t1","type":"task","prompt_template":"hello"},{"id":"tr1","type":"transform","output_template":"result: {{steps.t1.output}}"}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - - // Insert task1 as completed with output - try store.insertStep("step_t1", "r1", "t1", "task", "completed", "{}", 1, null, null, null); - try store.updateStepStatus("step_t1", "completed", null, "{\"output\":\"hello\"}", null, 1); - - // Insert transform1 as ready with dependency on task1 - try store.insertStep("step_tr1", "r1", "tr1", "transform", "ready", "{}", 1, null, null, 
null); - try store.insertStepDep("step_tr1", "step_t1"); - - var engine = Engine.init(&store, allocator, 500); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - - // Verify transform completed - const s = (try store.getStep(arena.allocator(), "step_tr1")).?; - try std.testing.expectEqualStrings("completed", s.status); - // Output should contain the rendered template - try std.testing.expect(s.output_json != null); - // The output should contain "hello" from the task step - try std.testing.expect(std.mem.indexOf(u8, s.output_json.?, "hello") != null); -} - -test "Engine: transform step fails without output_template" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - const wf = - \\{"steps":[{"id":"tr1","type":"transform"}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_tr1", "r1", "tr1", "transform", "ready", "{}", 1, null, null, null); - - var engine = Engine.init(&store, allocator, 500); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - - const s = (try store.getStep(arena.allocator(), "step_tr1")).?; - try std.testing.expectEqualStrings("failed", s.status); - try std.testing.expect(s.error_text != null); -} - -// ── Wait step tests ────────────────────────────────────────────────── - -test "Engine: wait step with duration_ms=0 completes after two ticks" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - const wf = - \\{"steps":[{"id":"w1","type":"wait","duration_ms":0}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try 
store.insertStep("step_w1", "r1", "w1", "wait", "ready", "{}", 1, null, null, null); - - var engine = Engine.init(&store, allocator, 500); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - // First tick: step becomes "running" with started_at_ms - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - - const s1 = (try store.getStep(arena.allocator(), "step_w1")).?; - try std.testing.expectEqualStrings("running", s1.status); - try std.testing.expect(s1.started_at_ms != null); - - // Second tick: step should be "completed" since duration=0 - const run_row2 = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row2); - - const s2 = (try store.getStep(arena.allocator(), "step_w1")).?; - try std.testing.expectEqualStrings("completed", s2.status); - try std.testing.expect(s2.output_json != null); -} - -test "Engine: wait step with signal enters waiting_approval" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - const wf = - \\{"steps":[{"id":"w1","type":"wait","signal":"deploy"}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_w1", "r1", "w1", "wait", "ready", "{}", 1, null, null, null); - - var engine = Engine.init(&store, allocator, 500); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - - const s = (try store.getStep(arena.allocator(), "step_w1")).?; - try std.testing.expectEqualStrings("waiting_approval", s.status); -} - -test "Engine: wait step without config fails" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - const wf = - \\{"steps":[{"id":"w1","type":"wait"}]} - ; - try 
store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_w1", "r1", "w1", "wait", "ready", "{}", 1, null, null, null); - - var engine = Engine.init(&store, allocator, 500); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - - const s = (try store.getStep(arena.allocator(), "step_w1")).?; - try std.testing.expectEqualStrings("failed", s.status); -} - -test "Engine: wait step with invalid duration string fails" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - const wf = - \\{"steps":[{"id":"w1","type":"wait","duration_ms":"abc"}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_w1", "r1", "w1", "wait", "ready", "{}", 1, null, null, null); - - var engine = Engine.init(&store, allocator, 500); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); + return serializeJsonValue(alloc, .{ .object = result }); +} - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); +/// Serialize completed_nodes set to JSON array. 
+fn serializeCompletedNodes(alloc: std.mem.Allocator, completed_nodes: *std.StringHashMap(void)) ![]const u8 { + var arr: std.ArrayListUnmanaged([]const u8) = .empty; + var it = completed_nodes.iterator(); + while (it.next()) |entry| { + try arr.append(alloc, entry.key_ptr.*); + } + return json.Stringify.valueAlloc(alloc, arr.items, .{}); +} - const s = (try store.getStep(arena.allocator(), "step_w1")).?; - try std.testing.expectEqualStrings("failed", s.status); - try std.testing.expect(s.error_text != null); - try std.testing.expect(std.mem.indexOf(u8, s.error_text.?, "duration_ms must be an integer") != null); +/// Serialize route_results map + workflow_version to JSON for checkpoint metadata. +fn serializeRouteResults(alloc: std.mem.Allocator, route_results: *std.StringHashMap([]const u8)) !?[]const u8 { + return serializeRouteResultsWithVersion(alloc, route_results, null); } -// ── Router step tests ──────────────────────────────────────────────── +fn serializeRouteResultsWithVersion(alloc: std.mem.Allocator, route_results: *std.StringHashMap([]const u8), wf_version: ?i64) !?[]const u8 { + if (route_results.count() == 0 and wf_version == null) return null; -test "Engine: router step routes to matching target" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); + var obj = json.ObjectMap.init(alloc); - const wf = - \\{"steps":[{"id":"classify","type":"task","prompt_template":"classify"},{"id":"router1","type":"router","routes":{"bug":"fix_bug","feature":"add_feature"}},{"id":"fix_bug","type":"task","prompt_template":"fix"},{"id":"add_feature","type":"task","prompt_template":"add"}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); + if (route_results.count() > 0) { + var rr_obj = json.ObjectMap.init(alloc); + var it = route_results.iterator(); + while (it.next()) |entry| { + try rr_obj.put(entry.key_ptr.*, .{ .string = entry.value_ptr.* }); + } + try obj.put("route_results", 
.{ .object = rr_obj }); + } + + if (wf_version) |v| { + try obj.put("workflow_version", .{ .integer = v }); + } - // classify step completed with "bug" in output - try store.insertStep("step_classify", "r1", "classify", "task", "completed", "{}", 1, null, null, null); - try store.updateStepStatus("step_classify", "completed", null, "{\"output\":\"this is a bug report\"}", null, 1); + return try serializeJsonValue(alloc, .{ .object = obj }); +} - // router step is ready, depends on classify - try store.insertStep("step_router", "r1", "router1", "router", "ready", "{}", 1, null, null, null); - try store.insertStepDep("step_router", "step_classify"); +/// Serialize a string array as JSON. +fn serializeStringArray(alloc: std.mem.Allocator, items: []const []const u8) ![]const u8 { + return json.Stringify.valueAlloc(alloc, items, .{}); +} - // Target steps are pending - try store.insertStep("step_fix", "r1", "fix_bug", "task", "pending", "{}", 1, null, null, null); - try store.insertStepDep("step_fix", "step_router"); - try store.insertStep("step_add", "r1", "add_feature", "task", "pending", "{}", 1, null, null, null); - try store.insertStepDep("step_add", "step_router"); +/// Try to extract "state_updates" from worker output JSON. +/// Worker can return: {"state_updates": {"key": "value"}, ...} +fn extractStateUpdates(alloc: std.mem.Allocator, output: []const u8) ?[]const u8 { + const parsed = json.parseFromSlice(json.Value, alloc, output, .{}) catch return null; + if (parsed.value != .object) return null; + const su = parsed.value.object.get("state_updates") orelse return null; + return serializeJsonValue(alloc, su) catch null; +} - var engine = Engine.init(&store, allocator, 500); +/// Extract "goto" field from worker output JSON. +/// Returns array of target node names. 
Supports: +/// - "goto": "node_name" -> ["node_name"] +/// - "goto": ["node_a", "node_b"] -> ["node_a", "node_b"] +fn extractGotoTargets(alloc: std.mem.Allocator, output: []const u8) ?[]const []const u8 { + const parsed = json.parseFromSlice(json.Value, alloc, output, .{}) catch return null; + if (parsed.value != .object) return null; + const goto_val = parsed.value.object.get("goto") orelse return null; - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); + var targets: std.ArrayListUnmanaged([]const u8) = .empty; + if (goto_val == .string) { + targets.append(alloc, goto_val.string) catch return null; + } else if (goto_val == .array) { + for (goto_val.array.items) |item| { + if (item == .string) { + targets.append(alloc, item.string) catch continue; + } + } + } else { + return null; + } - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); + if (targets.items.len == 0) return null; + return targets.toOwnedSlice(alloc) catch null; +} - // Router should be completed - const router = (try store.getStep(arena.allocator(), "step_router")).?; - try std.testing.expectEqualStrings("completed", router.status); - try std.testing.expect(router.output_json != null); - try std.testing.expect(std.mem.indexOf(u8, router.output_json.?, "fix_bug") != null); +/// Parse interrupt_before / interrupt_after arrays from workflow definition. 
+fn parseBreakpointList(alloc: std.mem.Allocator, workflow_json: []const u8, field: []const u8) []const []const u8 { + const parsed = json.parseFromSlice(json.Value, alloc, workflow_json, .{}) catch return &.{}; + return parseBreakpointListFromRoot(alloc, parsed.value, field); +} - // add_feature should be skipped - const add = (try store.getStep(arena.allocator(), "step_add")).?; - try std.testing.expectEqualStrings("skipped", add.status); +fn parseBreakpointListFromRoot(alloc: std.mem.Allocator, root: json.Value, field: []const u8) []const []const u8 { + if (root != .object) return &.{}; + const arr_val = root.object.get(field) orelse return &.{}; + if (arr_val != .array) return &.{}; - // fix_bug should still be pending (not skipped) - const fix = (try store.getStep(arena.allocator(), "step_fix")).?; - try std.testing.expectEqualStrings("pending", fix.status); + var result: std.ArrayListUnmanaged([]const u8) = .empty; + for (arr_val.array.items) |item| { + if (item == .string) { + result.append(alloc, item.string) catch continue; + } + } + return result.toOwnedSlice(alloc) catch &.{}; } -test "Engine: router step uses default when no match" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); +/// Check if a node name is in a breakpoint list. +fn isInBreakpointList(name: []const u8, list: []const []const u8) bool { + for (list) |item| { + if (std.mem.eql(u8, name, item)) return true; + } + return false; +} - const wf = - \\{"steps":[{"id":"classify","type":"task","prompt_template":"classify"},{"id":"router1","type":"router","routes":{"bug":"fix_bug"},"default":"fix_bug"},{"id":"fix_bug","type":"task","prompt_template":"fix"}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); +/// Get an integer field from a node's JSON. 
+fn getNodeFieldInt(alloc: std.mem.Allocator, node_json: []const u8, field: []const u8) ?i64 { + const parsed = json.parseFromSlice(json.Value, alloc, node_json, .{}) catch return null; + if (parsed.value != .object) return null; + const val = parsed.value.object.get(field) orelse return null; + if (val == .integer) return val.integer; + return null; +} - // classify step completed with something that doesn't match any route - try store.insertStep("step_classify", "r1", "classify", "task", "completed", "{}", 1, null, null, null); - try store.updateStepStatus("step_classify", "completed", null, "{\"output\":\"unknown category\"}", null, 1); +/// Get a float field from a node's JSON. +fn getNodeFieldFloat(alloc: std.mem.Allocator, node_json: []const u8, field: []const u8) ?f64 { + const parsed = json.parseFromSlice(json.Value, alloc, node_json, .{}) catch return null; + if (parsed.value != .object) return null; + const val = parsed.value.object.get(field) orelse return null; + if (val == .float) return val.float; + if (val == .integer) return @as(f64, @floatFromInt(val.integer)); + return null; +} - // router step is ready - try store.insertStep("step_router", "r1", "router1", "router", "ready", "{}", 1, null, null, null); - try store.insertStepDep("step_router", "step_classify"); +/// Get a nested object field as JSON string from a node's JSON. 
+fn getNodeObjectField(alloc: std.mem.Allocator, node_json: []const u8, field: []const u8) ?[]const u8 { + const parsed = json.parseFromSlice(json.Value, alloc, node_json, .{}) catch return null; + if (parsed.value != .object) return null; + const val = parsed.value.object.get(field) orelse return null; + if (val != .object) return null; + return serializeJsonValue(alloc, val) catch null; +} - // Target step - try store.insertStep("step_fix", "r1", "fix_bug", "task", "pending", "{}", 1, null, null, null); - try store.insertStepDep("step_fix", "step_router"); +fn resolveDeclaredRouteValue(alloc: std.mem.Allocator, node_json: []const u8, candidate: ?[]const u8) ?[]const u8 { + const routes_json = getNodeObjectField(alloc, node_json, "routes") orelse return candidate; + const parsed = json.parseFromSlice(json.Value, alloc, routes_json, .{}) catch return candidate; + if (parsed.value != .object) return candidate; - var engine = Engine.init(&store, allocator, 500); + if (candidate) |route_value| { + if (parsed.value.object.get(route_value) != null) return route_value; + } - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); + const default_route = getNodeField(alloc, node_json, "default") orelse return candidate; + if (parsed.value.object.get(default_route) != null) return default_route; + return candidate; +} - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); +// ── Retry Config Helpers (Gap 2) ──────────────────────────────────── - // Router should be completed with default target - const router = (try store.getStep(arena.allocator(), "step_router")).?; - try std.testing.expectEqualStrings("completed", router.status); - try std.testing.expect(router.output_json != null); - try std.testing.expect(std.mem.indexOf(u8, router.output_json.?, "fix_bug") != null); +/// Parse retry.max_attempts from node JSON. Returns null if no retry config. 
+fn parseRetryMaxAttempts(alloc: std.mem.Allocator, node_json: []const u8) ?u32 { + const retry_json = getNodeObjectField(alloc, node_json, "retry") orelse return null; + const val = getNodeFieldInt(alloc, retry_json, "max_attempts") orelse return null; + if (val < 1) return 1; + if (val > 100) return 100; + return @intCast(val); } -test "Engine: router step fails without routes" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); +fn parseRetryInitialMs(alloc: std.mem.Allocator, node_json: []const u8) ?u64 { + const retry_json = getNodeObjectField(alloc, node_json, "retry") orelse return null; + const val = getNodeFieldInt(alloc, retry_json, "initial_interval_ms") orelse return null; + if (val < 0) return 0; + return @intCast(val); +} - const wf = - \\{"steps":[{"id":"classify","type":"task","prompt_template":"classify"},{"id":"router1","type":"router"}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); +fn parseRetryBackoff(alloc: std.mem.Allocator, node_json: []const u8) ?f64 { + const retry_json = getNodeObjectField(alloc, node_json, "retry") orelse return null; + return getNodeFieldFloat(alloc, retry_json, "backoff_factor"); +} - try store.insertStep("step_classify", "r1", "classify", "task", "completed", "{}", 1, null, null, null); - try store.updateStepStatus("step_classify", "completed", null, "{\"output\":\"test\"}", null, 1); +fn parseRetryMaxMs(alloc: std.mem.Allocator, node_json: []const u8) ?u64 { + const retry_json = getNodeObjectField(alloc, node_json, "retry") orelse return null; + const val = getNodeFieldInt(alloc, retry_json, "max_interval_ms") orelse return null; + if (val < 0) return 0; + return @intCast(val); +} - try store.insertStep("step_router", "r1", "router1", "router", "ready", "{}", 1, null, null, null); - try store.insertStepDep("step_router", "step_classify"); +// ── Cache Key Helpers (Gap 3) ─────────────────────────────────────── - var engine = 
Engine.init(&store, allocator, 500); +/// Parse cache.ttl_ms from node JSON. Returns null if no cache config. +fn parseCacheTtlMs(alloc: std.mem.Allocator, node_json: []const u8) ?i64 { + const cache_json = getNodeObjectField(alloc, node_json, "cache") orelse return null; + return getNodeFieldInt(alloc, cache_json, "ttl_ms"); +} - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); +/// Compute a cache key from node_name + rendered_prompt using FNV hash. +fn computeCacheKey(alloc: std.mem.Allocator, node_name: []const u8, rendered_prompt: []const u8) ![]const u8 { + var hasher = std.hash.Fnv1a_64.init(); + hasher.update(node_name); + hasher.update("|"); + hasher.update(rendered_prompt); + const hash = hasher.final(); + return try std.fmt.allocPrint(alloc, "{x:0>16}", .{hash}); +} - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); +// ── Deferred Node Helpers (Gap 6) ─────────────────────────────────── - const router = (try store.getStep(arena.allocator(), "step_router")).?; - try std.testing.expectEqualStrings("failed", router.status); +/// Collect all deferred node names from workflow. 
+fn collectDeferredNodes(alloc: std.mem.Allocator, workflow_json: []const u8) []const []const u8 { + const parsed = json.parseFromSlice(json.Value, alloc, workflow_json, .{}) catch return &.{}; + return collectDeferredNodesFromRoot(alloc, parsed.value); } -// ── getStepFieldRaw tests ──────────────────────────────────────────── +fn collectDeferredNodesFromRoot(alloc: std.mem.Allocator, root: json.Value) []const []const u8 { + if (root != .object) return &.{}; + const nodes_val = root.object.get("nodes") orelse return &.{}; + if (nodes_val != .object) return &.{}; + + var result: std.ArrayListUnmanaged([]const u8) = .empty; + var it = nodes_val.object.iterator(); + while (it.next()) |entry| { + const name = entry.key_ptr.*; + const node = entry.value_ptr.*; + if (node == .object) { + if (node.object.get("defer")) |d| { + if (d == .bool and d.bool) { + result.append(alloc, name) catch continue; + } + } + } + } + return result.toOwnedSlice(alloc) catch &.{}; +} -test "getStepFieldRaw returns JSON object as string" { - const allocator = std.testing.allocator; - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); +// ── Managed Values Helpers (Gap 7) ────────────────────────────────── - const wf = - \\{"steps":[{"id":"r1","type":"router","routes":{"bug":"fix_bug","feature":"add_feature"}}]} - ; - const result = try getStepFieldRaw(arena.allocator(), wf, "r1", "routes"); - try std.testing.expect(result != null); - // Should be a JSON string containing the routes object - try std.testing.expect(std.mem.indexOf(u8, result.?, "bug") != null); - try std.testing.expect(std.mem.indexOf(u8, result.?, "fix_bug") != null); +/// Inject __meta into state JSON before node execution. 
+fn injectMeta(alloc: std.mem.Allocator, state_json: []const u8, run_id: []const u8, node_name: []const u8, step_number: i64, max_steps: i64) ![]const u8 { + const remaining = max_steps - step_number; + const is_last = (step_number >= max_steps - 1); + const meta_json = try std.fmt.allocPrint(alloc, + \\{{"__meta":{{"step":{d},"is_last_step":{s},"remaining_steps":{d},"run_id":"{s}","node_name":"{s}"}}}} + , .{ step_number, if (is_last) "true" else "false", remaining, run_id, node_name }); + + // Merge __meta into state using simple applyUpdates with empty schema (last_value default) + return state_mod.applyUpdates(alloc, state_json, meta_json, "{}"); } -test "getStepFieldRaw returns string values directly" { - const allocator = std.testing.allocator; - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); +/// Remove __meta from state JSON after node execution (don't persist in checkpoints). +fn stripMeta(alloc: std.mem.Allocator, state_json: []const u8) ![]const u8 { + const parsed = json.parseFromSlice(json.Value, alloc, state_json, .{}) catch return try alloc.dupe(u8, state_json); + if (parsed.value != .object) return try alloc.dupe(u8, state_json); - const wf = - \\{"steps":[{"id":"r1","type":"router","default":"fallback"}]} - ; - const result = try getStepFieldRaw(arena.allocator(), wf, "r1", "default"); - try std.testing.expect(result != null); - try std.testing.expectEqualStrings("fallback", result.?); + var result_obj = json.ObjectMap.init(alloc); + var it = parsed.value.object.iterator(); + while (it.next()) |entry| { + if (!std.mem.eql(u8, entry.key_ptr.*, "__meta")) { + try result_obj.put(entry.key_ptr.*, entry.value_ptr.*); + } + } + return serializeJsonValue(alloc, .{ .object = result_obj }); } -// ── Loop step tests ────────────────────────────────────────────────── +/// Build subgraph input state from parent state using input_mapping. 
+/// input_mapping is {"child_key": "state.parent_key", ...} +fn buildSubgraphInput(alloc: std.mem.Allocator, parent_state: []const u8, input_mapping_json: []const u8) ![]const u8 { + const mapping_parsed = json.parseFromSlice(json.Value, alloc, input_mapping_json, .{}) catch return try alloc.dupe(u8, "{}"); + if (mapping_parsed.value != .object) return try alloc.dupe(u8, "{}"); -test "Engine: loop step creates first iteration children" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); + var result = json.ObjectMap.init(alloc); + var it = mapping_parsed.value.object.iterator(); + while (it.next()) |entry| { + const child_key = entry.key_ptr.*; + const parent_path = if (entry.value_ptr.* == .string) entry.value_ptr.string else continue; - // Workflow: loop with body ["t1"] — single body step for simplicity - const wf = - \\{"steps":[{"id":"loop1","type":"loop","max_iterations":3,"exit_condition":"done","body":["t1"]},{"id":"t1","type":"task","prompt_template":"do work"}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_loop", "r1", "loop1", "loop", "ready", "{}", 1, null, null, null); + // Resolve the value from parent state + if (state_mod.getStateValue(alloc, parent_state, parent_path) catch null) |value_str| { + const val_parsed = json.parseFromSlice(json.Value, alloc, value_str, .{}) catch continue; + try result.put(child_key, val_parsed.value); + } + } - var engine = Engine.init(&store, allocator, 500); + return serializeJsonValue(alloc, .{ .object = result }); +} - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); +/// Reconcile with nulltickets: check if associated task has been cancelled. +/// Returns true if the run should continue, false if it should be cancelled. 
+fn reconcileWithTracker(alloc: std.mem.Allocator, tracker_url: []const u8, tracker_api_token: ?[]const u8, task_id: []const u8) bool { + const task_id_enc = encodePathSegment(alloc, task_id) catch return true; + defer alloc.free(task_id_enc); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); + const url = std.fmt.allocPrint(alloc, "{s}/tasks/{s}", .{ tracker_url, task_id_enc }) catch return true; + defer alloc.free(url); - // Loop step should be "running" - const loop_step = (try store.getStep(arena.allocator(), "step_loop")).?; - try std.testing.expectEqualStrings("running", loop_step.status); + var client: std.http.Client = .{ .allocator = alloc }; + defer client.deinit(); - // Should have created 1 child step - const children = try store.getChildSteps(arena.allocator(), "step_loop"); - try std.testing.expectEqual(@as(usize, 1), children.len); - try std.testing.expectEqualStrings("ready", children[0].status); - try std.testing.expectEqualStrings("t1", children[0].def_step_id); - try std.testing.expectEqual(@as(i64, 0), children[0].iteration_index); -} + var response_body: std.io.Writer.Allocating = .init(alloc); + defer response_body.deinit(); -test "Engine: loop step iterates until exit condition" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); + var auth_header: ?[]const u8 = null; + defer if (auth_header) |value| alloc.free(value); + var headers_buf: [1]std.http.Header = undefined; + const extra_headers: []const std.http.Header = if (tracker_api_token) |token| blk: { + auth_header = std.fmt.allocPrint(alloc, "Bearer {s}", .{token}) catch return true; + headers_buf[0] = .{ .name = "Authorization", .value = auth_header.? 
}; + break :blk headers_buf[0..1]; + } else &.{}; - const wf = - \\{"steps":[{"id":"loop1","type":"loop","max_iterations":5,"exit_condition":"done","body":["t1"]},{"id":"t1","type":"task","prompt_template":"do work"}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_loop", "r1", "loop1", "loop", "ready", "{}", 1, null, null, null); + const result = client.fetch(.{ + .location = .{ .url = url }, + .method = .GET, + .response_writer = &response_body.writer, + .extra_headers = extra_headers, + }) catch return true; // network errors -> continue - var engine = Engine.init(&store, allocator, 500); + const status_code = @intFromEnum(result.status); + if (status_code < 200 or status_code >= 300) return true; - // Tick 1: creates iteration 0 children, marks loop as running - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + const body = response_body.written(); + const parsed = json.parseFromSlice(json.Value, alloc, body, .{}) catch return true; + if (parsed.value != .object) return true; - // Get the first child and mark it completed with "not done" - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_loop"); - try std.testing.expectEqual(@as(usize, 1), children.len); - try store.updateStepStatus(children[0].id, "completed", null, "{\"output\":\"not done\"}", null, 1); - } + const stage = parsed.value.object.get("stage") orelse return true; + if (stage != .string) return true; - // Tick 2: exit condition "done" not in "not done"... wait, "not done" contains "done"! 
- // Let's use a different output that doesn't contain "done" + // Terminal states -> cancel + if (std.mem.eql(u8, stage.string, "done") or + std.mem.eql(u8, stage.string, "cancelled") or + std.mem.eql(u8, stage.string, "canceled")) { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_loop"); - // Fix: update to something that doesn't contain "done" - try store.updateStepStatus(children[0].id, "completed", null, "{\"output\":\"still working\"}", null, 1); - } + log.info("reconciliation: task {s} is in terminal state '{s}', cancelling run", .{ task_id, stage.string }); + return false; + } + + return true; +} + +// ── Rich Streaming Helpers ────────────────────────────────────────── - // Tick 2: exit condition not met, creates iteration 1 - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } +/// Broadcast multi-mode SSE events for a node execution. +/// Emits events in values, updates, tasks, and debug modes. 
+fn broadcastNodeEvents( + hub: *sse_mod.SseHub, + alloc: std.mem.Allocator, + run_id: []const u8, + node_name: []const u8, + node_type: []const u8, + state_json: []const u8, + state_updates: ?[]const u8, + step_number: i64, + duration_ms: i64, +) void { + const step_id_buf = ids.generateId(); + const step_id = alloc.dupe(u8, &step_id_buf) catch return; + const now_ms = ids.nowMs(); + // ISO 8601 timestamp (approximate, using epoch ms) + const ts_str = std.fmt.allocPrint(alloc, "{d}", .{now_ms}) catch "0"; - // Should now have 2 children (iteration 0 and 1) - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_loop"); - try std.testing.expectEqual(@as(usize, 2), children.len); + // values mode: full state after step + const values_data = std.fmt.allocPrint(alloc, + \\{{"event":"values","data":{{"step":"{s}","state":{s}}}}} + , .{ node_name, state_json }) catch null; + if (values_data) |vd| { + hub.broadcast(run_id, .{ .event_type = "values", .data = vd, .mode = .values }); + } + + // updates mode: node name + partial updates + const updates_payload = state_updates orelse "{}"; + const updates_data = std.fmt.allocPrint(alloc, + \\{{"event":"updates","data":{{"step":"{s}","updates":{s}}}}} + , .{ node_name, updates_payload }) catch null; + if (updates_data) |ud| { + hub.broadcast(run_id, .{ .event_type = "updates", .data = ud, .mode = .updates }); } - // Mark iteration 1 child as completed with "done" in output - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_loop"); - // Find iteration 1 child - for (children) |child| { - if (child.iteration_index == 1) { - try store.updateStepStatus(child.id, "completed", null, "{\"output\":\"done\"}", null, 1); + // tasks mode: task_start and task_result + const task_start_data = std.fmt.allocPrint(alloc, + 
\\{{"id":"{s}","name":"{s}","type":"{s}"}} + , .{ step_id, node_name, node_type }) catch null; + if (task_start_data) |tsd| { + hub.broadcast(run_id, .{ .event_type = "task_start", .data = tsd, .mode = .tasks }); + } + + const task_result_data = std.fmt.allocPrint(alloc, + \\{{"id":"{s}","name":"{s}","result":{s},"duration_ms":{d}}} + , .{ step_id, node_name, updates_payload, duration_ms }) catch null; + if (task_result_data) |trd| { + hub.broadcast(run_id, .{ .event_type = "task_result", .data = trd, .mode = .tasks }); + } + + // debug mode: wrapped with step number and timestamp + const debug_data = std.fmt.allocPrint(alloc, + \\{{"step_number":{d},"timestamp_ms":{s},"type":"task_result","payload":{{"name":"{s}","updates":{s},"duration_ms":{d}}}}} + , .{ step_number, ts_str, node_name, updates_payload, duration_ms }) catch null; + if (debug_data) |dd| { + hub.broadcast(run_id, .{ .event_type = "debug", .data = dd, .mode = .debug }); + } +} + +/// Get workflow version from workflow JSON definition. +fn getWorkflowVersion(alloc: std.mem.Allocator, workflow_json: []const u8) i64 { + const parsed = json.parseFromSlice(json.Value, alloc, workflow_json, .{}) catch return 1; + if (parsed.value != .object) return 1; + const val = parsed.value.object.get("version") orelse return 1; + if (val == .integer) return val.integer; + return 1; +} + +/// Get workflow version from checkpoint metadata. +fn getCheckpointWorkflowVersion(alloc: std.mem.Allocator, metadata_json: ?[]const u8) i64 { + const meta = metadata_json orelse return 1; + const parsed = json.parseFromSlice(json.Value, alloc, meta, .{}) catch return 1; + if (parsed.value != .object) return 1; + const val = parsed.value.object.get("workflow_version") orelse return 1; + if (val == .integer) return val.integer; + return 1; +} + +/// Filter completed nodes to only those still present in the workflow definition. +/// Returns true if any nodes were removed (migration happened). 
+fn migrateCompletedNodes(alloc: std.mem.Allocator, completed_nodes: *std.StringHashMap(void), workflow_json: []const u8) bool { + const parsed = json.parseFromSlice(json.Value, alloc, workflow_json, .{}) catch return false; + if (parsed.value != .object) return false; + const nodes_val = parsed.value.object.get("nodes") orelse return false; + if (nodes_val != .object) return false; + + var to_remove: std.ArrayListUnmanaged([]const u8) = .empty; + var it = completed_nodes.iterator(); + while (it.next()) |entry| { + const name = entry.key_ptr.*; + // Keep special nodes + if (std.mem.eql(u8, name, "__start__") or std.mem.eql(u8, name, "__end__")) continue; + // Remove if node no longer exists in workflow + if (nodes_val.object.get(name) == null) { + to_remove.append(alloc, name) catch continue; + } + } + + if (to_remove.items.len == 0) return false; + + for (to_remove.items) |name| { + _ = completed_nodes.remove(name); + log.warn("migration: removed completed node '{s}' (no longer in workflow)", .{name}); + } + return true; +} + +// ── UI Messages ────────────────────────────────────────────────────── + +/// Process "ui_messages" from worker response JSON. +/// For each message: +/// - If it has "remove": true -> broadcast as "ui_message_delete" SSE event +/// - Otherwise -> broadcast as "ui_message" SSE event +/// Also applies to state.__ui_messages via add_messages reducer. 
+fn processUiMessages(hub: *sse_mod.SseHub, alloc: std.mem.Allocator, run_id: []const u8, step_id: []const u8, response_json: []const u8) void { + const parsed = json.parseFromSlice(json.Value, alloc, response_json, .{}) catch return; + if (parsed.value != .object) return; + const ui_msgs_val = parsed.value.object.get("ui_messages") orelse return; + if (ui_msgs_val != .array) return; + + for (ui_msgs_val.array.items) |msg| { + if (msg != .object) continue; + + // Check for remove flag + const is_remove = blk: { + if (msg.object.get("remove")) |rm_val| { + if (rm_val == .bool) break :blk rm_val.bool; } - } - } + break :blk false; + }; - // Tick 3: exit condition met, loop completes - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + // Add step_id to the event data + var event_obj = json.ObjectMap.init(alloc); + var it = msg.object.iterator(); + while (it.next()) |entry| { + event_obj.put(entry.key_ptr.*, entry.value_ptr.*) catch continue; + } + event_obj.put("step_id", .{ .string = step_id }) catch {}; + const event_data = serializeJsonValue(alloc, .{ .object = event_obj }) catch continue; - // Loop should be completed - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const loop_step = (try store.getStep(arena.allocator(), "step_loop")).?; - try std.testing.expectEqualStrings("completed", loop_step.status); - try std.testing.expect(loop_step.output_json != null); + if (is_remove) { + hub.broadcast(run_id, .{ .event_type = "ui_message_delete", .data = event_data, .mode = .custom }); + } else { + hub.broadcast(run_id, .{ .event_type = "ui_message", .data = event_data, .mode = .custom }); + } } } -test "Engine: loop step stops at max_iterations" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); +/// Apply 
ui_messages to run state's __ui_messages key using add_messages reducer. +fn applyUiMessagesToState(alloc: std.mem.Allocator, state_json: []const u8, response_json: []const u8) ![]const u8 { + var arena = std.heap.ArenaAllocator.init(alloc); + defer arena.deinit(); + const arena_alloc = arena.allocator(); - const wf = - \\{"steps":[{"id":"loop1","type":"loop","max_iterations":2,"exit_condition":"never_match","body":["t1"]},{"id":"t1","type":"task","prompt_template":"do work"}]} + const resp_parsed = json.parseFromSlice(json.Value, arena_alloc, response_json, .{}) catch return try alloc.dupe(u8, state_json); + if (resp_parsed.value != .object) return try alloc.dupe(u8, state_json); + const ui_msgs_val = resp_parsed.value.object.get("ui_messages") orelse return try alloc.dupe(u8, state_json); + if (ui_msgs_val != .array) return try alloc.dupe(u8, state_json); + + // Serialize the ui_messages array + const ui_msgs_json = serializeJsonValue(arena_alloc, ui_msgs_val) catch return try alloc.dupe(u8, state_json); + + // Build updates: {"__ui_messages": } + const updates = std.fmt.allocPrint(arena_alloc, "{{\"__ui_messages\":{s}}}", .{ui_msgs_json}) catch return try alloc.dupe(u8, state_json); + + // Build a temporary schema that uses add_messages for __ui_messages + const schema = + \\{"__ui_messages":{"type":"array","reducer":"add_messages"}} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_loop", "r1", "loop1", "loop", "ready", "{}", 1, null, null, null); - var engine = Engine.init(&store, allocator, 500); + return state_mod.applyUpdates(alloc, state_json, updates, schema) catch try alloc.dupe(u8, state_json); +} - // Tick 1: creates iteration 0 - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } +// ── Stream Messages ────────────────────────────────────────────────── - // 
Complete iteration 0 child - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_loop"); - try store.updateStepStatus(children[0].id, "completed", null, "{\"output\":\"result0\"}", null, 1); - } +/// Process "stream_messages" from worker response JSON. +/// For each message: broadcast as a "message" SSE event with step context. +fn processStreamMessages(hub: *sse_mod.SseHub, alloc: std.mem.Allocator, run_id: []const u8, step_id: []const u8, node_type: []const u8, response_json: []const u8) void { + const parsed = json.parseFromSlice(json.Value, alloc, response_json, .{}) catch return; + if (parsed.value != .object) return; + const stream_msgs_val = parsed.value.object.get("stream_messages") orelse return; + if (stream_msgs_val != .array) return; - // Tick 2: creates iteration 1 - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + for (stream_msgs_val.array.items) |msg| { + if (msg != .object) continue; - // Complete iteration 1 child - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_loop"); - for (children) |child| { - if (child.iteration_index == 1) { - try store.updateStepStatus(child.id, "completed", null, "{\"output\":\"result1\"}", null, 1); - } + // Build enriched message with step context + var event_obj = json.ObjectMap.init(alloc); + var it = msg.object.iterator(); + while (it.next()) |entry| { + event_obj.put(entry.key_ptr.*, entry.value_ptr.*) catch continue; } - } - - // Tick 3: max_iterations=2 reached (iterations 0,1), loop completes - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try 
engine.processRun(arena.allocator(), run_row); - } + event_obj.put("step_id", .{ .string = step_id }) catch {}; + event_obj.put("node_type", .{ .string = node_type }) catch {}; + const event_data = serializeJsonValue(alloc, .{ .object = event_obj }) catch continue; - // Loop should be completed - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const loop_step = (try store.getStep(arena.allocator(), "step_loop")).?; - try std.testing.expectEqualStrings("completed", loop_step.status); + hub.broadcast(run_id, .{ .event_type = "message", .data = event_data, .mode = .custom }); } } -test "Engine: loop step fails when child fails" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - const wf = - \\{"steps":[{"id":"loop1","type":"loop","max_iterations":3,"exit_condition":"done","body":["t1"]},{"id":"t1","type":"task","prompt_template":"do work"}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_loop", "r1", "loop1", "loop", "ready", "{}", 1, null, null, null); +// ── Mermaid Graph Export ───────────────────────────────────────────── - var engine = Engine.init(&store, allocator, 500); +/// Generate Mermaid diagram syntax from a workflow JSON definition. +/// Returns a Mermaid flowchart string. 
+pub fn generateMermaid(alloc: std.mem.Allocator, definition_json: []const u8) ![]const u8 { + var arena = std.heap.ArenaAllocator.init(alloc); + defer arena.deinit(); + const arena_alloc = arena.allocator(); - // Tick 1: creates iteration 0 - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + const parsed = try json.parseFromSlice(json.Value, arena_alloc, definition_json, .{}); + if (parsed.value != .object) return try alloc.dupe(u8, "graph TD\n"); - // Mark child as failed - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_loop"); - try store.updateStepStatus(children[0].id, "failed", null, null, "child error", 1); - } + const nodes_val = parsed.value.object.get("nodes") orelse return try alloc.dupe(u8, "graph TD\n"); + if (nodes_val != .object) return try alloc.dupe(u8, "graph TD\n"); - // Tick 2: loop should fail - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + const edges_val = parsed.value.object.get("edges") orelse return try alloc.dupe(u8, "graph TD\n"); + if (edges_val != .array) return try alloc.dupe(u8, "graph TD\n"); - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const loop_step = (try store.getStep(arena.allocator(), "step_loop")).?; - try std.testing.expectEqualStrings("failed", loop_step.status); - } -} + var buf: std.ArrayListUnmanaged(u8) = .empty; -test "Engine: loop step with multiple body steps chains them" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); + // Header + try buf.appendSlice(arena_alloc, "graph TD\n"); - const wf = - 
\\{"steps":[{"id":"loop1","type":"loop","max_iterations":1,"exit_condition":"done","body":["s1","s2"]},{"id":"s1","type":"task","prompt_template":"step1"},{"id":"s2","type":"task","prompt_template":"step2"}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_loop", "r1", "loop1", "loop", "ready", "{}", 1, null, null, null); + // __start__ and __end__ nodes + try buf.appendSlice(arena_alloc, " __start__((Start))\n"); - var engine = Engine.init(&store, allocator, 500); + // Node definitions + var nodes_it = nodes_val.object.iterator(); + while (nodes_it.next()) |entry| { + const name = entry.key_ptr.*; + const node = entry.value_ptr.*; - // Tick 1: creates iteration 0 with 2 body steps chained - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + const node_type_str = blk: { + if (node == .object) { + if (node.object.get("type")) |t| { + if (t == .string) break :blk t.string; + } + } + break :blk "task"; + }; - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_loop"); - try std.testing.expectEqual(@as(usize, 2), children.len); - - // First child (s1) should be "ready", second (s2) should be "pending" - // Children are ordered by item_index ASC - var ready_count: usize = 0; - var pending_count: usize = 0; - for (children) |child| { - if (std.mem.eql(u8, child.status, "ready")) ready_count += 1; - if (std.mem.eql(u8, child.status, "pending")) pending_count += 1; + // Choose Mermaid shape based on node type + if (std.mem.eql(u8, node_type_str, "route")) { + try buf.appendSlice(arena_alloc, " "); + try buf.appendSlice(arena_alloc, name); + try buf.appendSlice(arena_alloc, "{"); + try buf.appendSlice(arena_alloc, name); + try buf.appendSlice(arena_alloc, "\\nroute}\n"); + } else 
if (std.mem.eql(u8, node_type_str, "interrupt")) { + try buf.appendSlice(arena_alloc, " "); + try buf.appendSlice(arena_alloc, name); + try buf.appendSlice(arena_alloc, "[/"); + try buf.appendSlice(arena_alloc, name); + try buf.appendSlice(arena_alloc, "\\ninterrupt/]\n"); + } else if (std.mem.eql(u8, node_type_str, "send")) { + try buf.appendSlice(arena_alloc, " "); + try buf.appendSlice(arena_alloc, name); + try buf.appendSlice(arena_alloc, "[["); + try buf.appendSlice(arena_alloc, name); + try buf.appendSlice(arena_alloc, "\\nsend]]\n"); + } else if (std.mem.eql(u8, node_type_str, "transform")) { + try buf.appendSlice(arena_alloc, " "); + try buf.appendSlice(arena_alloc, name); + try buf.appendSlice(arena_alloc, "("); + try buf.appendSlice(arena_alloc, name); + try buf.appendSlice(arena_alloc, "\\ntransform)\n"); + } else if (std.mem.eql(u8, node_type_str, "subgraph")) { + try buf.appendSlice(arena_alloc, " "); + try buf.appendSlice(arena_alloc, name); + try buf.appendSlice(arena_alloc, "["); + try buf.appendSlice(arena_alloc, name); + try buf.appendSlice(arena_alloc, "\\nsubgraph]\n"); + } else { + // task, agent, and others: rectangle + try buf.appendSlice(arena_alloc, " "); + try buf.appendSlice(arena_alloc, name); + try buf.appendSlice(arena_alloc, "["); + try buf.appendSlice(arena_alloc, name); + try buf.appendSlice(arena_alloc, "\\n"); + try buf.appendSlice(arena_alloc, node_type_str); + try buf.appendSlice(arena_alloc, "]\n"); + } + } + + // __end__ node + try buf.appendSlice(arena_alloc, " __end__((End))\n"); + + // Edges + for (edges_val.array.items) |edge_item| { + if (edge_item != .array) continue; + if (edge_item.array.items.len < 2) continue; + + const source_raw = if (edge_item.array.items[0] == .string) edge_item.array.items[0].string else continue; + const target = if (edge_item.array.items[1] == .string) edge_item.array.items[1].string else continue; + + // Parse conditional edge "source:value" + if (std.mem.indexOfScalar(u8, source_raw, ':')) 
|colon_pos| { + const source = source_raw[0..colon_pos]; + const condition = source_raw[colon_pos + 1 ..]; + try buf.appendSlice(arena_alloc, " "); + try buf.appendSlice(arena_alloc, source); + try buf.appendSlice(arena_alloc, " -->|"); + try buf.appendSlice(arena_alloc, condition); + try buf.appendSlice(arena_alloc, "| "); + try buf.appendSlice(arena_alloc, target); + try buf.appendSlice(arena_alloc, "\n"); + } else { + try buf.appendSlice(arena_alloc, " "); + try buf.appendSlice(arena_alloc, source_raw); + try buf.appendSlice(arena_alloc, " --> "); + try buf.appendSlice(arena_alloc, target); + try buf.appendSlice(arena_alloc, "\n"); } - try std.testing.expectEqual(@as(usize, 1), ready_count); - try std.testing.expectEqual(@as(usize, 1), pending_count); } + + return try alloc.dupe(u8, buf.items); } -// ── Sub-workflow step tests ────────────────────────────────────────── +// ── Tests ───────────────────────────────────────────────────────────── -test "Engine: sub_workflow step creates child run" { +test "Engine: init and stop" { const allocator = std.testing.allocator; var store = try Store.init(allocator, ":memory:"); defer store.deinit(); - // Parent workflow has a sub_workflow step with inline workflow - const wf = - \\{"steps":[{"id":"sub1","type":"sub_workflow","workflow":{"steps":[{"id":"inner1","type":"task","prompt_template":"inner work"}]}}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_sub", "r1", "sub1", "sub_workflow", "ready", "{}", 1, null, null, null); - var engine = Engine.init(&store, allocator, 500); - - // Tick 1: creates child run and marks sub_workflow as running - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } - - // Verify sub_workflow step is "running" and has child_run_id - var child_run_id: []const u8 = undefined; - { - var arena = 
std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const sub_step = (try store.getStep(arena.allocator(), "step_sub")).?; - try std.testing.expectEqualStrings("running", sub_step.status); - try std.testing.expect(sub_step.child_run_id != null); - child_run_id = try allocator.dupe(u8, sub_step.child_run_id.?); - } - defer allocator.free(child_run_id); - - // Verify child run exists and has steps - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const child_run = (try store.getRun(arena.allocator(), child_run_id)).?; - try std.testing.expectEqualStrings("running", child_run.status); - - const child_steps = try store.getStepsByRun(arena.allocator(), child_run_id); - try std.testing.expectEqual(@as(usize, 1), child_steps.len); - try std.testing.expectEqualStrings("inner1", child_steps[0].def_step_id); - try std.testing.expectEqualStrings("ready", child_steps[0].status); - } + try std.testing.expect(engine.running.load(.acquire)); + engine.stop(); + try std.testing.expect(!engine.running.load(.acquire)); } -test "Engine: sub_workflow step completes when child run completes" { +test "Engine: tick with no active runs" { const allocator = std.testing.allocator; var store = try Store.init(allocator, ":memory:"); defer store.deinit(); - const wf = - \\{"steps":[{"id":"sub1","type":"sub_workflow","workflow":{"steps":[{"id":"inner1","type":"task","prompt_template":"inner work"}]}}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_sub", "r1", "sub1", "sub_workflow", "ready", "{}", 1, null, null, null); - var engine = Engine.init(&store, allocator, 500); - - // Tick 1: creates child run - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } - - // Get child run ID and manually complete its step + run - var child_run_id: []const u8 = 
undefined; - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const sub_step = (try store.getStep(arena.allocator(), "step_sub")).?; - child_run_id = try allocator.dupe(u8, sub_step.child_run_id.?); - } - defer allocator.free(child_run_id); - - // Complete the child run's step - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const child_steps = try store.getStepsByRun(arena.allocator(), child_run_id); - try store.updateStepStatus(child_steps[0].id, "completed", null, "{\"output\":\"inner result\"}", null, 1); - } - - // Mark child run as completed - try store.updateRunStatus(child_run_id, "completed", null); - - // Tick 2: sub_workflow should detect child run completed and complete itself - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } - - // Verify sub_workflow step completed with child's output - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const sub_step = (try store.getStep(arena.allocator(), "step_sub")).?; - try std.testing.expectEqualStrings("completed", sub_step.status); - try std.testing.expect(sub_step.output_json != null); - try std.testing.expect(std.mem.indexOf(u8, sub_step.output_json.?, "inner result") != null); - } + try engine.tick(); } -test "Engine: sub_workflow step fails when child run fails" { +test "engine: find ready nodes - simple chain" { const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + // Edges: __start__ -> a -> b -> __end__ const wf = - \\{"steps":[{"id":"sub1","type":"sub_workflow","workflow":{"steps":[{"id":"inner1","type":"task","prompt_template":"inner work"}]}}]} + 
\\{"nodes":{"a":{"type":"task"},"b":{"type":"task"}},"edges":[["__start__","a"],["a","b"],["b","__end__"]],"schema":{}} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_sub", "r1", "sub1", "sub_workflow", "ready", "{}", 1, null, null, null); - - var engine = Engine.init(&store, allocator, 500); - - // Tick 1: creates child run - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } - // Get child run ID - var child_run_id: []const u8 = undefined; + // Completed: [] -> ready: [a] { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const sub_step = (try store.getStep(arena.allocator(), "step_sub")).?; - child_run_id = try allocator.dupe(u8, sub_step.child_run_id.?); + var completed = std.StringHashMap(void).init(alloc); + var routes = std.StringHashMap([]const u8).init(alloc); + const ready = try findReadyNodes(alloc, wf, &completed, &routes); + try std.testing.expectEqual(@as(usize, 1), ready.len); + try std.testing.expectEqualStrings("a", ready[0]); } - defer allocator.free(child_run_id); - // Mark child run as failed - try store.updateRunStatus(child_run_id, "failed", "inner step failed"); - - // Tick 2: sub_workflow should detect child run failed + // Completed: [a] -> ready: [b] { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); + var completed = std.StringHashMap(void).init(alloc); + try completed.put("a", {}); + var routes = std.StringHashMap([]const u8).init(alloc); + const ready = try findReadyNodes(alloc, wf, &completed, &routes); + try std.testing.expectEqual(@as(usize, 1), ready.len); + try std.testing.expectEqualStrings("b", ready[0]); } - // Verify sub_workflow step failed + // Completed: 
[a, b] -> ready: [__end__] { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const sub_step = (try store.getStep(arena.allocator(), "step_sub")).?; - try std.testing.expectEqualStrings("failed", sub_step.status); - try std.testing.expect(sub_step.error_text != null); + var completed = std.StringHashMap(void).init(alloc); + try completed.put("a", {}); + try completed.put("b", {}); + var routes = std.StringHashMap([]const u8).init(alloc); + const ready = try findReadyNodes(alloc, wf, &completed, &routes); + try std.testing.expectEqual(@as(usize, 1), ready.len); + try std.testing.expectEqualStrings("__end__", ready[0]); } } -test "Engine: sub_workflow step fails without workflow" { +test "engine: find ready nodes - parallel" { const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + // Edges: __start__ -> a, __start__ -> b, a -> c, b -> c const wf = - \\{"steps":[{"id":"sub1","type":"sub_workflow"}]} + \\{"nodes":{"a":{"type":"task"},"b":{"type":"task"},"c":{"type":"task"}},"edges":[["__start__","a"],["__start__","b"],["a","c"],["b","c"]],"schema":{}} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_sub", "r1", "sub1", "sub_workflow", "ready", "{}", 1, null, null, null); - var engine = Engine.init(&store, allocator, 500); + // Completed: [] -> ready: [a, b] + { + var completed = std.StringHashMap(void).init(alloc); + var routes = std.StringHashMap([]const u8).init(alloc); + const ready = try findReadyNodes(alloc, wf, &completed, &routes); + try std.testing.expectEqual(@as(usize, 2), ready.len); + // Both a and b should be ready (order may vary) + var has_a = false; + var has_b = false; + for (ready) |name| { + if (std.mem.eql(u8, name, "a")) has_a = true; + if (std.mem.eql(u8, name, "b")) has_b = true; + } + try 
std.testing.expect(has_a); + try std.testing.expect(has_b); + } + // Completed: [a] -> ready: [] (c needs both a and b) { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); + var completed = std.StringHashMap(void).init(alloc); + try completed.put("a", {}); + var routes = std.StringHashMap([]const u8).init(alloc); + const ready = try findReadyNodes(alloc, wf, &completed, &routes); + // b is already in completed? No. So b should be ready + // Wait - b is from __start__ and __start__ is always completed + // b should be ready since its only inbound is __start__ + // But if we only put "a" as completed, b's inbound __start__ is always satisfied + // So b should be ready. And c should NOT be ready since b is not completed. + var has_c = false; + for (ready) |name| { + if (std.mem.eql(u8, name, "c")) has_c = true; + } + try std.testing.expect(!has_c); } + // Completed: [a, b] -> ready: [c] { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const sub_step = (try store.getStep(arena.allocator(), "step_sub")).?; - try std.testing.expectEqualStrings("failed", sub_step.status); + var completed = std.StringHashMap(void).init(alloc); + try completed.put("a", {}); + try completed.put("b", {}); + var routes = std.StringHashMap([]const u8).init(alloc); + const ready = try findReadyNodes(alloc, wf, &completed, &routes); + try std.testing.expectEqual(@as(usize, 1), ready.len); + try std.testing.expectEqualStrings("c", ready[0]); } } -test "Engine: loop step fails without body" { +test "engine: find ready nodes - route edges" { const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + // Edges: __start__ -> r, r:yes -> a, r:no -> b 
const wf = - \\{"steps":[{"id":"loop1","type":"loop","max_iterations":3,"exit_condition":"done"}]} + \\{"nodes":{"r":{"type":"route"},"a":{"type":"task"},"b":{"type":"task"}},"edges":[["__start__","r"],["r:yes","a"],["r:no","b"]],"schema":{}} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_loop", "r1", "loop1", "loop", "ready", "{}", 1, null, null, null); - var engine = Engine.init(&store, allocator, 500); + // Completed: [r] with route result "yes" -> ready: [a] + { + var completed = std.StringHashMap(void).init(alloc); + try completed.put("r", {}); + var routes = std.StringHashMap([]const u8).init(alloc); + try routes.put("r", "yes"); + const ready = try findReadyNodes(alloc, wf, &completed, &routes); + try std.testing.expectEqual(@as(usize, 1), ready.len); + try std.testing.expectEqualStrings("a", ready[0]); + } + // Completed: [r] with route result "no" -> ready: [b] { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); + var completed = std.StringHashMap(void).init(alloc); + try completed.put("r", {}); + var routes = std.StringHashMap([]const u8).init(alloc); + try routes.put("r", "no"); + const ready = try findReadyNodes(alloc, wf, &completed, &routes); + try std.testing.expectEqual(@as(usize, 1), ready.len); + try std.testing.expectEqualStrings("b", ready[0]); } + // Completed: [r] with route result "yes" -> b should NOT be ready { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const loop_step = (try store.getStep(arena.allocator(), "step_loop")).?; - try std.testing.expectEqualStrings("failed", loop_step.status); + var completed = std.StringHashMap(void).init(alloc); + try completed.put("r", {}); + var routes = std.StringHashMap([]const u8).init(alloc); + try routes.put("r", "yes"); + const ready = try findReadyNodes(alloc, wf, 
&completed, &routes); + for (ready) |name| { + try std.testing.expect(!std.mem.eql(u8, name, "b")); + } } } -// ── Debate step tests ──────────────────────────────────────────────── - -test "Engine: debate step creates participant children" { +test "engine: processRun completes simple workflow" { const allocator = std.testing.allocator; var store = try Store.init(allocator, ":memory:"); defer store.deinit(); + // Create a workflow with just a transform node const wf = - \\{"steps":[{"id":"review","type":"debate","count":2,"worker_tags":["reviewer"],"judge_tags":["senior"],"prompt_template":"Review this code","judge_template":"Pick the best:\n{{debate_responses}}"}]} + \\{"nodes":{"t1":{"type":"transform","updates":"{\"result\":\"done\"}"}},"edges":[["__start__","t1"],["t1","__end__"]],"schema":{"result":{"type":"string","reducer":"last_value"}}} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_debate", "r1", "review", "debate", "ready", "{}", 1, null, null, null); + + try store.createRunWithState("r1", null, wf, "{}", "{}"); + try store.updateRunStatus("r1", "running", null); var engine = Engine.init(&store, allocator, 500); @@ -4305,30 +2939,26 @@ test "Engine: debate step creates participant children" { const run_row = (try store.getRun(arena.allocator(), "r1")).?; try engine.processRun(arena.allocator(), run_row); - // Debate step should be "running" - const debate_step = (try store.getStep(arena.allocator(), "step_debate")).?; - try std.testing.expectEqualStrings("running", debate_step.status); + const updated_run = (try store.getRun(arena.allocator(), "r1")).?; + try std.testing.expectEqualStrings("completed", updated_run.status); - // Should have 2 participant children - const children = try store.getChildSteps(arena.allocator(), "step_debate"); - try std.testing.expectEqual(@as(usize, 2), children.len); - - for (children) |child| { - try std.testing.expectEqualStrings("ready", child.status); - try 
std.testing.expectEqualStrings("task", child.type); + // Verify state was updated + if (updated_run.state_json) |sj| { + try std.testing.expect(std.mem.indexOf(u8, sj, "done") != null); } } -test "Engine: debate step fails without count" { +test "engine: interrupt node stops run" { const allocator = std.testing.allocator; var store = try Store.init(allocator, ":memory:"); defer store.deinit(); const wf = - \\{"steps":[{"id":"review","type":"debate","prompt_template":"Review this"}]} + \\{"nodes":{"i1":{"type":"interrupt"}},"edges":[["__start__","i1"],["i1","__end__"]],"schema":{}} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_debate", "r1", "review", "debate", "ready", "{}", 1, null, null, null); + + try store.createRunWithState("r1", null, wf, "{}", "{}"); + try store.updateRunStatus("r1", "running", null); var engine = Engine.init(&store, allocator, 500); @@ -4338,290 +2968,394 @@ test "Engine: debate step fails without count" { const run_row = (try store.getRun(arena.allocator(), "r1")).?; try engine.processRun(arena.allocator(), run_row); - const step = (try store.getStep(arena.allocator(), "step_debate")).?; - try std.testing.expectEqualStrings("failed", step.status); + const updated_run = (try store.getRun(arena.allocator(), "r1")).?; + try std.testing.expectEqualStrings("interrupted", updated_run.status); } -test "Engine: debate step fails without prompt_template" { +test "engine: route node with conditional edges" { const allocator = std.testing.allocator; var store = try Store.init(allocator, ":memory:"); defer store.deinit(); + // Workflow: start -> route -> (yes: t_yes, no: t_no) -> end const wf = - \\{"steps":[{"id":"review","type":"debate","count":2}]} + 
\\{"nodes":{"r":{"type":"route","input":"state.decision"},"t_yes":{"type":"transform","updates":"{\"path\":\"yes\"}"},"t_no":{"type":"transform","updates":"{\"path\":\"no\"}"}},"edges":[["__start__","r"],["r:yes","t_yes"],["r:no","t_no"],["t_yes","__end__"],["t_no","__end__"]],"schema":{"decision":{"type":"string","reducer":"last_value"},"path":{"type":"string","reducer":"last_value"}}} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_debate", "r1", "review", "debate", "ready", "{}", 1, null, null, null); + + const init_state = + \\{"decision":"yes"} + ; + + try store.createRunWithState("r1", null, wf, "{}", init_state); + try store.updateRunStatus("r1", "running", null); var engine = Engine.init(&store, allocator, 500); var arena = std.heap.ArenaAllocator.init(allocator); defer arena.deinit(); + // First tick: route node executes and completes const run_row = (try store.getRun(arena.allocator(), "r1")).?; try engine.processRun(arena.allocator(), run_row); - const step = (try store.getStep(arena.allocator(), "step_debate")).?; - try std.testing.expectEqualStrings("failed", step.status); + // May need a second tick to process t_yes and __end__ + const run_row2 = (try store.getRun(arena.allocator(), "r1")).?; + if (std.mem.eql(u8, run_row2.status, "running")) { + try engine.processRun(arena.allocator(), run_row2); + } + + const updated_run = (try store.getRun(arena.allocator(), "r1")).?; + try std.testing.expectEqualStrings("completed", updated_run.status); + + // Verify the "yes" path was taken + if (updated_run.state_json) |sj| { + try std.testing.expect(std.mem.indexOf(u8, sj, "yes") != null); + } } -test "Engine: debate step creates judge after participants complete" { +test "engine: route node falls back to declared default route" { const allocator = std.testing.allocator; var store = try Store.init(allocator, ":memory:"); defer store.deinit(); const wf = - 
\\{"steps":[{"id":"review","type":"debate","count":2,"worker_tags":["reviewer"],"judge_tags":["senior"],"prompt_template":"Review this code","judge_template":"Pick the best:\n{{debate_responses}}"}]} + \\{"nodes":{"r":{"type":"route","input":"state.decision","routes":{"yes":"t_yes","fallback":"t_fallback"},"default":"fallback"},"t_yes":{"type":"transform","updates":"{\"path\":\"yes\"}"},"t_fallback":{"type":"transform","updates":"{\"path\":\"fallback\"}"}},"edges":[["__start__","r"],["r:yes","t_yes"],["r:fallback","t_fallback"],["t_yes","__end__"],["t_fallback","__end__"]],"schema":{"decision":{"type":"string","reducer":"last_value"},"path":{"type":"string","reducer":"last_value"}}} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_debate", "r1", "review", "debate", "ready", "{}", 1, null, null, null); + + try store.createRunWithState("r1", null, wf, "{}", "{\"decision\":\"unknown\"}"); + try store.updateRunStatus("r1", "running", null); var engine = Engine.init(&store, allocator, 500); - // Tick 1: creates participant children - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); - // Complete both participant children - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_debate"); - try std.testing.expectEqual(@as(usize, 2), children.len); - try store.updateStepStatus(children[0].id, "completed", null, "{\"output\":\"review A\"}", null, 1); - try store.updateStepStatus(children[1].id, "completed", null, "{\"output\":\"review B\"}", null, 1); - } + const run_row = (try store.getRun(arena.allocator(), "r1")).?; + try engine.processRun(arena.allocator(), run_row); - // Tick 2: should create judge child 
- { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); + const run_row2 = (try store.getRun(arena.allocator(), "r1")).?; + if (std.mem.eql(u8, run_row2.status, "running")) { + try engine.processRun(arena.allocator(), run_row2); } - // Should now have 3 children (2 participants + 1 judge) - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_debate"); - try std.testing.expectEqual(@as(usize, 3), children.len); - - // Find judge child - var found_judge = false; - for (children) |child| { - if (std.mem.indexOf(u8, child.def_step_id, "_judge") != null) { - found_judge = true; - try std.testing.expectEqualStrings("ready", child.status); - try std.testing.expectEqualStrings("task", child.type); - } - } - try std.testing.expect(found_judge); + const updated_run = (try store.getRun(arena.allocator(), "r1")).?; + try std.testing.expectEqualStrings("completed", updated_run.status); + if (updated_run.state_json) |sj| { + try std.testing.expect(std.mem.indexOf(u8, sj, "fallback") != null); } } -test "Engine: debate step completes when judge completes" { +test "wrapOutput creates valid JSON" { const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + const result = try wrapOutput(arena.allocator(), "hello world"); + try std.testing.expectEqualStrings("{\"output\":\"hello world\"}", result); +} + +test "wrapOutput escapes special characters" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + const result = try wrapOutput(arena.allocator(), "line1\nline2"); + try 
std.testing.expectEqualStrings("{\"output\":\"line1\\nline2\"}", result); +} + +test "serializeCompletedNodes" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var completed = std.StringHashMap(void).init(alloc); + try completed.put("a", {}); + try completed.put("b", {}); + + const result = try serializeCompletedNodes(alloc, &completed); + // Should be a JSON array containing "a" and "b" + try std.testing.expect(std.mem.indexOf(u8, result, "\"a\"") != null); + try std.testing.expect(std.mem.indexOf(u8, result, "\"b\"") != null); +} + +test "getNodeJson returns node definition" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); const wf = - \\{"steps":[{"id":"review","type":"debate","count":2,"prompt_template":"Review this","judge_template":"Pick best: {{debate_responses}}"}]} + \\{"nodes":{"a":{"type":"task","prompt_template":"hello"}},"edges":[]} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_debate", "r1", "review", "debate", "ready", "{}", 1, null, null, null); + const result = getNodeJson(arena.allocator(), wf, "a"); + try std.testing.expect(result != null); + try std.testing.expect(std.mem.indexOf(u8, result.?, "task") != null); +} - var engine = Engine.init(&store, allocator, 500); +test "getNodeJson returns null for missing node" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); - // Tick 1: creates participant children - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + const wf = + \\{"nodes":{"a":{"type":"task"}},"edges":[]} + ; + const result = getNodeJson(arena.allocator(), wf, "b"); + try 
std.testing.expect(result == null); +} - // Complete participants - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_debate"); - try store.updateStepStatus(children[0].id, "completed", null, "{\"output\":\"A\"}", null, 1); - try store.updateStepStatus(children[1].id, "completed", null, "{\"output\":\"B\"}", null, 1); - } +test "getNodeField extracts string field" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); - // Tick 2: creates judge child - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + const node = + \\{"type":"task","prompt_template":"hello {{state.name}}"} + ; + const result = getNodeField(arena.allocator(), node, "prompt_template"); + try std.testing.expect(result != null); + try std.testing.expectEqualStrings("hello {{state.name}}", result.?); +} - // Complete the judge child - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_debate"); - for (children) |child| { - if (std.mem.indexOf(u8, child.def_step_id, "_judge") != null) { - try store.updateStepStatus(child.id, "completed", null, "{\"output\":\"A is best\"}", null, 1); - } - } - } +test "extractStateUpdates from worker response" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); - // Tick 3: debate should be completed - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + const output = + \\{"state_updates":{"result":"done","count":5},"other":"ignored"} + 
; + const result = extractStateUpdates(arena.allocator(), output); + try std.testing.expect(result != null); + try std.testing.expect(std.mem.indexOf(u8, result.?, "done") != null); +} - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const debate_step = (try store.getStep(arena.allocator(), "step_debate")).?; - try std.testing.expectEqualStrings("completed", debate_step.status); - try std.testing.expect(debate_step.output_json != null); - try std.testing.expect(std.mem.indexOf(u8, debate_step.output_json.?, "A is best") != null); - } +test "extractStateUpdates returns null for plain text" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + const result = extractStateUpdates(arena.allocator(), "just plain text"); + try std.testing.expect(result == null); } -test "Engine: debate step completes without judge_template" { +test "buildTaskStateUpdates uses output_key for plain text output" { const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); - // No judge_template — should complete with collected responses when participants are done - const wf = - \\{"steps":[{"id":"review","type":"debate","count":2,"prompt_template":"Review this"}]} + const node = + \\{"type":"task","output_key":"plan"} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_debate", "r1", "review", "debate", "ready", "{}", 1, null, null, null); + const result = try buildTaskStateUpdates(arena.allocator(), node, "draft plan"); + try std.testing.expectEqualStrings("{\"plan\":\"draft plan\"}", result); +} - var engine = Engine.init(&store, allocator, 500); +test "buildTaskStateUpdates applies output_mapping from JSON output" { + const allocator = std.testing.allocator; + var arena = 
std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); - // Tick 1: creates participant children - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + const node = + \\{"type":"task","output_key":"review_result","output_mapping":{"grade":"grade","feedback":"details.feedback"}} + ; + const output = + \\{"grade":"approve","details":{"feedback":"looks good"}} + ; + const result = try buildTaskStateUpdates(arena.allocator(), node, output); + try std.testing.expect(std.mem.indexOf(u8, result, "\"review_result\":{\"grade\":\"approve\"") != null); + try std.testing.expect(std.mem.indexOf(u8, result, "\"grade\":\"approve\"") != null); + try std.testing.expect(std.mem.indexOf(u8, result, "\"feedback\":\"looks good\"") != null); +} - // Complete participants - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_debate"); - try store.updateStepStatus(children[0].id, "completed", null, "{\"output\":\"review 1\"}", null, 1); - try store.updateStepStatus(children[1].id, "completed", null, "{\"output\":\"review 2\"}", null, 1); - } +test "getSendItemsPath prefers canonical items_key" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); - // Tick 2: no judge_template, should complete with responses - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + const node = + \\{"type":"send","items_key":"state.files","items_from":"state.legacy"} + ; + const result = getSendItemsPath(arena.allocator(), node); + try std.testing.expect(result != null); + try std.testing.expectEqualStrings("state.files", result.?); +} 
- { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const debate_step = (try store.getStep(arena.allocator(), "step_debate")).?; - try std.testing.expectEqualStrings("completed", debate_step.status); - try std.testing.expect(debate_step.output_json != null); - } +test "getSendItemsPath accepts legacy items_from alias" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + const node = + \\{"type":"send","items_from":"state.files"} + ; + const result = getSendItemsPath(arena.allocator(), node); + try std.testing.expect(result != null); + try std.testing.expectEqualStrings("state.files", result.?); } -test "Engine: debate step fails when participant fails" { +test "extractGotoTargets: string target" { const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); - const wf = - \\{"steps":[{"id":"review","type":"debate","count":2,"prompt_template":"Review this","judge_template":"Pick: {{debate_responses}}"}]} + const output = + \\{"state_updates":{"x":1},"goto":"merge_step"} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_debate", "r1", "review", "debate", "ready", "{}", 1, null, null, null); + const targets = extractGotoTargets(arena.allocator(), output); + try std.testing.expect(targets != null); + try std.testing.expectEqual(@as(usize, 1), targets.?.len); + try std.testing.expectEqualStrings("merge_step", targets.?[0]); +} - var engine = Engine.init(&store, allocator, 500); +test "extractGotoTargets: array targets" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); - // Tick 1: creates participant children - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try 
store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + const output = + \\{"goto":["step_a","step_b"]} + ; + const targets = extractGotoTargets(arena.allocator(), output); + try std.testing.expect(targets != null); + try std.testing.expectEqual(@as(usize, 2), targets.?.len); + try std.testing.expectEqualStrings("step_a", targets.?[0]); + try std.testing.expectEqualStrings("step_b", targets.?[1]); +} - // Fail one participant - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_debate"); - try store.updateStepStatus(children[0].id, "completed", null, "{\"output\":\"review A\"}", null, 1); - try store.updateStepStatus(children[1].id, "failed", null, null, "worker error", 1); - } +test "extractGotoTargets: no goto field" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); - // Tick 2: debate should fail - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + const targets = extractGotoTargets(arena.allocator(), "{\"state_updates\":{}}"); + try std.testing.expect(targets == null); +} - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const debate_step = (try store.getStep(arena.allocator(), "step_debate")).?; - try std.testing.expectEqualStrings("failed", debate_step.status); - } +test "extractGotoTargets: not JSON" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + const targets = extractGotoTargets(arena.allocator(), "plain text"); + try std.testing.expect(targets == null); } -// ── Group chat step tests ──────────────────────────────────────────── +test "parseBreakpointList: valid list" { + const 
allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + const wf = + \\{"interrupt_before":["review","merge"],"interrupt_after":["generate"],"nodes":{},"edges":[]} + ; + const before = parseBreakpointList(arena.allocator(), wf, "interrupt_before"); + try std.testing.expectEqual(@as(usize, 2), before.len); + try std.testing.expectEqualStrings("review", before[0]); + try std.testing.expectEqualStrings("merge", before[1]); + + const after = parseBreakpointList(arena.allocator(), wf, "interrupt_after"); + try std.testing.expectEqual(@as(usize, 1), after.len); + try std.testing.expectEqualStrings("generate", after[0]); +} -test "Engine: group_chat step parses participants and starts" { +test "parseBreakpointList: missing field" { const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); const wf = - \\{"steps":[{"id":"discuss","type":"group_chat","participants":[{"tags":["architect"],"role":"Architect"},{"tags":["security"],"role":"Security"}],"max_rounds":3,"exit_condition":"CONSENSUS","prompt_template":"Discuss: topic","round_template":"Previous:\n{{chat_history}}\nYour role: {{role}}. 
Respond."}]} + \\{"nodes":{},"edges":[]} + ; + const result = parseBreakpointList(arena.allocator(), wf, "interrupt_before"); + try std.testing.expectEqual(@as(usize, 0), result.len); +} + +test "isInBreakpointList" { + const list = [_][]const u8{ "review", "merge" }; + try std.testing.expect(isInBreakpointList("review", &list)); + try std.testing.expect(isInBreakpointList("merge", &list)); + try std.testing.expect(!isInBreakpointList("build", &list)); +} + +test "getNodeFieldInt: valid integer" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + const node = + \\{"type":"agent","max_turns":10} + ; + const result = getNodeFieldInt(arena.allocator(), node, "max_turns"); + try std.testing.expect(result != null); + try std.testing.expectEqual(@as(i64, 10), result.?); +} + +test "getNodeFieldInt: missing field" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + const node = + \\{"type":"task"} + ; + const result = getNodeFieldInt(arena.allocator(), node, "max_turns"); + try std.testing.expect(result == null); +} + +test "getNodeFieldInt: string field returns null" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + const node = + \\{"type":"task","max_turns":"five"} + ; + const result = getNodeFieldInt(arena.allocator(), node, "max_turns"); + try std.testing.expect(result == null); +} + +test "buildSubgraphInput: maps values from parent state" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + const parent_state = + \\{"fix_result":"patched code","count":42} + ; + const mapping = + \\{"code":"state.fix_result"} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_gc", "r1", "discuss", 
"group_chat", "ready", "{}", 1, null, null, null); - var engine = Engine.init(&store, allocator, 500); + const result = try buildSubgraphInput(alloc, parent_state, mapping); + const parsed = try json.parseFromSlice(json.Value, alloc, result, .{}); + try std.testing.expect(parsed.value == .object); + const code = parsed.value.object.get("code") orelse return error.TestUnexpectedResult; + try std.testing.expectEqualStrings("patched code", code.string); +} +test "buildSubgraphInput: empty mapping" { + const allocator = std.testing.allocator; var arena = std.heap.ArenaAllocator.init(allocator); defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - - // group_chat step should be "running" - const gc_step = (try store.getStep(arena.allocator(), "step_gc")).?; - try std.testing.expectEqualStrings("running", gc_step.status); + const result = try buildSubgraphInput(arena.allocator(), "{\"x\":1}", "{}"); + try std.testing.expectEqualStrings("{}", result); } -test "Engine: group_chat step fails without participants" { +test "engine: breakpoint interrupt_before stops run" { const allocator = std.testing.allocator; var store = try Store.init(allocator, ":memory:"); defer store.deinit(); + // Workflow with interrupt_before on t1 const wf = - \\{"steps":[{"id":"discuss","type":"group_chat","prompt_template":"Discuss"}]} + \\{"interrupt_before":["t1"],"nodes":{"t1":{"type":"transform","updates":"{\"result\":\"done\"}"}},"edges":[["__start__","t1"],["t1","__end__"]],"schema":{"result":{"type":"string","reducer":"last_value"}}} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_gc", "r1", "discuss", "group_chat", "ready", "{}", 1, null, null, null); + + try store.createRunWithState("r1", null, wf, "{}", "{}"); + try store.updateRunStatus("r1", "running", null); var engine = Engine.init(&store, allocator, 500); @@ -4631,20 +3365,23 @@ test "Engine: 
group_chat step fails without participants" { const run_row = (try store.getRun(arena.allocator(), "r1")).?; try engine.processRun(arena.allocator(), run_row); - const step = (try store.getStep(arena.allocator(), "step_gc")).?; - try std.testing.expectEqualStrings("failed", step.status); + const updated_run = (try store.getRun(arena.allocator(), "r1")).?; + // Should be interrupted, not completed, because interrupt_before fires before t1 + try std.testing.expectEqualStrings("interrupted", updated_run.status); } -test "Engine: group_chat step fails without prompt_template" { +test "engine: breakpoint interrupt_after stops run after node" { const allocator = std.testing.allocator; var store = try Store.init(allocator, ":memory:"); defer store.deinit(); + // Workflow with interrupt_after on t1; there's a t2 after t1 const wf = - \\{"steps":[{"id":"discuss","type":"group_chat","participants":[{"tags":["a"],"role":"A"}]}]} + \\{"interrupt_after":["t1"],"nodes":{"t1":{"type":"transform","updates":"{\"x\":\"done\"}"},"t2":{"type":"transform","updates":"{\"y\":\"also\"}"}},"edges":[["__start__","t1"],["t1","t2"],["t2","__end__"]],"schema":{"x":{"type":"string","reducer":"last_value"},"y":{"type":"string","reducer":"last_value"}}} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_gc", "r1", "discuss", "group_chat", "ready", "{}", 1, null, null, null); + + try store.createRunWithState("r1", null, wf, "{}", "{}"); + try store.updateRunStatus("r1", "running", null); var engine = Engine.init(&store, allocator, 500); @@ -4654,130 +3391,105 @@ test "Engine: group_chat step fails without prompt_template" { const run_row = (try store.getRun(arena.allocator(), "r1")).?; try engine.processRun(arena.allocator(), run_row); - const step = (try store.getStep(arena.allocator(), "step_gc")).?; - try std.testing.expectEqualStrings("failed", step.status); + const updated_run = (try store.getRun(arena.allocator(), "r1")).?; + // t1 should have 
executed (state contains x), but run is interrupted + try std.testing.expectEqualStrings("interrupted", updated_run.status); + // Verify t1's state was saved + if (updated_run.state_json) |sj| { + try std.testing.expect(std.mem.indexOf(u8, sj, "done") != null); + } } -test "Engine: group_chat builds chat history across rounds" { +test "engine: configurable runs inject __config" { const allocator = std.testing.allocator; var store = try Store.init(allocator, ":memory:"); defer store.deinit(); - // Manually insert chat messages and test the poll logic + // Workflow with a transform that sets result const wf = - \\{"steps":[{"id":"discuss","type":"group_chat","participants":[{"tags":["a"],"role":"Architect"},{"tags":["b"],"role":"Security"}],"max_rounds":2,"exit_condition":"CONSENSUS","prompt_template":"Discuss topic","round_template":"Previous:\n{{chat_history}}\nYour role: {{role}}. Respond."}]} + \\{"nodes":{"t1":{"type":"transform","updates":"{\"result\":\"ok\"}"}},"edges":[["__start__","t1"],["t1","__end__"]],"schema":{"result":{"type":"string","reducer":"last_value"},"__config":{"type":"object","reducer":"last_value"}}} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_gc", "r1", "discuss", "group_chat", "running", "{}", 1, null, null, null); - // Insert round 1 messages (simulating what dispatch would produce) - try store.insertChatMessage("r1", "step_gc", 1, "Architect", null, "I suggest microservices"); - try store.insertChatMessage("r1", "step_gc", 1, "Security", null, "We need auth first"); + try store.createRunWithState("r1", null, wf, "{}", "{}"); + try store.setConfigJson("r1", "{\"model\":\"gpt-4\"}"); + try store.updateRunStatus("r1", "running", null); var engine = Engine.init(&store, allocator, 500); - // Poll: round 1 complete, no CONSENSUS, max_rounds=2, so it should try round 2 - // Since no workers, dispatch will fail silently. Then next poll round_count stays at 2 for round 1. 
- { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } - - // Step should still be running (no workers to dispatch round 2) - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const step = (try store.getStep(arena.allocator(), "step_gc")).?; - try std.testing.expectEqualStrings("running", step.status); - } - - // Simulate round 2 messages with CONSENSUS - try store.insertChatMessage("r1", "step_gc", 2, "Architect", null, "CONSENSUS reached"); - try store.insertChatMessage("r1", "step_gc", 2, "Security", null, "Agreed, CONSENSUS"); + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); - // Poll: round 2 complete with CONSENSUS, should complete - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + const run_row = (try store.getRun(arena.allocator(), "r1")).?; + try engine.processRun(arena.allocator(), run_row); - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const step = (try store.getStep(arena.allocator(), "step_gc")).?; - try std.testing.expectEqualStrings("completed", step.status); - try std.testing.expect(step.output_json != null); + const updated_run = (try store.getRun(arena.allocator(), "r1")).?; + try std.testing.expectEqualStrings("completed", updated_run.status); + // Verify __config was injected into state + if (updated_run.state_json) |sj| { + try std.testing.expect(std.mem.indexOf(u8, sj, "__config") != null); + try std.testing.expect(std.mem.indexOf(u8, sj, "gpt-4") != null); } } -test "Engine: group_chat completes at max_rounds" { +test "engine: transform store_updates uses trusted tracker settings" { const allocator = std.testing.allocator; var store = try 
Store.init(allocator, ":memory:"); defer store.deinit(); + test_store_write_base_url = ""; + test_store_write_api_token = null; + test_store_write_namespace = ""; + test_store_write_key = ""; + test_store_write_value_json = ""; + const wf = - \\{"steps":[{"id":"discuss","type":"group_chat","participants":[{"tags":["a"],"role":"A"},{"tags":["b"],"role":"B"}],"max_rounds":1,"exit_condition":"NEVER_MATCH","prompt_template":"Discuss","round_template":"{{chat_history}} {{role}}"}]} + \\{"nodes":{"save":{"type":"transform","updates":"{\"review_result\":{\"grade\":\"approved\"}}","store_updates":{"namespace":"project_context","key":"latest_review","value":"state.review_result"}}},"edges":[["__start__","save"],["save","__end__"]],"schema":{"review_result":{"type":"object","reducer":"last_value"},"__config":{"type":"object","reducer":"last_value"}}} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_gc", "r1", "discuss", "group_chat", "running", "{}", 1, null, null, null); - // Insert round 1 messages (no exit condition match) - try store.insertChatMessage("r1", "step_gc", 1, "A", null, "hello"); - try store.insertChatMessage("r1", "step_gc", 1, "B", null, "world"); + try store.createRunWithState("r1", null, wf, "{}", "{}"); + try store.updateRunStatus("r1", "running", null); var engine = Engine.init(&store, allocator, 500); + engine.store_writer = mockStoreWriter; + engine.setTrustedTrackerAccess("http://tickets.test", "secret-token"); - // Poll: round 1 complete, no exit match, max_rounds=1, should complete - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } - - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const step = (try store.getStep(arena.allocator(), "step_gc")).?; - try std.testing.expectEqualStrings("completed", step.status); - } -} - 
-test "buildChatTranscript formats messages" { - const allocator = std.testing.allocator; var arena = std.heap.ArenaAllocator.init(allocator); defer arena.deinit(); - const messages = [_]types.ChatMessageRow{ - .{ .id = 1, .run_id = "r1", .step_id = "s1", .round = 1, .role = "Architect", .worker_id = null, .message = "hello", .ts_ms = 1000 }, - .{ .id = 2, .run_id = "r1", .step_id = "s1", .round = 1, .role = "Security", .worker_id = null, .message = "world", .ts_ms = 1001 }, - }; + const run_row = (try store.getRun(arena.allocator(), "r1")).?; + try engine.processRun(arena.allocator(), run_row); - const transcript = try buildChatTranscript(arena.allocator(), &messages); - try std.testing.expect(std.mem.indexOf(u8, transcript, "Architect") != null); - try std.testing.expect(std.mem.indexOf(u8, transcript, "Security") != null); - try std.testing.expect(std.mem.indexOf(u8, transcript, "hello") != null); - try std.testing.expect(std.mem.indexOf(u8, transcript, "world") != null); + const updated_run = (try store.getRun(arena.allocator(), "r1")).?; + try std.testing.expectEqualStrings("completed", updated_run.status); + try std.testing.expectEqualStrings("http://tickets.test", test_store_write_base_url); + try std.testing.expect(test_store_write_api_token != null); + try std.testing.expectEqualStrings("secret-token", test_store_write_api_token.?); + try std.testing.expectEqualStrings("project_context", test_store_write_namespace); + try std.testing.expectEqualStrings("latest_review", test_store_write_key); + try std.testing.expectEqualStrings("{\"grade\":\"approved\"}", test_store_write_value_json); } -// ── Saga step tests ────────────────────────────────────────────────── - -test "Engine: saga step creates first body child and initializes state" { +test "engine: workflow cannot override trusted tracker settings" { const allocator = std.testing.allocator; var store = try Store.init(allocator, ":memory:"); defer store.deinit(); + test_store_write_base_url = ""; + 
test_store_write_api_token = null; + test_store_write_namespace = ""; + test_store_write_key = ""; + test_store_write_value_json = ""; + const wf = - \\{"steps":[{"id":"deploy_saga","type":"saga","body":["provision","deploy","verify"],"compensations":{"provision":"deprovision","deploy":"rollback_deploy"}},{"id":"provision","type":"task","prompt_template":"provision"},{"id":"deploy","type":"task","prompt_template":"deploy"},{"id":"verify","type":"task","prompt_template":"verify"},{"id":"deprovision","type":"task","prompt_template":"deprovision"},{"id":"rollback_deploy","type":"task","prompt_template":"rollback"}]} + \\{"tracker_url":"http://evil.test","tracker_api_token":"evil-token","nodes":{"save":{"type":"transform","updates":"{\"review_result\":{\"grade\":\"approved\"}}","store_updates":{"namespace":"project_context","key":"latest_review","value":"state.review_result"}}},"edges":[["__start__","save"],["save","__end__"]],"schema":{"review_result":{"type":"object","reducer":"last_value"}}} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_saga", "r1", "deploy_saga", "saga", "ready", "{}", 1, null, null, null); + + try store.createRunWithState("r1", null, wf, "{}", "{}"); + try store.updateRunStatus("r1", "running", null); var engine = Engine.init(&store, allocator, 500); + engine.store_writer = mockStoreWriter; + engine.setTrustedTrackerAccess("http://tickets.test", "secret-token"); var arena = std.heap.ArenaAllocator.init(allocator); defer arena.deinit(); @@ -4785,285 +3497,114 @@ test "Engine: saga step creates first body child and initializes state" { const run_row = (try store.getRun(arena.allocator(), "r1")).?; try engine.processRun(arena.allocator(), run_row); - // Saga step should be "running" - const saga_step = (try store.getStep(arena.allocator(), "step_saga")).?; - try std.testing.expectEqualStrings("running", saga_step.status); - - // Should have created 1 child step (first body step) - const children = try 
store.getChildSteps(arena.allocator(), "step_saga"); - try std.testing.expectEqual(@as(usize, 1), children.len); - try std.testing.expectEqualStrings("provision", children[0].def_step_id); - try std.testing.expectEqualStrings("ready", children[0].status); - - // Should have saga_state entries - const saga_states = try store.getSagaStates(arena.allocator(), "r1", "step_saga"); - try std.testing.expectEqual(@as(usize, 3), saga_states.len); - try std.testing.expectEqualStrings("pending", saga_states[0].status); - try std.testing.expectEqualStrings("pending", saga_states[1].status); - try std.testing.expectEqualStrings("pending", saga_states[2].status); + try std.testing.expectEqualStrings("http://tickets.test", test_store_write_base_url); + try std.testing.expect(test_store_write_api_token != null); + try std.testing.expectEqualStrings("secret-token", test_store_write_api_token.?); } -test "Engine: saga step executes body sequentially and completes" { - const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - const wf = - \\{"steps":[{"id":"saga1","type":"saga","body":["s1","s2"],"compensations":{"s1":"c1"}},{"id":"s1","type":"task","prompt_template":"step1"},{"id":"s2","type":"task","prompt_template":"step2"},{"id":"c1","type":"task","prompt_template":"comp1"}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_saga", "r1", "saga1", "saga", "ready", "{}", 1, null, null, null); - - var engine = Engine.init(&store, allocator, 500); - - // Tick 1: creates first body child (s1) - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } - - // Complete first body child (s1) - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), 
"step_saga"); - try std.testing.expectEqual(@as(usize, 1), children.len); - try store.updateStepStatus(children[0].id, "completed", null, "{\"output\":\"provisioned\"}", null, 1); - } - - // Tick 2: detects s1 completed, creates s2 - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } +test "encodePathSegment percent-encodes reserved characters" { + const encoded = try encodePathSegment(std.testing.allocator, "task/alpha beta"); + defer std.testing.allocator.free(encoded); - // Should now have 2 children - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_saga"); - try std.testing.expectEqual(@as(usize, 2), children.len); - } + try std.testing.expectEqualStrings("task%2Falpha%20beta", encoded); +} - // Complete second body child (s2) - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_saga"); - for (children) |child| { - if (std.mem.eql(u8, child.def_step_id, "s2")) { - try store.updateStepStatus(child.id, "completed", null, "{\"output\":\"deployed\"}", null, 1); - } - } - } +test "getWorkflowVersion: extracts version" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); - // Tick 3: detects s2 completed, all body steps done, saga completes - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + try std.testing.expectEqual(@as(i64, 2), getWorkflowVersion(arena.allocator(), "{\"version\":2,\"nodes\":{}}")); + try std.testing.expectEqual(@as(i64, 1), getWorkflowVersion(arena.allocator(), "{\"nodes\":{}}")); 
+ try std.testing.expectEqual(@as(i64, 1), getWorkflowVersion(arena.allocator(), "invalid")); +} - // Tick 4: saga polls — should now detect all completed - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } +test "getCheckpointWorkflowVersion: extracts from metadata" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); - // Saga should be completed - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const saga_step = (try store.getStep(arena.allocator(), "step_saga")).?; - try std.testing.expectEqualStrings("completed", saga_step.status); - try std.testing.expect(saga_step.output_json != null); - } + try std.testing.expectEqual(@as(i64, 3), getCheckpointWorkflowVersion(arena.allocator(), "{\"workflow_version\":3}")); + try std.testing.expectEqual(@as(i64, 1), getCheckpointWorkflowVersion(arena.allocator(), "{\"route_results\":{}}")); + try std.testing.expectEqual(@as(i64, 1), getCheckpointWorkflowVersion(arena.allocator(), null)); } -test "Engine: saga step runs compensation in reverse on failure" { +test "migrateCompletedNodes: filters removed nodes" { const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + const alloc = arena.allocator(); + var completed = std.StringHashMap(void).init(alloc); + try completed.put("analyze", {}); + try completed.put("old_node", {}); + try completed.put("__start__", {}); const wf = - 
\\{"steps":[{"id":"saga1","type":"saga","body":["s1","s2"],"compensations":{"s1":"c1","s2":"c2"}},{"id":"s1","type":"task","prompt_template":"step1"},{"id":"s2","type":"task","prompt_template":"step2"},{"id":"c1","type":"task","prompt_template":"comp1"},{"id":"c2","type":"task","prompt_template":"comp2"}]} + \\{"nodes":{"analyze":{"type":"task"},"new_node":{"type":"task"}},"edges":[]} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_saga", "r1", "saga1", "saga", "ready", "{}", 1, null, null, null); - - var engine = Engine.init(&store, allocator, 500); - // Tick 1: creates first body child (s1) - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } - - // Complete first body child (s1) - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_saga"); - try store.updateStepStatus(children[0].id, "completed", null, "{\"output\":\"provisioned\"}", null, 1); - } - - // Tick 2: creates s2 - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } - - // Fail second body child (s2) - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_saga"); - for (children) |child| { - if (std.mem.eql(u8, child.def_step_id, "s2")) { - try store.updateStepStatus(child.id, "failed", null, null, "deploy failed", 1); - } - } - } - - // Tick 3: detects s2 failed, starts compensation (s1 was completed, so compensate s1) - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try 
engine.processRun(arena.allocator(), run_row); - } - - // Tick 4: compensation child creation may happen here - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } - - // Should have created compensation child for s1 - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_saga"); - var found_comp = false; - for (children) |child| { - if (std.mem.eql(u8, child.def_step_id, "c1")) { - found_comp = true; - } - } - try std.testing.expect(found_comp); - } + const migrated = migrateCompletedNodes(alloc, &completed, wf); + try std.testing.expect(migrated); + try std.testing.expect(completed.get("analyze") != null); + try std.testing.expect(completed.get("__start__") != null); + try std.testing.expect(completed.get("old_node") == null); +} - // Complete the compensation child - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_saga"); - for (children) |child| { - if (std.mem.eql(u8, child.def_step_id, "c1")) { - try store.updateStepStatus(child.id, "completed", null, "{\"output\":\"deprovisioned\"}", null, 1); - } - } - } +test "migrateCompletedNodes: no changes needed" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); - // Tick 5: compensation done - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + const alloc = arena.allocator(); + var completed = std.StringHashMap(void).init(alloc); + try completed.put("analyze", {}); - // Tick 6: saga should finalize as failed - { - var arena = 
std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + const wf = + \\{"nodes":{"analyze":{"type":"task"}},"edges":[]} + ; - // Saga should be failed with compensation output - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const saga_step = (try store.getStep(arena.allocator(), "step_saga")).?; - try std.testing.expectEqualStrings("failed", saga_step.status); - try std.testing.expect(saga_step.output_json != null); - // Output should contain failed_at and compensated - try std.testing.expect(std.mem.indexOf(u8, saga_step.output_json.?, "failed_at") != null); - try std.testing.expect(std.mem.indexOf(u8, saga_step.output_json.?, "compensated") != null); - } + const migrated = migrateCompletedNodes(alloc, &completed, wf); + try std.testing.expect(!migrated); } -test "Engine: saga step fails immediately with no completed steps to compensate" { +test "serializeRouteResultsWithVersion: includes version" { const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - const wf = - \\{"steps":[{"id":"saga1","type":"saga","body":["s1"],"compensations":{"s1":"c1"}},{"id":"s1","type":"task","prompt_template":"step1"},{"id":"c1","type":"task","prompt_template":"comp1"}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_saga", "r1", "saga1", "saga", "ready", "{}", 1, null, null, null); + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); - var engine = Engine.init(&store, allocator, 500); + const alloc = arena.allocator(); + var route_results = std.StringHashMap([]const u8).init(alloc); - // Tick 1: creates first body child (s1) - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try 
engine.processRun(arena.allocator(), run_row); - } + const result = try serializeRouteResultsWithVersion(alloc, &route_results, 5); + try std.testing.expect(result != null); + try std.testing.expect(std.mem.indexOf(u8, result.?, "workflow_version") != null); + try std.testing.expect(std.mem.indexOf(u8, result.?, "5") != null); +} - // Fail the first body child - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const children = try store.getChildSteps(arena.allocator(), "step_saga"); - try store.updateStepStatus(children[0].id, "failed", null, null, "provision failed", 1); - } +test "serializeRouteResultsWithVersion: null version, empty routes" { + const allocator = std.testing.allocator; + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); - // Tick 2: detects s1 failed, no completed steps, saga fails immediately - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + const alloc = arena.allocator(); + var route_results = std.StringHashMap([]const u8).init(alloc); - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const saga_step = (try store.getStep(arena.allocator(), "step_saga")).?; - try std.testing.expectEqualStrings("failed", saga_step.status); - try std.testing.expect(saga_step.output_json != null); - try std.testing.expect(std.mem.indexOf(u8, saga_step.output_json.?, "compensated\":[]") != null); - } + const result = try serializeRouteResultsWithVersion(alloc, &route_results, null); + try std.testing.expect(result == null); } -test "Engine: saga step fails without body" { +test "engine: workflow version stored in checkpoint metadata" { const allocator = std.testing.allocator; var store = try Store.init(allocator, ":memory:"); defer store.deinit(); const wf = - \\{"steps":[{"id":"saga1","type":"saga"}]} + 
\\{"version":2,"nodes":{"t1":{"type":"transform","updates":"{\"result\":\"done\"}"}},"edges":[["__start__","t1"],["t1","__end__"]],"schema":{"result":{"type":"string","reducer":"last_value"}}} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try store.insertStep("step_saga", "r1", "saga1", "saga", "ready", "{}", 1, null, null, null); + + try store.createRunWithState("r1", null, wf, "{}", "{}"); + try store.updateRunStatus("r1", "running", null); var engine = Engine.init(&store, allocator, 500); @@ -5073,213 +3614,175 @@ test "Engine: saga step fails without body" { const run_row = (try store.getRun(arena.allocator(), "r1")).?; try engine.processRun(arena.allocator(), run_row); - const saga_step = (try store.getStep(arena.allocator(), "step_saga")).?; - try std.testing.expectEqualStrings("failed", saga_step.status); + // Check that checkpoint has workflow_version in metadata + const latest_cp = (try store.getLatestCheckpoint(arena.allocator(), "r1")).?; + try std.testing.expect(latest_cp.metadata_json != null); + try std.testing.expect(std.mem.indexOf(u8, latest_cp.metadata_json.?, "workflow_version") != null); + try std.testing.expect(std.mem.indexOf(u8, latest_cp.metadata_json.?, "2") != null); } -// ── Graph cycle tests ──────────────────────────────────────────────── +test "OrchestratorEvent: eventKindString returns correct strings" { + try std.testing.expectEqualStrings("run.started", OrchestratorEvent.eventKindString(.run_started)); + try std.testing.expectEqualStrings("run.completed", OrchestratorEvent.eventKindString(.run_completed)); + try std.testing.expectEqualStrings("run.failed", OrchestratorEvent.eventKindString(.run_failed)); + try std.testing.expectEqualStrings("run.interrupted", OrchestratorEvent.eventKindString(.run_interrupted)); + try std.testing.expectEqualStrings("run.cancelled", OrchestratorEvent.eventKindString(.run_cancelled)); + try std.testing.expectEqualStrings("step.started", 
OrchestratorEvent.eventKindString(.step_started)); + try std.testing.expectEqualStrings("step.completed", OrchestratorEvent.eventKindString(.step_completed)); + try std.testing.expectEqualStrings("step.failed", OrchestratorEvent.eventKindString(.step_failed)); + try std.testing.expectEqualStrings("step.retrying", OrchestratorEvent.eventKindString(.step_retrying)); + try std.testing.expectEqualStrings("checkpoint.created", OrchestratorEvent.eventKindString(.checkpoint_created)); + try std.testing.expectEqualStrings("state.injected", OrchestratorEvent.eventKindString(.state_injected)); +} -test "Engine: condition routes back to earlier step creates new instances" { +test "OrchestratorEvent: toJson serializes correctly" { const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - // Workflow: compute -> check -> (if true_target=compute, false_target=done) - const wf = - \\{"steps":[{"id":"compute","type":"task","prompt_template":"compute","depends_on":[]},{"id":"check","type":"condition","expression":"retry","true_target":"compute","false_target":"done","depends_on":["compute"]},{"id":"done","type":"task","prompt_template":"done","depends_on":["check"]}]} - ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); - // Step "compute" completed - try store.insertStep("step_compute", "r1", "compute", "task", "completed", "{}", 1, null, null, null); - try store.updateStepStatus("step_compute", "completed", null, "{\"output\":\"retry this\"}", null, 1); + const ev = OrchestratorEvent{ + .event_type = .run_started, + .run_id = "run-123", + .step_id = null, + .node_name = "analyze", + .timestamp_ms = 1700000000000, + .metadata_json = null, + }; - // Step "check" is ready, depends on compute - try store.insertStep("step_check", "r1", "check", "condition", "ready", "{}", 1, null, null, null); - try 
store.insertStepDep("step_check", "step_compute"); + const json_str = ev.toJson(arena.allocator()); + try std.testing.expect(json_str != null); + try std.testing.expect(std.mem.indexOf(u8, json_str.?, "run.started") != null); + try std.testing.expect(std.mem.indexOf(u8, json_str.?, "run-123") != null); + try std.testing.expect(std.mem.indexOf(u8, json_str.?, "analyze") != null); +} - // Step "done" is pending - try store.insertStep("step_done", "r1", "done", "task", "pending", "{}", 1, null, null, null); - try store.insertStepDep("step_done", "step_check"); +test "engine: validateConfig returns false with no workers" { + const allocator = std.testing.allocator; + var store = try Store.init(allocator, ":memory:"); + defer store.deinit(); var engine = Engine.init(&store, allocator, 500); - - // Tick 1: condition evaluates to true, target "compute" is already completed - // Should detect cycle and create new step instances - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } - - // Verify: condition step should be completed with cycle_back output - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const check_step = (try store.getStep(arena.allocator(), "step_check")).?; - try std.testing.expectEqualStrings("completed", check_step.status); - try std.testing.expect(check_step.output_json != null); - try std.testing.expect(std.mem.indexOf(u8, check_step.output_json.?, "cycle_back") != null); - } - - // Verify: new step instances were created (total steps > 3) - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const all_steps = try store.getStepsByRun(arena.allocator(), "r1"); - // Original: compute, check, done = 3 - // New: compute(iter1), check(iter1) = 2 more - try std.testing.expect(all_steps.len > 3); - - // Find new compute instance with 
iteration_index > 0 - var found_new_compute = false; - for (all_steps) |s| { - if (std.mem.eql(u8, s.def_step_id, "compute") and s.iteration_index > 0) { - found_new_compute = true; - try std.testing.expectEqualStrings("ready", s.status); - } - } - try std.testing.expect(found_new_compute); - } - - // Verify cycle_state was updated - { - const cycle_state = try store.getCycleState("r1", "cycle_check"); - try std.testing.expect(cycle_state != null); - try std.testing.expectEqual(@as(i64, 1), cycle_state.?.iteration_count); - } + try std.testing.expect(!engine.validateConfig()); } -test "Engine: graph cycle respects max_cycle_iterations" { +test "engine: validateConfig returns true with registered workers" { const allocator = std.testing.allocator; var store = try Store.init(allocator, ":memory:"); defer store.deinit(); - // Workflow with max_cycle_iterations=1 + try store.insertWorker("w1", "http://localhost:9000", "", "webhook", null, "[]", 5, "config"); + var engine = Engine.init(&store, allocator, 500); + try std.testing.expect(engine.validateConfig()); +} + +test "generateMermaid: simple chain" { + const allocator = std.testing.allocator; const wf = - \\{"steps":[{"id":"compute","type":"task","prompt_template":"compute"},{"id":"check","type":"condition","expression":"retry","true_target":"compute","false_target":"done","max_cycle_iterations":1,"depends_on":["compute"]},{"id":"done","type":"task","prompt_template":"done","depends_on":["check"]}]} + \\{"nodes":{"analyze":{"type":"task"},"review":{"type":"task"}},"edges":[["__start__","analyze"],["analyze","review"],["review","__end__"]]} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); + const result = try generateMermaid(allocator, wf); + defer allocator.free(result); - // Pre-set cycle state to max - try store.upsertCycleState("r1", "cycle_check", 1, 1); - - // compute completed - try store.insertStep("step_compute", "r1", "compute", "task", "completed", "{}", 1, null, null, null); - try 
store.updateStepStatus("step_compute", "completed", null, "{\"output\":\"retry\"}", null, 1); - - // check is ready - try store.insertStep("step_check", "r1", "check", "condition", "ready", "{}", 1, null, null, null); - try store.insertStepDep("step_check", "step_compute"); - - // done is pending - try store.insertStep("step_done", "r1", "done", "task", "pending", "{}", 1, null, null, null); - try store.insertStepDep("step_done", "step_check"); - - var engine = Engine.init(&store, allocator, 500); - - // Tick: condition should fail because cycle limit exceeded - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); - } + try std.testing.expect(std.mem.indexOf(u8, result, "graph TD") != null); + try std.testing.expect(std.mem.indexOf(u8, result, "__start__((Start))") != null); + try std.testing.expect(std.mem.indexOf(u8, result, "__end__((End))") != null); + try std.testing.expect(std.mem.indexOf(u8, result, "analyze[analyze") != null); + try std.testing.expect(std.mem.indexOf(u8, result, "__start__ --> analyze") != null); + try std.testing.expect(std.mem.indexOf(u8, result, "review --> __end__") != null); +} - // Check step should be failed - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const check_step = (try store.getStep(arena.allocator(), "step_check")).?; - try std.testing.expectEqualStrings("failed", check_step.status); - try std.testing.expect(check_step.error_text != null); - try std.testing.expect(std.mem.indexOf(u8, check_step.error_text.?, "exceeded") != null); - } +test "generateMermaid: route node with conditional edges" { + const allocator = std.testing.allocator; + const wf = + 
\\{"nodes":{"decide":{"type":"route"},"approve":{"type":"task"},"reject":{"type":"task"}},"edges":[["__start__","decide"],["decide:yes","approve"],["decide:no","reject"],["approve","__end__"],["reject","__end__"]]} + ; + const result = try generateMermaid(allocator, wf); + defer allocator.free(result); - // Run should be failed - { - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const run = (try store.getRun(arena.allocator(), "r1")).?; - try std.testing.expectEqualStrings("failed", run.status); - } + try std.testing.expect(std.mem.indexOf(u8, result, "decide{decide") != null); + try std.testing.expect(std.mem.indexOf(u8, result, "decide -->|yes| approve") != null); + try std.testing.expect(std.mem.indexOf(u8, result, "decide -->|no| reject") != null); } -// ── Worker handoff tests ───────────────────────────────────────────── - -test "extractHandoffTarget parses handoff_to from output" { +test "generateMermaid: node type shapes" { const allocator = std.testing.allocator; - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - const output = - \\{"output":"cannot handle","handoff_to":{"tags":["security_expert"],"message":"needs security review"}} + const wf = + \\{"nodes":{"t":{"type":"transform"},"i":{"type":"interrupt"},"s":{"type":"send"},"sg":{"type":"subgraph"}},"edges":[["__start__","t"],["t","__end__"]]} ; - const target = extractHandoffTarget(arena.allocator(), output); - try std.testing.expect(target != null); - try std.testing.expectEqual(@as(usize, 1), target.?.tags.len); - try std.testing.expectEqualStrings("security_expert", target.?.tags[0]); - try std.testing.expect(target.?.message != null); - try std.testing.expectEqualStrings("needs security review", target.?.message.?); + const result = try generateMermaid(allocator, wf); + defer allocator.free(result); + + // transform uses rounded parens + try std.testing.expect(std.mem.indexOf(u8, result, "t(t\\ntransform)") != null); + // interrupt 
uses parallelogram + try std.testing.expect(std.mem.indexOf(u8, result, "i[/i\\ninterrupt/]") != null); + // send uses double brackets + try std.testing.expect(std.mem.indexOf(u8, result, "s[[s\\nsend]]") != null); + // subgraph uses rectangle + try std.testing.expect(std.mem.indexOf(u8, result, "sg[sg\\nsubgraph]") != null); } -test "extractHandoffTarget returns null for normal output" { +test "processUiMessages: broadcasts events" { const allocator = std.testing.allocator; var arena = std.heap.ArenaAllocator.init(allocator); defer arena.deinit(); + const alloc = arena.allocator(); - const output = - \\{"output":"all good, no handoff needed"} + var hub = sse_mod.SseHub.init(alloc); + defer hub.deinit(); + + const queue = hub.getOrCreateQueue("run1"); + + const response = + \\{"response":"ok","ui_messages":[{"id":"p1","name":"ProgressBar","props":{"progress":75}},{"id":"old","remove":true}]} ; - const target = extractHandoffTarget(arena.allocator(), output); - try std.testing.expect(target == null); + processUiMessages(&hub, alloc, "run1", "step1", response); + + const snapshot = queue.snapshotSince(alloc, 0); + defer queue.freeSnapshot(alloc, snapshot); + try std.testing.expectEqual(@as(usize, 2), snapshot.events.len); + try std.testing.expectEqualStrings("ui_message", snapshot.events[0].event_type); + try std.testing.expectEqualStrings("ui_message_delete", snapshot.events[1].event_type); + // First event should contain step_id + try std.testing.expect(std.mem.indexOf(u8, snapshot.events[0].data, "step1") != null); } -test "extractHandoffTarget returns null for non-JSON output" { +test "processStreamMessages: broadcasts message events" { const allocator = std.testing.allocator; var arena = std.heap.ArenaAllocator.init(allocator); defer arena.deinit(); + const alloc = arena.allocator(); - const target = extractHandoffTarget(arena.allocator(), "plain text output"); - try std.testing.expect(target == null); -} + var hub = sse_mod.SseHub.init(alloc); + defer 
hub.deinit(); -test "extractHandoffTarget handles handoff without message" { - const allocator = std.testing.allocator; - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); + const queue = hub.getOrCreateQueue("run1"); - const output = - \\{"output":"redirect","handoff_to":{"tags":["expert"]}} + const response = + \\{"response":"done","stream_messages":[{"role":"assistant","content":"Starting..."},{"role":"tool","content":"Found 3 issues","tool":"lint"}]} ; - const target = extractHandoffTarget(arena.allocator(), output); - try std.testing.expect(target != null); - try std.testing.expectEqual(@as(usize, 1), target.?.tags.len); - try std.testing.expectEqualStrings("expert", target.?.tags[0]); - try std.testing.expect(target.?.message == null); + processStreamMessages(&hub, alloc, "run1", "step1", "task", response); + + const snapshot = queue.snapshotSince(alloc, 0); + defer queue.freeSnapshot(alloc, snapshot); + try std.testing.expectEqual(@as(usize, 2), snapshot.events.len); + try std.testing.expectEqualStrings("message", snapshot.events[0].event_type); + try std.testing.expectEqualStrings("message", snapshot.events[1].event_type); + // Should contain step context + try std.testing.expect(std.mem.indexOf(u8, snapshot.events[0].data, "step1") != null); + try std.testing.expect(std.mem.indexOf(u8, snapshot.events[0].data, "task") != null); + try std.testing.expect(std.mem.indexOf(u8, snapshot.events[1].data, "tool") != null); } -test "Engine: task step stays ready when no workers available (handoff path)" { +test "applyUiMessagesToState: creates __ui_messages" { const allocator = std.testing.allocator; - var store = try Store.init(allocator, ":memory:"); - defer store.deinit(); - - const wf = - \\{"steps":[{"id":"t1","type":"task","prompt_template":"do work"}]} + const state = "{}"; + const response = + \\{"response":"ok","ui_messages":[{"id":"p1","name":"ProgressBar"}]} ; - try store.insertRun("r1", null, "running", wf, "{}", "[]"); - try 
store.insertStep("step_t1", "r1", "t1", "task", "ready", "{}", 1, null, null, null); - - var engine = Engine.init(&store, allocator, 500); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - const run_row = (try store.getRun(arena.allocator(), "r1")).?; - try engine.processRun(arena.allocator(), run_row); + const result = try applyUiMessagesToState(allocator, state, response); + defer allocator.free(result); - // No workers available, step should remain "ready" - const step = (try store.getStep(arena.allocator(), "step_t1")).?; - try std.testing.expectEqualStrings("ready", step.status); + try std.testing.expect(std.mem.indexOf(u8, result, "__ui_messages") != null); + try std.testing.expect(std.mem.indexOf(u8, result, "ProgressBar") != null); } diff --git a/src/main.zig b/src/main.zig index 437a610..45590f0 100644 --- a/src/main.zig +++ b/src/main.zig @@ -12,6 +12,7 @@ const redis_client = @import("redis_client.zig"); const mqtt_client = @import("mqtt_client.zig"); const tracker_mod = @import("tracker.zig"); const workflow_loader = @import("workflow_loader.zig"); +const sse_mod = @import("sse.zig"); const c = @cImport({ @cInclude("signal.h"); }); @@ -148,6 +149,9 @@ pub fn main() !void { var metrics = metrics_mod.Metrics{}; var drain_mode = std.atomic.Value(bool).init(false); + var sse_hub = sse_mod.SseHub.init(allocator); + defer sse_hub.deinit(); + var response_queue = async_dispatch.ResponseQueue.init(allocator); defer response_queue.deinit(); @@ -240,6 +244,14 @@ pub fn main() !void { // Start DAG engine on a background thread const poll_ms: u64 = cfg.engine.poll_interval_ms; + // Hot reload watcher for workflow definitions + var wf_watcher: ?workflow_loader.WorkflowWatcher = null; + if (cfg.tracker) |tracker_cfg| { + if (tracker_cfg.workflows_dir.len > 0) { + wf_watcher = workflow_loader.WorkflowWatcher.init(allocator, tracker_cfg.workflows_dir, &store); + } + } + var engine = engine_mod.Engine.init(&store, allocator, poll_ms); 
engine.configure(.{ .health_check_interval_ms = @as(i64, @intCast(cfg.engine.health_check_interval_ms)), @@ -250,7 +262,13 @@ pub fn main() !void { .retry_jitter_ms = @as(i64, @intCast(cfg.engine.retry_jitter_ms)), .retry_max_elapsed_ms = @as(i64, @intCast(cfg.engine.retry_max_elapsed_ms)), }, &metrics); + if (cfg.tracker) |tracker_cfg| { + engine.setTrustedTrackerAccess(tracker_cfg.url, tracker_cfg.api_token); + } engine.response_queue = &response_queue; + if (wf_watcher != null) { + engine.workflow_watcher = &wf_watcher.?; + } const engine_thread = try std.Thread.spawn(.{}, engine_mod.Engine.run, .{&engine}); // Spawn listener threads for async protocols @@ -337,6 +355,9 @@ pub fn main() !void { if (tracker_instance) |*ti| { ti.deinit(); } + if (wf_watcher) |*ww| { + ww.deinit(); + } } while (true) { @@ -387,6 +408,8 @@ pub fn main() !void { .strategies = &strategy_map, .tracker_state = if (tracker_instance) |*ti| &ti.state else null, .tracker_cfg = if (cfg.tracker) |*tc| tc else null, + .sse_hub = &sse_hub, + .rate_limits = &engine.rate_limits, }; const response = api.handleRequest(&ctx, request.method, request.target, request.body); @@ -638,4 +661,6 @@ comptime { _ = @import("subprocess.zig"); _ = @import("tracker_client.zig"); _ = @import("tracker.zig"); + _ = @import("state.zig"); + _ = @import("sse.zig"); } diff --git a/src/migrations/004_orchestration.sql b/src/migrations/004_orchestration.sql new file mode 100644 index 0000000..ce69d40 --- /dev/null +++ b/src/migrations/004_orchestration.sql @@ -0,0 +1,97 @@ +-- Note: step_deps table is kept for legacy POST /runs endpoint backward compatibility. +-- cycle_state, chat_messages, saga_state tables are legacy (unused by current engine). 
+ +-- Saved workflow definitions +CREATE TABLE IF NOT EXISTS workflows ( + id TEXT PRIMARY KEY, + name TEXT NOT NULL, + definition_json TEXT NOT NULL, + version INTEGER DEFAULT 1, + created_at_ms INTEGER NOT NULL, + updated_at_ms INTEGER NOT NULL +); + +-- State checkpoints (snapshots after each step) +CREATE TABLE IF NOT EXISTS checkpoints ( + id TEXT PRIMARY KEY, + run_id TEXT NOT NULL REFERENCES runs(id), + step_id TEXT NOT NULL, + parent_id TEXT REFERENCES checkpoints(id), + state_json TEXT NOT NULL, + completed_nodes_json TEXT NOT NULL, + version INTEGER NOT NULL, + metadata_json TEXT, + created_at_ms INTEGER NOT NULL +); +CREATE INDEX IF NOT EXISTS idx_checkpoints_run ON checkpoints(run_id, version); +CREATE INDEX IF NOT EXISTS idx_checkpoints_parent ON checkpoints(parent_id); + +-- Agent intermediate events (from nullclaw callback) +CREATE TABLE IF NOT EXISTS agent_events ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + run_id TEXT NOT NULL REFERENCES runs(id), + step_id TEXT NOT NULL, + iteration INTEGER NOT NULL, + tool TEXT, + args_json TEXT, + result_text TEXT, + status TEXT NOT NULL, + created_at_ms INTEGER NOT NULL +); +CREATE INDEX IF NOT EXISTS idx_agent_events_run_step ON agent_events(run_id, step_id); + +-- Pending state injections (thread-safe queue for POST /runs/{id}/state) +CREATE TABLE IF NOT EXISTS pending_state_injections ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + run_id TEXT NOT NULL REFERENCES runs(id), + updates_json TEXT NOT NULL, + apply_after_step TEXT, + created_at_ms INTEGER NOT NULL +); + +-- Extend runs table +ALTER TABLE runs ADD COLUMN state_json TEXT; +ALTER TABLE runs ADD COLUMN workflow_id TEXT REFERENCES workflows(id); +ALTER TABLE runs ADD COLUMN forked_from_run_id TEXT REFERENCES runs(id); +ALTER TABLE runs ADD COLUMN forked_from_checkpoint_id TEXT REFERENCES checkpoints(id); +ALTER TABLE runs ADD COLUMN checkpoint_count INTEGER DEFAULT 0; + +-- Extend steps table +ALTER TABLE steps ADD COLUMN state_before_json TEXT; +ALTER 
TABLE steps ADD COLUMN state_after_json TEXT; +ALTER TABLE steps ADD COLUMN state_updates_json TEXT; +-- NOTE: parent_step_id already exists from 001_init.sql — do NOT add it again + +-- Subgraph support: parent run linkage and per-run config +ALTER TABLE runs ADD COLUMN parent_run_id TEXT REFERENCES runs(id); +ALTER TABLE runs ADD COLUMN config_json TEXT; + +-- Node-level cache (Gap 3) +CREATE TABLE IF NOT EXISTS node_cache ( + cache_key TEXT PRIMARY KEY, + node_name TEXT NOT NULL, + result_json TEXT NOT NULL, + created_at_ms INTEGER NOT NULL, + ttl_ms INTEGER +); + +-- Pending writes from parallel node execution (Gap 4) +CREATE TABLE IF NOT EXISTS pending_writes ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + run_id TEXT NOT NULL, + step_id TEXT NOT NULL, + channel TEXT NOT NULL, + value_json TEXT NOT NULL, + created_at_ms INTEGER NOT NULL +); +CREATE INDEX IF NOT EXISTS idx_pending_writes_run ON pending_writes(run_id); + +-- Token accounting columns on runs +ALTER TABLE runs ADD COLUMN total_input_tokens INTEGER DEFAULT 0; +ALTER TABLE runs ADD COLUMN total_output_tokens INTEGER DEFAULT 0; +ALTER TABLE runs ADD COLUMN total_tokens INTEGER DEFAULT 0; + +-- Token accounting columns on steps +ALTER TABLE steps ADD COLUMN input_tokens INTEGER DEFAULT 0; +ALTER TABLE steps ADD COLUMN output_tokens INTEGER DEFAULT 0; +ALTER TABLE steps ADD COLUMN total_tokens INTEGER DEFAULT 0; diff --git a/src/sse.zig b/src/sse.zig new file mode 100644 index 0000000..0c64344 --- /dev/null +++ b/src/sse.zig @@ -0,0 +1,402 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; + +pub const StreamMode = enum { + values, // Full state after each step + updates, // Only node name + updates + tasks, // Task start/finish with metadata + debug, // Everything with step number + timestamp + custom, // User-defined via node output + + pub fn toString(self: StreamMode) []const u8 { + return @tagName(self); + } + + pub fn fromString(s: []const u8) ?StreamMode { + inline for 
(@typeInfo(StreamMode).@"enum".fields) |f| { + if (std.mem.eql(u8, s, f.name)) return @enumFromInt(f.value); + } + return null; + } +}; + +pub const SseEvent = struct { + seq: u64 = 0, + event_type: []const u8, // "state_update", "step_started", etc. + data: []const u8, // JSON string + mode: StreamMode = .updates, // default mode +}; + +pub const EventSnapshot = struct { + events: []SseEvent, + latest_seq: u64, + oldest_seq: u64, + gap_detected: bool, +}; + +/// Per-run event queue. Thread-safe via mutex. +pub const RunEventQueue = struct { + events: std.ArrayListUnmanaged(SseEvent), + alloc: Allocator, + mutex: std.Thread.Mutex, + closed: std.atomic.Value(bool), + next_seq: u64, + + const max_retained_events: usize = 2048; + + fn freeEvent(self: *RunEventQueue, event: SseEvent) void { + self.alloc.free(event.event_type); + self.alloc.free(event.data); + } + + pub fn init(alloc: Allocator) RunEventQueue { + return .{ + .events = .empty, + .alloc = alloc, + .mutex = .{}, + .closed = std.atomic.Value(bool).init(false), + .next_seq = 1, + }; + } + + pub fn deinit(self: *RunEventQueue) void { + for (self.events.items) |event| { + self.freeEvent(event); + } + self.events.deinit(self.alloc); + } + + /// Push an event to the queue. Thread-safe. 
+ pub fn push(self: *RunEventQueue, event: SseEvent) void { + self.mutex.lock(); + defer self.mutex.unlock(); + + const event_type = self.alloc.dupe(u8, event.event_type) catch return; + const data = self.alloc.dupe(u8, event.data) catch { + self.alloc.free(event_type); + return; + }; + + self.events.append(self.alloc, .{ + .seq = self.next_seq, + .event_type = event_type, + .data = data, + .mode = event.mode, + }) catch { + self.alloc.free(event_type); + self.alloc.free(data); + return; + }; + self.next_seq += 1; + + while (self.events.items.len > max_retained_events) { + const dropped = self.events.orderedRemove(0); + self.freeEvent(dropped); + } + } + + pub fn snapshotSince(self: *RunEventQueue, alloc: Allocator, after_seq: u64) EventSnapshot { + self.mutex.lock(); + defer self.mutex.unlock(); + + const latest_seq = self.next_seq -| 1; + const oldest_seq = if (self.events.items.len > 0) self.events.items[0].seq else latest_seq; + const gap_detected = after_seq > 0 and self.events.items.len > 0 and after_seq < self.events.items[0].seq and self.events.items[0].seq - after_seq > 1; + + var snapshot_events: std.ArrayListUnmanaged(SseEvent) = .empty; + for (self.events.items) |event| { + if (event.seq <= after_seq) continue; + + const event_type = alloc.dupe(u8, event.event_type) catch continue; + const data = alloc.dupe(u8, event.data) catch { + alloc.free(event_type); + continue; + }; + + snapshot_events.append(alloc, .{ + .seq = event.seq, + .event_type = event_type, + .data = data, + .mode = event.mode, + }) catch { + alloc.free(event_type); + alloc.free(data); + }; + } + + const events = snapshot_events.toOwnedSlice(alloc) catch { + for (snapshot_events.items) |event| { + alloc.free(event.event_type); + alloc.free(event.data); + } + snapshot_events.deinit(alloc); + return .{ + .events = &.{}, + .latest_seq = latest_seq, + .oldest_seq = oldest_seq, + .gap_detected = gap_detected, + }; + }; + + return .{ + .events = events, + .latest_seq = latest_seq, + 
.oldest_seq = oldest_seq, + .gap_detected = gap_detected, + }; + } + + pub fn freeSnapshot(_: *RunEventQueue, alloc: Allocator, snapshot: EventSnapshot) void { + for (snapshot.events) |event| { + alloc.free(event.event_type); + alloc.free(event.data); + } + if (snapshot.events.len > 0) alloc.free(snapshot.events); + } + + /// Mark queue as closed (run completed/cancelled). + pub fn close(self: *RunEventQueue) void { + self.closed.store(true, .release); + } + + pub fn isClosed(self: *RunEventQueue) bool { + return self.closed.load(.acquire); + } +}; + +/// Central hub managing per-run event queues. +pub const SseHub = struct { + queues: std.StringHashMap(*RunEventQueue), + mutex: std.Thread.Mutex, + alloc: Allocator, + + pub fn init(alloc: Allocator) SseHub { + return .{ + .queues = std.StringHashMap(*RunEventQueue).init(alloc), + .mutex = .{}, + .alloc = alloc, + }; + } + + pub fn deinit(self: *SseHub) void { + var it = self.queues.iterator(); + while (it.next()) |entry| { + entry.value_ptr.*.deinit(); + self.alloc.destroy(entry.value_ptr.*); + self.alloc.free(entry.key_ptr.*); + } + self.queues.deinit(); + } + + /// Get or create queue for a run. + pub fn getOrCreateQueue(self: *SseHub, run_id: []const u8) *RunEventQueue { + self.mutex.lock(); + defer self.mutex.unlock(); + if (self.queues.get(run_id)) |q| return q; + const queue = self.alloc.create(RunEventQueue) catch @panic("OOM: failed to allocate RunEventQueue"); + queue.* = RunEventQueue.init(self.alloc); + const id_copy = self.alloc.dupe(u8, run_id) catch @panic("OOM: failed to duplicate run_id"); + self.queues.put(id_copy, queue) catch @panic("OOM: failed to insert queue into map"); + return queue; + } + + /// Broadcast event to a run's queue. Creates the queue on first write so + /// late subscribers can still read recent buffered events. 
+ pub fn broadcast(self: *SseHub, run_id: []const u8, event: SseEvent) void { + self.mutex.lock(); + defer self.mutex.unlock(); + const queue = if (self.queues.get(run_id)) |existing| + existing + else blk: { + const created = self.alloc.create(RunEventQueue) catch return; + created.* = RunEventQueue.init(self.alloc); + const id_copy = self.alloc.dupe(u8, run_id) catch { + self.alloc.destroy(created); + return; + }; + self.queues.put(id_copy, created) catch { + self.alloc.free(id_copy); + self.alloc.destroy(created); + return; + }; + break :blk created; + }; + queue.push(event); + } + + pub fn closeQueue(self: *SseHub, run_id: []const u8) void { + self.mutex.lock(); + defer self.mutex.unlock(); + if (self.queues.get(run_id)) |queue| { + queue.close(); + } + } + + /// Close and remove queue when run completes. + pub fn removeQueue(self: *SseHub, run_id: []const u8) void { + self.mutex.lock(); + defer self.mutex.unlock(); + if (self.queues.fetchRemove(run_id)) |entry| { + entry.value.close(); + entry.value.deinit(); + self.alloc.destroy(entry.value); + self.alloc.free(entry.key); + } + } +}; + +// ── Tests ───────────────────────────────────────────────────────────── + +test "sse hub snapshotSince supports multiple consumers" { + const alloc = std.testing.allocator; + var hub = SseHub.init(alloc); + defer hub.deinit(); + + const queue = hub.getOrCreateQueue("run1"); + queue.push(.{ .event_type = "step_started", .data = "{}" }); + queue.push(.{ .event_type = "step_completed", .data = "{}" }); + + const first = queue.snapshotSince(alloc, 0); + defer queue.freeSnapshot(alloc, first); + const second = queue.snapshotSince(alloc, 0); + defer queue.freeSnapshot(alloc, second); + + try std.testing.expectEqual(@as(usize, 2), first.events.len); + try std.testing.expectEqual(@as(usize, 2), second.events.len); + try std.testing.expectEqualStrings("step_started", first.events[0].event_type); + try std.testing.expectEqualStrings("step_started", second.events[0].event_type); +} + 
+test "sse hub queue owns event payloads beyond source arena lifetime" { + const alloc = std.testing.allocator; + var hub = SseHub.init(alloc); + defer hub.deinit(); + + const queue = hub.getOrCreateQueue("run1"); + + var arena = std.heap.ArenaAllocator.init(alloc); + const arena_alloc = arena.allocator(); + + const event_type = try arena_alloc.dupe(u8, "step.completed"); + const payload = try arena_alloc.dupe(u8, "{\"ok\":true}"); + queue.push(.{ .event_type = event_type, .data = payload }); + arena.deinit(); + + const snapshot = queue.snapshotSince(alloc, 0); + defer queue.freeSnapshot(alloc, snapshot); + + try std.testing.expectEqual(@as(usize, 1), snapshot.events.len); + try std.testing.expectEqualStrings("step.completed", snapshot.events[0].event_type); + try std.testing.expectEqualStrings("{\"ok\":true}", snapshot.events[0].data); +} + +test "sse hub broadcast creates queue for late subscribers" { + const alloc = std.testing.allocator; + var hub = SseHub.init(alloc); + defer hub.deinit(); + + hub.broadcast("run1", .{ .event_type = "test", .data = "{}" }); + + const queue = hub.getOrCreateQueue("run1"); + const snapshot = queue.snapshotSince(alloc, 0); + defer queue.freeSnapshot(alloc, snapshot); + + try std.testing.expectEqual(@as(usize, 1), snapshot.events.len); + try std.testing.expectEqualStrings("test", snapshot.events[0].event_type); +} + +test "sse hub remove queue" { + const alloc = std.testing.allocator; + var hub = SseHub.init(alloc); + defer hub.deinit(); + + _ = hub.getOrCreateQueue("run1"); + hub.removeQueue("run1"); + // Queue should be gone + try std.testing.expectEqual(@as(usize, 0), hub.queues.count()); +} + +test "sse hub closeQueue preserves buffered events" { + const alloc = std.testing.allocator; + var hub = SseHub.init(alloc); + defer hub.deinit(); + + hub.broadcast("run1", .{ .event_type = "values", .data = "{}" }); + hub.closeQueue("run1"); + + const queue = hub.getOrCreateQueue("run1"); + try std.testing.expect(queue.isClosed()); + + 
const snapshot = queue.snapshotSince(alloc, 0); + defer queue.freeSnapshot(alloc, snapshot); + try std.testing.expectEqual(@as(usize, 1), snapshot.events.len); +} + +test "sse queue close" { + const alloc = std.testing.allocator; + var queue = RunEventQueue.init(alloc); + defer queue.deinit(); + + try std.testing.expect(!queue.isClosed()); + queue.close(); + try std.testing.expect(queue.isClosed()); +} + +test "stream mode toString and fromString" { + try std.testing.expectEqualStrings("values", StreamMode.values.toString()); + try std.testing.expectEqualStrings("updates", StreamMode.updates.toString()); + try std.testing.expectEqualStrings("tasks", StreamMode.tasks.toString()); + try std.testing.expectEqualStrings("debug", StreamMode.debug.toString()); + try std.testing.expectEqualStrings("custom", StreamMode.custom.toString()); + + try std.testing.expectEqual(StreamMode.values, StreamMode.fromString("values").?); + try std.testing.expectEqual(StreamMode.debug, StreamMode.fromString("debug").?); + try std.testing.expect(StreamMode.fromString("invalid") == null); +} + +test "sse event default mode is updates" { + const ev = SseEvent{ .event_type = "test", .data = "{}" }; + try std.testing.expectEqual(StreamMode.updates, ev.mode); +} + +test "sse event with explicit mode" { + const ev = SseEvent{ .event_type = "values", .data = "{\"state\":{}}", .mode = .values }; + try std.testing.expectEqual(StreamMode.values, ev.mode); + try std.testing.expectEqualStrings("values", ev.event_type); +} + +test "sse hub broadcast with mode" { + const alloc = std.testing.allocator; + var hub = SseHub.init(alloc); + defer hub.deinit(); + + const queue = hub.getOrCreateQueue("run1"); + queue.push(.{ .event_type = "values", .data = "{\"full\":true}", .mode = .values }); + queue.push(.{ .event_type = "task_start", .data = "{}", .mode = .tasks }); + queue.push(.{ .event_type = "debug", .data = "{}", .mode = .debug }); + + const snapshot = queue.snapshotSince(alloc, 0); + defer 
queue.freeSnapshot(alloc, snapshot); + try std.testing.expectEqual(@as(usize, 3), snapshot.events.len); + try std.testing.expectEqual(StreamMode.values, snapshot.events[0].mode); + try std.testing.expectEqual(StreamMode.tasks, snapshot.events[1].mode); + try std.testing.expectEqual(StreamMode.debug, snapshot.events[2].mode); +} + +test "sse hub snapshotSince returns only events after cursor" { + const alloc = std.testing.allocator; + var hub = SseHub.init(alloc); + defer hub.deinit(); + + const queue = hub.getOrCreateQueue("run1"); + queue.push(.{ .event_type = "one", .data = "{}" }); + queue.push(.{ .event_type = "two", .data = "{}" }); + queue.push(.{ .event_type = "three", .data = "{}" }); + + const snapshot = queue.snapshotSince(alloc, 2); + defer queue.freeSnapshot(alloc, snapshot); + + try std.testing.expectEqual(@as(usize, 1), snapshot.events.len); + try std.testing.expectEqual(@as(u64, 3), snapshot.events[0].seq); + try std.testing.expectEqualStrings("three", snapshot.events[0].event_type); +} diff --git a/src/state.zig b/src/state.zig new file mode 100644 index 0000000..5c266a6 --- /dev/null +++ b/src/state.zig @@ -0,0 +1,1025 @@ +/// State management module for NullBoiler orchestration. +/// Implements reducers and state operations for the unified state model. +/// Every node in the orchestration graph reads state, returns partial updates, +/// and the engine applies reducers to compute the new state. +const std = @import("std"); +const types = @import("types.zig"); +const ReducerType = types.ReducerType; +const Allocator = std.mem.Allocator; +const json = std.json; + +// ── Helpers ─────────────────────────────────────────────────────────── + +/// Serialize a std.json.Value to an allocated JSON string. 
fn serializeValue(alloc: Allocator, value: json.Value) ![]const u8 {
    var out: std.io.Writer.Allocating = .init(alloc);
    var jw: json.Stringify = .{ .writer = &out.writer };
    try jw.write(value);
    return try out.toOwnedSlice();
}

/// Extract f64 from a json.Value (handles both .integer and .float).
fn jsonToFloat(val: json.Value) ?f64 {
    return switch (val) {
        .float => |f| f,
        .integer => |i| @as(f64, @floatFromInt(i)),
        else => null,
    };
}

/// Format an f64 as a string. Renders whole numbers without a decimal point.
/// Fix: @intFromFloat is safety-checked illegal behavior (trap/UB) when the
/// operand is non-finite or its truncation falls outside i64 — reachable here
/// because reducers like `add` can produce arbitrarily large floats. Such
/// values now fall through to plain float formatting instead of trapping.
fn formatFloat(alloc: Allocator, f: f64) ![]const u8 {
    // 2^63 as f64; every finite f in [-2^63, 2^63) truncates into i64 range.
    const i64_limit: f64 = 9223372036854775808.0;
    if (std.math.isFinite(f) and f >= -i64_limit and f < i64_limit) {
        const i: i64 = @intFromFloat(f);
        if (@as(f64, @floatFromInt(i)) == f) {
            return try std.fmt.allocPrint(alloc, "{d}", .{i});
        }
    }
    return try std.fmt.allocPrint(alloc, "{d}", .{f});
}

// ── Overwrite Bypass (Gap 5) ──────────────────────────────────────────

/// Check if a JSON value is wrapped in {"__overwrite": true, "value": ...}.
fn isOverwrite(value: json.Value) bool {
    if (value != .object) return false;
    const ow = value.object.get("__overwrite") orelse return false;
    if (ow != .bool) return false;
    return ow.bool;
}

/// Extract the "value" field from an overwrite wrapper.
/// Returns the unwrapped json.Value, or .null if "value" key is missing.
fn extractOverwriteValue(value: json.Value) json.Value {
    if (value != .object) return value;
    return value.object.get("value") orelse .null;
}

// ── Public API ────────────────────────────────────────────────────────

/// Apply a single reducer to merge old_value + update into new_value.
/// Returns newly allocated JSON string owned by the caller.
+pub fn applyReducer(alloc: Allocator, reducer: ReducerType, old_value_json: ?[]const u8, update_json: []const u8) ![]const u8 { + switch (reducer) { + .last_value => { + return try alloc.dupe(u8, update_json); + }, + .append => { + return try applyAppend(alloc, old_value_json, update_json); + }, + .merge => { + return try applyMerge(alloc, old_value_json, update_json); + }, + .add => { + return try applyAdd(alloc, old_value_json, update_json); + }, + .min => { + return try applyMin(alloc, old_value_json, update_json); + }, + .max => { + return try applyMax(alloc, old_value_json, update_json); + }, + .add_messages => { + return try applyAddMessages(alloc, old_value_json, update_json); + }, + } +} + +/// Apply partial state updates to full state using schema reducers. +/// For each key in updates_json: +/// 1. Look up reducer type from schema_json (format: {"key": {"type": "...", "reducer": "..."}}) +/// 2. Get old value from state_json (may be null/missing) +/// 3. Apply reducer(old_value, new_value) +/// 4. 
Write result to output state +pub fn applyUpdates(alloc: Allocator, state_json: []const u8, updates_json: []const u8, schema_json: []const u8) ![]const u8 { + var arena = std.heap.ArenaAllocator.init(alloc); + defer arena.deinit(); + const arena_alloc = arena.allocator(); + + const state_parsed = try json.parseFromSlice(json.Value, arena_alloc, state_json, .{}); + const state_obj = if (state_parsed.value == .object) state_parsed.value.object else json.ObjectMap.init(arena_alloc); + + const updates_parsed = try json.parseFromSlice(json.Value, arena_alloc, updates_json, .{}); + if (updates_parsed.value != .object) return try alloc.dupe(u8, state_json); + + const schema_parsed = try json.parseFromSlice(json.Value, arena_alloc, schema_json, .{}); + const schema_obj = if (schema_parsed.value == .object) schema_parsed.value.object else json.ObjectMap.init(arena_alloc); + + // Start with a copy of all existing state keys + var result_obj = json.ObjectMap.init(arena_alloc); + var state_it = state_obj.iterator(); + while (state_it.next()) |entry| { + try result_obj.put(entry.key_ptr.*, entry.value_ptr.*); + } + + // For each update key, apply the reducer (with overwrite bypass, Gap 5) + var updates_it = updates_parsed.value.object.iterator(); + while (updates_it.next()) |entry| { + const key = entry.key_ptr.*; + const update_value = entry.value_ptr.*; + + // Gap 5: Check for overwrite bypass + if (isOverwrite(update_value)) { + const raw_val = extractOverwriteValue(update_value); + try result_obj.put(key, raw_val); + continue; + } + + // Serialize the update value + const update_str = try serializeValue(arena_alloc, update_value); + + // Look up reducer from schema + const reducer_type = blk: { + if (schema_obj.get(key)) |schema_entry| { + if (schema_entry == .object) { + if (schema_entry.object.get("reducer")) |reducer_val| { + if (reducer_val == .string) { + break :blk ReducerType.fromString(reducer_val.string) orelse .last_value; + } + } + } + } + break :blk 
ReducerType.last_value; + }; + + // Get old value as JSON string (or null if missing) + const old_str: ?[]const u8 = blk: { + if (state_obj.get(key)) |old_val| { + break :blk try serializeValue(arena_alloc, old_val); + } + break :blk null; + }; + + // Apply the reducer (allocates into arena) + const new_str = try applyReducer(arena_alloc, reducer_type, old_str, update_str); + + // Parse the result back into a json.Value and put in result + const new_parsed = try json.parseFromSlice(json.Value, arena_alloc, new_str, .{}); + try result_obj.put(key, new_parsed.value); + } + + // Serialize the result into the caller's allocator + const result_str = try serializeValue(arena_alloc, json.Value{ .object = result_obj }); + return try alloc.dupe(u8, result_str); +} + +/// Initialize state from input JSON and schema defaults. +/// For each key in schema: +/// - if key exists in input -> use input value +/// - else -> use type default: "" for string, [] for array, 0 for number, false for boolean, {} for object, null otherwise +pub fn initState(alloc: Allocator, input_json: []const u8, schema_json: []const u8) ![]const u8 { + var arena = std.heap.ArenaAllocator.init(alloc); + defer arena.deinit(); + const arena_alloc = arena.allocator(); + + const input_parsed = try json.parseFromSlice(json.Value, arena_alloc, input_json, .{}); + const input_obj = if (input_parsed.value == .object) input_parsed.value.object else json.ObjectMap.init(arena_alloc); + + const schema_parsed = try json.parseFromSlice(json.Value, arena_alloc, schema_json, .{}); + if (schema_parsed.value != .object) return try alloc.dupe(u8, input_json); + + var result_obj = json.ObjectMap.init(arena_alloc); + + var schema_it = schema_parsed.value.object.iterator(); + while (schema_it.next()) |entry| { + const key = entry.key_ptr.*; + const schema_entry = entry.value_ptr.*; + + if (input_obj.get(key)) |input_val| { + try result_obj.put(key, input_val); + } else { + const type_str = blk: { + if (schema_entry == .object) 
{ + if (schema_entry.object.get("type")) |type_val| { + if (type_val == .string) { + break :blk type_val.string; + } + } + } + break :blk ""; + }; + + const default_val: json.Value = if (std.mem.eql(u8, type_str, "string")) + .{ .string = "" } + else if (std.mem.eql(u8, type_str, "array")) + .{ .array = json.Array.init(arena_alloc) } + else if (std.mem.eql(u8, type_str, "number")) + .{ .integer = 0 } + else if (std.mem.eql(u8, type_str, "boolean")) + .{ .bool = false } + else if (std.mem.eql(u8, type_str, "object")) + .{ .object = json.ObjectMap.init(arena_alloc) } + else + .null; + + try result_obj.put(key, default_val); + } + } + + const result_str = try serializeValue(arena_alloc, json.Value{ .object = result_obj }); + return try alloc.dupe(u8, result_str); +} + +/// Extract a value from state JSON by dotted path. +/// Supports: +/// - "state.messages" -> strips "state." prefix, returns value at key "messages" +/// - "state.plan.files" -> nested object access +/// - "state.messages[-1]" -> last element of array +pub fn getStateValue(alloc: Allocator, state_json: []const u8, path: []const u8) !?[]const u8 { + var arena = std.heap.ArenaAllocator.init(alloc); + defer arena.deinit(); + const arena_alloc = arena.allocator(); + + // Strip "state." prefix if present + const effective_path = if (std.mem.startsWith(u8, path, "state.")) + path["state.".len..] + else + path; + + const parsed = try json.parseFromSlice(json.Value, arena_alloc, state_json, .{}); + var current = parsed.value; + + // Split by "." 
and walk the path + var segments = std.mem.splitScalar(u8, effective_path, '.'); + while (segments.next()) |segment| { + // Check for array index like "messages[-1]" + if (std.mem.indexOfScalar(u8, segment, '[')) |bracket_pos| { + const key = segment[0..bracket_pos]; + const index_str = segment[bracket_pos..]; + + // Navigate to the key first + if (current != .object) return null; + current = current.object.get(key) orelse return null; + + // Parse the array index + if (std.mem.eql(u8, index_str, "[-1]")) { + if (current != .array) return null; + if (current.array.items.len == 0) return null; + current = current.array.items[current.array.items.len - 1]; + } else { + // Parse positive index: [N] + if (index_str.len < 3) return null; + const num_str = index_str[1 .. index_str.len - 1]; + const idx = std.fmt.parseInt(usize, num_str, 10) catch return null; + if (current != .array) return null; + if (idx >= current.array.items.len) return null; + current = current.array.items[idx]; + } + } else { + if (current != .object) return null; + current = current.object.get(segment) orelse return null; + } + } + + const result_str = try serializeValue(arena_alloc, current); + return try alloc.dupe(u8, result_str); +} + +/// Convert JSON value to string for route matching. 
/// - true/false -> "true"/"false"
/// - numbers -> decimal string representation
/// - "quoted string" -> strip quotes, return inner string
/// - anything else -> return as-is
pub fn stringifyForRoute(alloc: Allocator, value_json: []const u8) ![]const u8 {
    var arena = std.heap.ArenaAllocator.init(alloc);
    defer arena.deinit();
    const aa = arena.allocator();

    const parsed = try json.parseFromSlice(json.Value, aa, value_json, .{});

    return switch (parsed.value) {
        .bool => |b| try alloc.dupe(u8, if (b) "true" else "false"),
        .integer => |i| try std.fmt.allocPrint(alloc, "{d}", .{i}),
        .float => |f| blk: {
            const rendered = try formatFloat(aa, f);
            break :blk try alloc.dupe(u8, rendered);
        },
        .string => |s| try alloc.dupe(u8, s),
        // Arrays, objects, and null route on their raw JSON text.
        else => try alloc.dupe(u8, value_json),
    };
}

// ── Reducer implementations ───────────────────────────────────────────

/// append: if old is null/empty -> wrap update in array [update].
/// If old is array -> parse, append update (element or array elements), serialize.
fn applyAppend(alloc: Allocator, old_json: ?[]const u8, update_json: []const u8) ![]const u8 {
    var arena = std.heap.ArenaAllocator.init(alloc);
    defer arena.deinit();
    const aa = arena.allocator();

    const update_parsed = try json.parseFromSlice(json.Value, aa, update_json, .{});
    const update_val = update_parsed.value;

    var combined = json.Array.init(aa);

    const have_old = old_json != null and old_json.?.len > 0;
    if (!have_old) {
        // No previous value: result is the one-element array [update].
        try combined.append(update_val);
    } else {
        const old_parsed = try json.parseFromSlice(json.Value, aa, old_json.?, .{});
        if (old_parsed.value == .array) {
            // Existing array: keep its elements, then splice an array update
            // element-by-element, or append a scalar update as one element.
            for (old_parsed.value.array.items) |elem| try combined.append(elem);
            if (update_val == .array) {
                for (update_val.array.items) |elem| try combined.append(elem);
            } else {
                try combined.append(update_val);
            }
        } else {
            // Old scalar/object: promote to [old, update].
            // NOTE(review): in this branch an array-valued update is kept as a
            // single nested element, unlike the array-old branch which splices —
            // behavior preserved as-is; confirm the asymmetry is intended.
            try combined.append(old_parsed.value);
            try combined.append(update_val);
        }
    }

    const out = try serializeValue(aa, json.Value{ .array = combined });
    return try alloc.dupe(u8, out);
}

/// merge: deep merge two JSON objects. Update keys override old keys.
/// Nested objects are recursively merged.
fn applyMerge(alloc: Allocator, old_json: ?[]const u8, update_json: []const u8) ![]const u8 {
    var arena = std.heap.ArenaAllocator.init(alloc);
    defer arena.deinit();
    const aa = arena.allocator();

    const update_parsed = try json.parseFromSlice(json.Value, aa, update_json, .{});

    // Any case where an object-on-object merge is impossible degrades to
    // last-value semantics: the update wins wholesale.
    if (update_parsed.value != .object or old_json == null or old_json.?.len == 0) {
        return try alloc.dupe(u8, update_json);
    }

    const old_parsed = try json.parseFromSlice(json.Value, aa, old_json.?, .{});
    if (old_parsed.value != .object) {
        return try alloc.dupe(u8, update_json);
    }

    const merged = try deepMerge(aa, old_parsed.value, update_parsed.value);
    const out = try serializeValue(aa, merged);
    return try alloc.dupe(u8, out);
}

/// Recursively deep-merge two JSON objects. Overlay keys win on conflicts,
/// except object-vs-object conflicts, which merge recursively.
fn deepMerge(alloc: Allocator, base: json.Value, overlay: json.Value) !json.Value {
    if (base != .object or overlay != .object) {
        return overlay;
    }

    var merged = json.ObjectMap.init(alloc);

    // Seed with every base entry.
    var base_it = base.object.iterator();
    while (base_it.next()) |kv| {
        try merged.put(kv.key_ptr.*, kv.value_ptr.*);
    }

    // Layer the overlay on top.
    var overlay_it = overlay.object.iterator();
    while (overlay_it.next()) |kv| {
        const key = kv.key_ptr.*;
        const val = kv.value_ptr.*;
        const prior = merged.get(key);
        if (prior != null and prior.? == .object and val == .object) {
            try merged.put(key, try deepMerge(alloc, prior.?, val));
        } else {
            try merged.put(key, val);
        }
    }

    return json.Value{ .object = merged };
}

/// add: parse both as numbers (f64), add, return string. If old is null, treat as 0.
fn applyAdd(alloc: Allocator, old_json: ?[]const u8, update_json: []const u8) ![]const u8 {
    var arena = std.heap.ArenaAllocator.init(alloc);
    defer arena.deinit();
    const aa = arena.allocator();

    const update_parsed = try json.parseFromSlice(json.Value, aa, update_json, .{});
    const addend = jsonToFloat(update_parsed.value) orelse return error.InvalidNumber;

    // A missing, empty, unparsable, or non-numeric old value contributes 0.
    var base: f64 = 0;
    if (old_json) |old| {
        if (old.len > 0) {
            if (json.parseFromSlice(json.Value, aa, old, .{})) |old_parsed| {
                base = jsonToFloat(old_parsed.value) orelse 0;
            } else |_| {}
        }
    }

    return try formatFloat(alloc, base + addend);
}

/// min: parse both as numbers, return the smaller. If old is null, return update.
fn applyMin(alloc: Allocator, old_json: ?[]const u8, update_json: []const u8) ![]const u8 {
    var arena = std.heap.ArenaAllocator.init(alloc);
    defer arena.deinit();
    const aa = arena.allocator();

    const update_parsed = try json.parseFromSlice(json.Value, aa, update_json, .{});
    const candidate = jsonToFloat(update_parsed.value) orelse return error.InvalidNumber;

    // Resolve the previous numeric value; any failure means the update stands alone.
    const prior: f64 = resolve: {
        const old = old_json orelse break :resolve candidate;
        if (old.len == 0) break :resolve candidate;
        const old_parsed = json.parseFromSlice(json.Value, aa, old, .{}) catch break :resolve candidate;
        break :resolve jsonToFloat(old_parsed.value) orelse candidate;
    };

    return try formatFloat(alloc, @min(prior, candidate));
}

/// max: parse both as numbers, return the larger. If old is null, return update.
+fn applyMax(alloc: Allocator, old_json: ?[]const u8, update_json: []const u8) ![]const u8 { + var arena = std.heap.ArenaAllocator.init(alloc); + defer arena.deinit(); + const arena_alloc = arena.allocator(); + + const update_parsed = try json.parseFromSlice(json.Value, arena_alloc, update_json, .{}); + const update_val = jsonToFloat(update_parsed.value) orelse return error.InvalidNumber; + + const old = old_json orelse return try formatFloat(alloc, update_val); + if (old.len == 0) return try formatFloat(alloc, update_val); + + const old_parsed = json.parseFromSlice(json.Value, arena_alloc, old, .{}) catch + return try formatFloat(alloc, update_val); + const old_val = jsonToFloat(old_parsed.value) orelse return try formatFloat(alloc, update_val); + + return try formatFloat(alloc, @max(old_val, update_val)); +} + +/// add_messages: merge message arrays by "id" field. +/// - If old is null → wrap update in array +/// - If update msg has "remove": true → remove matching id from old +/// - If update msg "id" matches existing → replace in-place +/// - If update msg "id" doesn't match → append +/// - If update msg has no "id" → generate one and append +fn applyAddMessages(alloc: Allocator, old_json: ?[]const u8, update_json: []const u8) ![]const u8 { + var arena = std.heap.ArenaAllocator.init(alloc); + defer arena.deinit(); + const arena_alloc = arena.allocator(); + + // Parse update: single object or array of objects + const update_parsed = try json.parseFromSlice(json.Value, arena_alloc, update_json, .{}); + var update_msgs = json.Array.init(arena_alloc); + if (update_parsed.value == .array) { + for (update_parsed.value.array.items) |item| { + try update_msgs.append(item); + } + } else if (update_parsed.value == .object) { + try update_msgs.append(update_parsed.value); + } else { + return try alloc.dupe(u8, update_json); + } + + // Parse old array or start empty + var result_msgs = json.Array.init(arena_alloc); + if (old_json) |old| { + if (old.len > 0) { + const 
old_parsed = try json.parseFromSlice(json.Value, arena_alloc, old, .{}); + if (old_parsed.value == .array) { + for (old_parsed.value.array.items) |item| { + try result_msgs.append(item); + } + } + } + } + + // Process each update message + for (update_msgs.items) |msg| { + if (msg != .object) continue; + + const msg_id: ?[]const u8 = blk: { + if (msg.object.get("id")) |id_val| { + if (id_val == .string) break :blk id_val.string; + } + break :blk null; + }; + + // Check for remove flag + const is_remove = blk: { + if (msg.object.get("remove")) |rm_val| { + if (rm_val == .bool) break :blk rm_val.bool; + } + break :blk false; + }; + + if (is_remove) { + if (msg_id) |id| { + // Filter out the message with matching id + var filtered = json.Array.init(arena_alloc); + for (result_msgs.items) |existing| { + if (existing == .object) { + if (existing.object.get("id")) |eid| { + if (eid == .string and std.mem.eql(u8, eid.string, id)) { + continue; // skip — removing this message + } + } + } + try filtered.append(existing); + } + result_msgs = filtered; + } + continue; + } + + if (msg_id) |id| { + // Try to find and replace existing message with same id + var replaced = false; + for (result_msgs.items, 0..) 
|existing, i| { + if (existing == .object) { + if (existing.object.get("id")) |eid| { + if (eid == .string and std.mem.eql(u8, eid.string, id)) { + result_msgs.items[i] = msg; + replaced = true; + break; + } + } + } + } + if (!replaced) { + try result_msgs.append(msg); + } + } else { + // No id — generate one and append + var msg_copy = json.ObjectMap.init(arena_alloc); + var it = msg.object.iterator(); + while (it.next()) |entry| { + try msg_copy.put(entry.key_ptr.*, entry.value_ptr.*); + } + const gen_id = try std.fmt.allocPrint(arena_alloc, "msg_{d}", .{result_msgs.items.len}); + try msg_copy.put("id", json.Value{ .string = gen_id }); + try result_msgs.append(json.Value{ .object = msg_copy }); + } + } + + const result = try serializeValue(arena_alloc, json.Value{ .array = result_msgs }); + return try alloc.dupe(u8, result); +} + +// ── Ephemeral State Keys ────────────────────────────────────────────── + +/// Strip ephemeral keys from state before checkpoint persistence. +/// Parses the schema for keys with `"ephemeral": true` and removes +/// those keys from the state JSON. Returns a new JSON string. 
+pub fn stripEphemeralKeys(alloc: Allocator, state_json: []const u8, schema_json: []const u8) ![]const u8 { + var arena = std.heap.ArenaAllocator.init(alloc); + defer arena.deinit(); + const arena_alloc = arena.allocator(); + + // Parse schema to find ephemeral keys + const schema_parsed = try json.parseFromSlice(json.Value, arena_alloc, schema_json, .{}); + if (schema_parsed.value != .object) return try alloc.dupe(u8, state_json); + + var ephemeral_keys = std.StringHashMap(void).init(arena_alloc); + var schema_it = schema_parsed.value.object.iterator(); + while (schema_it.next()) |entry| { + const schema_entry = entry.value_ptr.*; + if (schema_entry == .object) { + if (schema_entry.object.get("ephemeral")) |eph_val| { + if (eph_val == .bool and eph_val.bool) { + try ephemeral_keys.put(entry.key_ptr.*, {}); + } + } + } + } + + if (ephemeral_keys.count() == 0) return try alloc.dupe(u8, state_json); + + // Parse state and remove ephemeral keys + const state_parsed = try json.parseFromSlice(json.Value, arena_alloc, state_json, .{}); + if (state_parsed.value != .object) return try alloc.dupe(u8, state_json); + + var result_obj = json.ObjectMap.init(arena_alloc); + var state_it = state_parsed.value.object.iterator(); + while (state_it.next()) |entry| { + if (ephemeral_keys.get(entry.key_ptr.*) == null) { + try result_obj.put(entry.key_ptr.*, entry.value_ptr.*); + } + } + + const result_str = try serializeValue(arena_alloc, json.Value{ .object = result_obj }); + return try alloc.dupe(u8, result_str); +} + +// ── Tests ───────────────────────────────────────────────────────────── + +fn parseTestJson(alloc: Allocator, json_str: []const u8) !json.Parsed(json.Value) { + return try json.parseFromSlice(json.Value, alloc, json_str, .{}); +} + +test "last_value reducer" { + const alloc = std.testing.allocator; + const result = try applyReducer(alloc, .last_value, "\"old\"", "\"new\""); + defer alloc.free(result); + try std.testing.expectEqualStrings("\"new\"", result); +} + 
+test "add reducer" { + const alloc = std.testing.allocator; + const result = try applyReducer(alloc, .add, "10", "5"); + defer alloc.free(result); + try std.testing.expectEqualStrings("15", result); +} + +test "add reducer with null old" { + const alloc = std.testing.allocator; + const result = try applyReducer(alloc, .add, null, "7"); + defer alloc.free(result); + try std.testing.expectEqualStrings("7", result); +} + +test "append reducer" { + const alloc = std.testing.allocator; + const result = try applyReducer(alloc, .append, "[1,2]", "3"); + defer alloc.free(result); + try std.testing.expectEqualStrings("[1,2,3]", result); +} + +test "append reducer with null old" { + const alloc = std.testing.allocator; + const result = try applyReducer(alloc, .append, null, "\"hello\""); + defer alloc.free(result); + try std.testing.expectEqualStrings("[\"hello\"]", result); +} + +test "merge reducer - flat objects" { + const alloc = std.testing.allocator; + const result = try applyReducer(alloc, .merge, "{\"a\":1,\"b\":2}", "{\"b\":3,\"c\":4}"); + defer alloc.free(result); + // Parse result to check keys since JSON object key order is not guaranteed + const parsed = try parseTestJson(alloc, result); + defer parsed.deinit(); + try std.testing.expect(parsed.value == .object); + + const a = parsed.value.object.get("a") orelse return error.TestUnexpectedResult; + try std.testing.expectEqual(@as(i64, 1), a.integer); + + const b = parsed.value.object.get("b") orelse return error.TestUnexpectedResult; + try std.testing.expectEqual(@as(i64, 3), b.integer); + + const c = parsed.value.object.get("c") orelse return error.TestUnexpectedResult; + try std.testing.expectEqual(@as(i64, 4), c.integer); +} + +test "merge reducer - null old" { + const alloc = std.testing.allocator; + const result = try applyReducer(alloc, .merge, null, "{\"x\":1}"); + defer alloc.free(result); + try std.testing.expectEqualStrings("{\"x\":1}", result); +} + +test "min reducer" { + const alloc = 
std.testing.allocator; + const result = try applyReducer(alloc, .min, "10", "3"); + defer alloc.free(result); + try std.testing.expectEqualStrings("3", result); +} + +test "max reducer" { + const alloc = std.testing.allocator; + const result = try applyReducer(alloc, .max, "10", "3"); + defer alloc.free(result); + try std.testing.expectEqualStrings("10", result); +} + +test "applyUpdates with mixed reducers" { + const alloc = std.testing.allocator; + const state = + \\{"count":5,"messages":["hello"],"config":{"a":1}} + ; + const updates = + \\{"count":3,"messages":"world","config":{"b":2}} + ; + const schema = + \\{"count":{"type":"number","reducer":"add"},"messages":{"type":"array","reducer":"append"},"config":{"type":"object","reducer":"merge"}} + ; + + const result = try applyUpdates(alloc, state, updates, schema); + defer alloc.free(result); + + const parsed = try parseTestJson(alloc, result); + defer parsed.deinit(); + try std.testing.expect(parsed.value == .object); + + // count: 5 + 3 = 8 + const count = parsed.value.object.get("count") orelse return error.TestUnexpectedResult; + try std.testing.expectEqual(@as(i64, 8), count.integer); + + // messages: ["hello"] + "world" = ["hello","world"] + const messages = parsed.value.object.get("messages") orelse return error.TestUnexpectedResult; + try std.testing.expect(messages == .array); + try std.testing.expectEqual(@as(usize, 2), messages.array.items.len); + + // config: merge {a:1} + {b:2} = {a:1, b:2} + const config = parsed.value.object.get("config") orelse return error.TestUnexpectedResult; + try std.testing.expect(config == .object); + try std.testing.expect(config.object.get("a") != null); + try std.testing.expect(config.object.get("b") != null); +} + +test "initState with defaults" { + const alloc = std.testing.allocator; + const input = + \\{"prompt":"hi"} + ; + const schema = + 
\\{"prompt":{"type":"string","reducer":"last_value"},"messages":{"type":"array","reducer":"append"},"count":{"type":"number","reducer":"add"},"done":{"type":"boolean","reducer":"last_value"},"meta":{"type":"object","reducer":"merge"}} + ; + + const result = try initState(alloc, input, schema); + defer alloc.free(result); + + const parsed = try parseTestJson(alloc, result); + defer parsed.deinit(); + try std.testing.expect(parsed.value == .object); + + // prompt should be from input + const prompt = parsed.value.object.get("prompt") orelse return error.TestUnexpectedResult; + try std.testing.expectEqualStrings("hi", prompt.string); + + // messages should default to [] + const messages = parsed.value.object.get("messages") orelse return error.TestUnexpectedResult; + try std.testing.expect(messages == .array); + try std.testing.expectEqual(@as(usize, 0), messages.array.items.len); + + // count should default to 0 + const count = parsed.value.object.get("count") orelse return error.TestUnexpectedResult; + try std.testing.expectEqual(@as(i64, 0), count.integer); + + // done should default to false + const done = parsed.value.object.get("done") orelse return error.TestUnexpectedResult; + try std.testing.expectEqual(false, done.bool); + + // meta should default to {} + const meta = parsed.value.object.get("meta") orelse return error.TestUnexpectedResult; + try std.testing.expect(meta == .object); + try std.testing.expectEqual(@as(usize, 0), meta.object.count()); +} + +test "getStateValue simple key" { + const alloc = std.testing.allocator; + const state = + \\{"prompt":"hello","count":42} + ; + const result = try getStateValue(alloc, state, "state.prompt"); + defer if (result) |r| alloc.free(r); + try std.testing.expect(result != null); + try std.testing.expectEqualStrings("\"hello\"", result.?); +} + +test "getStateValue nested" { + const alloc = std.testing.allocator; + const state = + \\{"plan":{"files":["a.zig","b.zig"]}} + ; + const result = try getStateValue(alloc, 
state, "state.plan.files"); + defer if (result) |r| alloc.free(r); + try std.testing.expect(result != null); + try std.testing.expectEqualStrings("[\"a.zig\",\"b.zig\"]", result.?); +} + +test "getStateValue array last element" { + const alloc = std.testing.allocator; + const state = + \\{"messages":["first","second","third"]} + ; + const result = try getStateValue(alloc, state, "state.messages[-1]"); + defer if (result) |r| alloc.free(r); + try std.testing.expect(result != null); + try std.testing.expectEqualStrings("\"third\"", result.?); +} + +test "stringifyForRoute boolean" { + const alloc = std.testing.allocator; + const result_true = try stringifyForRoute(alloc, "true"); + defer alloc.free(result_true); + try std.testing.expectEqualStrings("true", result_true); + + const result_false = try stringifyForRoute(alloc, "false"); + defer alloc.free(result_false); + try std.testing.expectEqualStrings("false", result_false); +} + +test "stringifyForRoute number" { + const alloc = std.testing.allocator; + const result = try stringifyForRoute(alloc, "42"); + defer alloc.free(result); + try std.testing.expectEqualStrings("42", result); +} + +test "stringifyForRoute string" { + const alloc = std.testing.allocator; + const result = try stringifyForRoute(alloc, "\"hello world\""); + defer alloc.free(result); + try std.testing.expectEqualStrings("hello world", result); +} + +test "add_messages reducer - append new" { + const alloc = std.testing.allocator; + const result = try applyReducer(alloc, .add_messages, + \\[{"id":"1","text":"hello"}] + , + \\{"id":"2","text":"world"} + ); + defer alloc.free(result); + // Parse and verify: should be array with 2 messages + const parsed = try parseTestJson(alloc, result); + defer parsed.deinit(); + try std.testing.expect(parsed.value == .array); + try std.testing.expectEqual(@as(usize, 2), parsed.value.array.items.len); + // First message id=1 + const m0 = parsed.value.array.items[0]; + try std.testing.expect(m0 == .object); + const 
id0 = m0.object.get("id") orelse return error.TestUnexpectedResult; + try std.testing.expectEqualStrings("1", id0.string); + // Second message id=2 + const m1 = parsed.value.array.items[1]; + try std.testing.expect(m1 == .object); + const id1 = m1.object.get("id") orelse return error.TestUnexpectedResult; + try std.testing.expectEqualStrings("2", id1.string); + const text1 = m1.object.get("text") orelse return error.TestUnexpectedResult; + try std.testing.expectEqualStrings("world", text1.string); +} + +test "add_messages reducer - replace by id" { + const alloc = std.testing.allocator; + const result = try applyReducer(alloc, .add_messages, + \\[{"id":"1","text":"old"}] + , + \\{"id":"1","text":"new"} + ); + defer alloc.free(result); + const parsed = try parseTestJson(alloc, result); + defer parsed.deinit(); + try std.testing.expect(parsed.value == .array); + try std.testing.expectEqual(@as(usize, 1), parsed.value.array.items.len); + const m0 = parsed.value.array.items[0]; + const text = m0.object.get("text") orelse return error.TestUnexpectedResult; + try std.testing.expectEqualStrings("new", text.string); +} + +test "add_messages reducer - remove by id" { + const alloc = std.testing.allocator; + const result = try applyReducer(alloc, .add_messages, + \\[{"id":"1","text":"hello"},{"id":"2","text":"world"}] + , + \\{"id":"1","remove":true} + ); + defer alloc.free(result); + const parsed = try parseTestJson(alloc, result); + defer parsed.deinit(); + try std.testing.expect(parsed.value == .array); + try std.testing.expectEqual(@as(usize, 1), parsed.value.array.items.len); + const m0 = parsed.value.array.items[0]; + const id0 = m0.object.get("id") orelse return error.TestUnexpectedResult; + try std.testing.expectEqualStrings("2", id0.string); +} + +test "add_messages reducer - null old" { + const alloc = std.testing.allocator; + const result = try applyReducer(alloc, .add_messages, null, + \\{"id":"1","text":"first"} + ); + defer alloc.free(result); + const parsed = 
try parseTestJson(alloc, result); + defer parsed.deinit(); + try std.testing.expect(parsed.value == .array); + try std.testing.expectEqual(@as(usize, 1), parsed.value.array.items.len); + const m0 = parsed.value.array.items[0]; + const id0 = m0.object.get("id") orelse return error.TestUnexpectedResult; + try std.testing.expectEqualStrings("1", id0.string); + const text0 = m0.object.get("text") orelse return error.TestUnexpectedResult; + try std.testing.expectEqualStrings("first", text0.string); +} + +test "overwrite bypasses reducer" { + const alloc = std.testing.allocator; + // count has "add" reducer, but __overwrite should bypass it + const state = + \\{"count":10} + ; + const updates = + \\{"count":{"__overwrite":true,"value":42}} + ; + const schema = + \\{"count":{"type":"number","reducer":"add"}} + ; + + const result = try applyUpdates(alloc, state, updates, schema); + defer alloc.free(result); + + const parsed = try parseTestJson(alloc, result); + defer parsed.deinit(); + try std.testing.expect(parsed.value == .object); + const count = parsed.value.object.get("count") orelse return error.TestUnexpectedResult; + // Should be 42 (overwritten), not 52 (10 + 42 via add reducer) + try std.testing.expectEqual(@as(i64, 42), count.integer); +} + +test "overwrite with array value" { + const alloc = std.testing.allocator; + const state = + \\{"items":[1,2,3]} + ; + const updates = + \\{"items":{"__overwrite":true,"value":[99]}} + ; + const schema = + \\{"items":{"type":"array","reducer":"append"}} + ; + + const result = try applyUpdates(alloc, state, updates, schema); + defer alloc.free(result); + + const parsed = try parseTestJson(alloc, result); + defer parsed.deinit(); + const items = parsed.value.object.get("items") orelse return error.TestUnexpectedResult; + try std.testing.expect(items == .array); + // Should be [99] (overwritten), not [1,2,3,99] (appended) + try std.testing.expectEqual(@as(usize, 1), items.array.items.len); + try std.testing.expectEqual(@as(i64, 
99), items.array.items[0].integer); +} + +test "stripEphemeralKeys removes ephemeral keys" { + const alloc = std.testing.allocator; + const state = + \\{"messages":["hello"],"temp_data":"scratch","count":5} + ; + const schema = + \\{"messages":{"type":"array","reducer":"append"},"temp_data":{"type":"string","reducer":"last_value","ephemeral":true},"count":{"type":"number","reducer":"add"}} + ; + + const result = try stripEphemeralKeys(alloc, state, schema); + defer alloc.free(result); + + const parsed = try parseTestJson(alloc, result); + defer parsed.deinit(); + try std.testing.expect(parsed.value == .object); + // temp_data should be stripped + try std.testing.expect(parsed.value.object.get("temp_data") == null); + // messages and count should remain + try std.testing.expect(parsed.value.object.get("messages") != null); + try std.testing.expect(parsed.value.object.get("count") != null); +} + +test "stripEphemeralKeys no-op when no ephemeral keys" { + const alloc = std.testing.allocator; + const state = + \\{"messages":["hello"],"count":5} + ; + const schema = + \\{"messages":{"type":"array","reducer":"append"},"count":{"type":"number","reducer":"add"}} + ; + + const result = try stripEphemeralKeys(alloc, state, schema); + defer alloc.free(result); + + const parsed = try parseTestJson(alloc, result); + defer parsed.deinit(); + try std.testing.expect(parsed.value == .object); + try std.testing.expect(parsed.value.object.get("messages") != null); + try std.testing.expect(parsed.value.object.get("count") != null); +} + +test "stripEphemeralKeys with empty state" { + const alloc = std.testing.allocator; + const result = try stripEphemeralKeys(alloc, "{}", "{}"); + defer alloc.free(result); + try std.testing.expectEqualStrings("{}", result); +} diff --git a/src/store.zig b/src/store.zig index 4eeb2dc..ebf0ca5 100644 --- a/src/store.zig +++ b/src/store.zig @@ -140,6 +140,17 @@ pub const Store = struct { } return error.MigrationFailed; } + + // Migration 004 — 
orchestration schema (workflows, checkpoints, agent_events) + const sql_004 = @embedFile("migrations/004_orchestration.sql"); + prc = c.sqlite3_exec(self.db, sql_004.ptr, null, null, &err_msg); + if (prc != c.SQLITE_OK) { + if (err_msg) |msg| { + log.err("migration 004 failed (rc={d}): {s}", .{ prc, std.mem.span(msg) }); + c.sqlite3_free(msg); + } + return error.MigrationFailed; + } } pub fn beginTransaction(self: *Self) !void { @@ -392,7 +403,7 @@ pub const Store = struct { } pub fn getRun(self: *Self, allocator: std.mem.Allocator, id: []const u8) !?types.RunRow { - const sql = "SELECT id, idempotency_key, status, workflow_json, input_json, callbacks_json, error_text, created_at_ms, updated_at_ms, started_at_ms, ended_at_ms FROM runs WHERE id = ?"; + const sql = "SELECT id, idempotency_key, status, workflow_id, workflow_json, input_json, callbacks_json, error_text, created_at_ms, updated_at_ms, started_at_ms, ended_at_ms, state_json, config_json, parent_run_id FROM runs WHERE id = ?"; var stmt: ?*c.sqlite3_stmt = null; if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { return error.SqlitePrepareFailed; @@ -407,19 +418,23 @@ pub const Store = struct { .id = try allocStr(allocator, stmt, 0), .idempotency_key = try allocStrOpt(allocator, stmt, 1), .status = try allocStr(allocator, stmt, 2), - .workflow_json = try allocStr(allocator, stmt, 3), - .input_json = try allocStr(allocator, stmt, 4), - .callbacks_json = try allocStr(allocator, stmt, 5), - .error_text = try allocStrOpt(allocator, stmt, 6), - .created_at_ms = colInt(stmt, 7), - .updated_at_ms = colInt(stmt, 8), - .started_at_ms = colIntOpt(stmt, 9), - .ended_at_ms = colIntOpt(stmt, 10), + .workflow_id = try allocStrOpt(allocator, stmt, 3), + .workflow_json = try allocStr(allocator, stmt, 4), + .input_json = try allocStr(allocator, stmt, 5), + .callbacks_json = try allocStr(allocator, stmt, 6), + .error_text = try allocStrOpt(allocator, stmt, 7), + .created_at_ms = colInt(stmt, 8), + 
.updated_at_ms = colInt(stmt, 9), + .started_at_ms = colIntOpt(stmt, 10), + .ended_at_ms = colIntOpt(stmt, 11), + .state_json = try allocStrOpt(allocator, stmt, 12), + .config_json = try allocStrOpt(allocator, stmt, 13), + .parent_run_id = try allocStrOpt(allocator, stmt, 14), }; } pub fn getRunByIdempotencyKey(self: *Self, allocator: std.mem.Allocator, key: []const u8) !?types.RunRow { - const sql = "SELECT id, idempotency_key, status, workflow_json, input_json, callbacks_json, error_text, created_at_ms, updated_at_ms, started_at_ms, ended_at_ms FROM runs WHERE idempotency_key = ? ORDER BY created_at_ms DESC LIMIT 1"; + const sql = "SELECT id, idempotency_key, status, workflow_id, workflow_json, input_json, callbacks_json, error_text, created_at_ms, updated_at_ms, started_at_ms, ended_at_ms, state_json, config_json, parent_run_id FROM runs WHERE idempotency_key = ? ORDER BY created_at_ms DESC LIMIT 1"; var stmt: ?*c.sqlite3_stmt = null; if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { return error.SqlitePrepareFailed; @@ -433,35 +448,45 @@ pub const Store = struct { .id = try allocStr(allocator, stmt, 0), .idempotency_key = try allocStrOpt(allocator, stmt, 1), .status = try allocStr(allocator, stmt, 2), - .workflow_json = try allocStr(allocator, stmt, 3), - .input_json = try allocStr(allocator, stmt, 4), - .callbacks_json = try allocStr(allocator, stmt, 5), - .error_text = try allocStrOpt(allocator, stmt, 6), - .created_at_ms = colInt(stmt, 7), - .updated_at_ms = colInt(stmt, 8), - .started_at_ms = colIntOpt(stmt, 9), - .ended_at_ms = colIntOpt(stmt, 10), + .workflow_id = try allocStrOpt(allocator, stmt, 3), + .workflow_json = try allocStr(allocator, stmt, 4), + .input_json = try allocStr(allocator, stmt, 5), + .callbacks_json = try allocStr(allocator, stmt, 6), + .error_text = try allocStrOpt(allocator, stmt, 7), + .created_at_ms = colInt(stmt, 8), + .updated_at_ms = colInt(stmt, 9), + .started_at_ms = colIntOpt(stmt, 10), + .ended_at_ms 
= colIntOpt(stmt, 11), + .state_json = try allocStrOpt(allocator, stmt, 12), + .config_json = try allocStrOpt(allocator, stmt, 13), + .parent_run_id = try allocStrOpt(allocator, stmt, 14), }; } - pub fn listRuns(self: *Self, allocator: std.mem.Allocator, status_filter: ?[]const u8, limit: i64, offset: i64) ![]types.RunRow { + pub fn listRuns(self: *Self, allocator: std.mem.Allocator, status_filter: ?[]const u8, workflow_id_filter: ?[]const u8, limit: i64, offset: i64) ![]types.RunRow { var stmt: ?*c.sqlite3_stmt = null; - if (status_filter != null) { - const sql = "SELECT id, idempotency_key, status, workflow_json, input_json, callbacks_json, error_text, created_at_ms, updated_at_ms, started_at_ms, ended_at_ms FROM runs WHERE status = ? ORDER BY created_at_ms DESC LIMIT ? OFFSET ?"; - if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { - return error.SqlitePrepareFailed; - } - _ = c.sqlite3_bind_text(stmt, 1, status_filter.?.ptr, @intCast(status_filter.?.len), SQLITE_STATIC); - _ = c.sqlite3_bind_int64(stmt, 2, limit); - _ = c.sqlite3_bind_int64(stmt, 3, offset); + const sql = + "SELECT id, idempotency_key, status, workflow_id, workflow_json, input_json, callbacks_json, error_text, created_at_ms, updated_at_ms, started_at_ms, ended_at_ms, state_json, config_json, parent_run_id " ++ + "FROM runs WHERE (? IS NULL OR status = ?) AND (? IS NULL OR workflow_id = ?) ORDER BY created_at_ms DESC LIMIT ? OFFSET ?"; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + if (status_filter) |status| { + _ = c.sqlite3_bind_text(stmt, 1, status.ptr, @intCast(status.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 2, status.ptr, @intCast(status.len), SQLITE_STATIC); } else { - const sql = "SELECT id, idempotency_key, status, workflow_json, input_json, callbacks_json, error_text, created_at_ms, updated_at_ms, started_at_ms, ended_at_ms FROM runs ORDER BY created_at_ms DESC LIMIT ? 
OFFSET ?"; - if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { - return error.SqlitePrepareFailed; - } - _ = c.sqlite3_bind_int64(stmt, 1, limit); - _ = c.sqlite3_bind_int64(stmt, 2, offset); + _ = c.sqlite3_bind_null(stmt, 1); + _ = c.sqlite3_bind_null(stmt, 2); + } + if (workflow_id_filter) |workflow_id| { + _ = c.sqlite3_bind_text(stmt, 3, workflow_id.ptr, @intCast(workflow_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 4, workflow_id.ptr, @intCast(workflow_id.len), SQLITE_STATIC); + } else { + _ = c.sqlite3_bind_null(stmt, 3); + _ = c.sqlite3_bind_null(stmt, 4); } + _ = c.sqlite3_bind_int64(stmt, 5, limit); + _ = c.sqlite3_bind_int64(stmt, 6, offset); defer _ = c.sqlite3_finalize(stmt); var list: std.ArrayListUnmanaged(types.RunRow) = .empty; @@ -470,14 +495,18 @@ pub const Store = struct { .id = try allocStr(allocator, stmt, 0), .idempotency_key = try allocStrOpt(allocator, stmt, 1), .status = try allocStr(allocator, stmt, 2), - .workflow_json = try allocStr(allocator, stmt, 3), - .input_json = try allocStr(allocator, stmt, 4), - .callbacks_json = try allocStr(allocator, stmt, 5), - .error_text = try allocStrOpt(allocator, stmt, 6), - .created_at_ms = colInt(stmt, 7), - .updated_at_ms = colInt(stmt, 8), - .started_at_ms = colIntOpt(stmt, 9), - .ended_at_ms = colIntOpt(stmt, 10), + .workflow_id = try allocStrOpt(allocator, stmt, 3), + .workflow_json = try allocStr(allocator, stmt, 4), + .input_json = try allocStr(allocator, stmt, 5), + .callbacks_json = try allocStr(allocator, stmt, 6), + .error_text = try allocStrOpt(allocator, stmt, 7), + .created_at_ms = colInt(stmt, 8), + .updated_at_ms = colInt(stmt, 9), + .started_at_ms = colIntOpt(stmt, 10), + .ended_at_ms = colIntOpt(stmt, 11), + .state_json = try allocStrOpt(allocator, stmt, 12), + .config_json = try allocStrOpt(allocator, stmt, 13), + .parent_run_id = try allocStrOpt(allocator, stmt, 14), }); } return list.toOwnedSlice(allocator); @@ -502,7 +531,7 @@ pub const Store = 
struct { } pub fn getActiveRuns(self: *Self, allocator: std.mem.Allocator) ![]types.RunRow { - const sql = "SELECT id, idempotency_key, status, workflow_json, input_json, callbacks_json, error_text, created_at_ms, updated_at_ms, started_at_ms, ended_at_ms FROM runs WHERE status IN ('running', 'paused') ORDER BY created_at_ms DESC"; + const sql = "SELECT id, idempotency_key, status, workflow_id, workflow_json, input_json, callbacks_json, error_text, created_at_ms, updated_at_ms, started_at_ms, ended_at_ms, state_json, config_json, parent_run_id FROM runs WHERE status = 'running' ORDER BY created_at_ms DESC"; var stmt: ?*c.sqlite3_stmt = null; if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { return error.SqlitePrepareFailed; @@ -515,14 +544,18 @@ pub const Store = struct { .id = try allocStr(allocator, stmt, 0), .idempotency_key = try allocStrOpt(allocator, stmt, 1), .status = try allocStr(allocator, stmt, 2), - .workflow_json = try allocStr(allocator, stmt, 3), - .input_json = try allocStr(allocator, stmt, 4), - .callbacks_json = try allocStr(allocator, stmt, 5), - .error_text = try allocStrOpt(allocator, stmt, 6), - .created_at_ms = colInt(stmt, 7), - .updated_at_ms = colInt(stmt, 8), - .started_at_ms = colIntOpt(stmt, 9), - .ended_at_ms = colIntOpt(stmt, 10), + .workflow_id = try allocStrOpt(allocator, stmt, 3), + .workflow_json = try allocStr(allocator, stmt, 4), + .input_json = try allocStr(allocator, stmt, 5), + .callbacks_json = try allocStr(allocator, stmt, 6), + .error_text = try allocStrOpt(allocator, stmt, 7), + .created_at_ms = colInt(stmt, 8), + .updated_at_ms = colInt(stmt, 9), + .started_at_ms = colIntOpt(stmt, 10), + .ended_at_ms = colIntOpt(stmt, 11), + .state_json = try allocStrOpt(allocator, stmt, 12), + .config_json = try allocStrOpt(allocator, stmt, 13), + .parent_run_id = try allocStrOpt(allocator, stmt, 14), }); } return list.toOwnedSlice(allocator); @@ -693,13 +726,10 @@ pub const Store = struct { } } - pub fn 
getReadySteps(self: *Self, allocator: std.mem.Allocator, run_id: []const u8) ![]types.StepRow { - const sql = - "SELECT s.id, s.run_id, s.def_step_id, s.type, s.status, s.worker_id, s.input_json, s.output_json, s.error_text, s.attempt, s.max_attempts, s.timeout_ms, s.next_attempt_at_ms, s.parent_step_id, s.item_index, s.created_at_ms, s.updated_at_ms, s.started_at_ms, s.ended_at_ms, s.child_run_id, s.iteration_index " ++ - "FROM steps s WHERE s.run_id = ? AND s.status = 'ready' " ++ - "AND NOT EXISTS (" ++ - "SELECT 1 FROM step_deps d JOIN steps dep ON dep.id = d.depends_on " ++ - "WHERE d.step_id = s.id AND dep.status NOT IN ('completed', 'skipped'))"; + /// Get a retrying step for a given run and node name (def_step_id). + /// Returns the step if it exists with status='ready' and next_attempt_at_ms set. + pub fn getRetryingStepForNode(self: *Self, allocator: std.mem.Allocator, run_id: []const u8, node_name: []const u8) !?types.StepRow { + const sql = "SELECT id, run_id, def_step_id, type, status, worker_id, input_json, output_json, error_text, attempt, max_attempts, timeout_ms, next_attempt_at_ms, parent_step_id, item_index, created_at_ms, updated_at_ms, started_at_ms, ended_at_ms, child_run_id, iteration_index FROM steps WHERE run_id = ? AND def_step_id = ? 
AND status = 'ready' AND next_attempt_at_ms IS NOT NULL ORDER BY created_at_ms DESC LIMIT 1"; var stmt: ?*c.sqlite3_stmt = null; if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { return error.SqlitePrepareFailed; @@ -707,12 +737,11 @@ pub const Store = struct { defer _ = c.sqlite3_finalize(stmt); _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 2, node_name.ptr, @intCast(node_name.len), SQLITE_STATIC); - var list: std.ArrayListUnmanaged(types.StepRow) = .empty; - while (c.sqlite3_step(stmt) == c.SQLITE_ROW) { - try list.append(allocator, try readStepRow(allocator, stmt)); - } - return list.toOwnedSlice(allocator); + if (c.sqlite3_step(stmt) != c.SQLITE_ROW) return null; + + return try readStepRow(allocator, stmt); } pub fn countStepsByStatus(self: *Self, run_id: []const u8, status: []const u8) !i64 { @@ -747,55 +776,55 @@ pub const Store = struct { return list.toOwnedSlice(allocator); } - /// Get the IDs of steps that a given step depends on. - pub fn getStepDeps(self: *Self, allocator: std.mem.Allocator, step_id: []const u8) ![][]const u8 { - const sql = "SELECT depends_on FROM step_deps WHERE step_id = ?"; + /// Delete steps for a run that were created after a given timestamp. + /// Used during replay to remove steps that will be re-executed. + pub fn deleteStepsAfterTimestamp(self: *Self, run_id: []const u8, after_ms: i64) !void { + const sql = "DELETE FROM steps WHERE run_id = ? 
AND created_at_ms > ?"; var stmt: ?*c.sqlite3_stmt = null; if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { return error.SqlitePrepareFailed; } defer _ = c.sqlite3_finalize(stmt); - _ = c.sqlite3_bind_text(stmt, 1, step_id.ptr, @intCast(step_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_int64(stmt, 2, after_ms); - var list: std.ArrayListUnmanaged([]const u8) = .empty; - while (c.sqlite3_step(stmt) == c.SQLITE_ROW) { - try list.append(allocator, try allocStr(allocator, stmt, 0)); + if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; } - return list.toOwnedSlice(allocator); } - /// Count how many running tasks a worker currently has. - pub fn countRunningStepsByWorker(self: *Self, worker_id: []const u8) !i64 { - const sql = "SELECT COUNT(*) FROM steps WHERE worker_id = ? AND status = 'running'"; + /// Delete checkpoints for a run with version greater than a given version. + /// Used during replay to remove checkpoints that will be superseded. + pub fn deleteCheckpointsAfterVersion(self: *Self, run_id: []const u8, after_version: i64) !void { + const sql = "DELETE FROM checkpoints WHERE run_id = ? AND version > ?"; var stmt: ?*c.sqlite3_stmt = null; if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { return error.SqlitePrepareFailed; } defer _ = c.sqlite3_finalize(stmt); - _ = c.sqlite3_bind_text(stmt, 1, worker_id.ptr, @intCast(worker_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_int64(stmt, 2, after_version); - if (c.sqlite3_step(stmt) != c.SQLITE_ROW) return 0; - return colInt(stmt, 0); + if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; + } } - /// Set started_at_ms for a step (used by wait steps to track timer start). 
- pub fn setStepStartedAt(self: *Self, step_id: []const u8, ts_ms: i64) !void { - const sql = "UPDATE steps SET started_at_ms = ?, updated_at_ms = ? WHERE id = ?"; + /// Count how many running tasks a worker currently has. + pub fn countRunningStepsByWorker(self: *Self, worker_id: []const u8) !i64 { + const sql = "SELECT COUNT(*) FROM steps WHERE worker_id = ? AND status = 'running'"; var stmt: ?*c.sqlite3_stmt = null; if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { return error.SqlitePrepareFailed; } defer _ = c.sqlite3_finalize(stmt); - _ = c.sqlite3_bind_int64(stmt, 1, ts_ms); - _ = c.sqlite3_bind_int64(stmt, 2, ids.nowMs()); - _ = c.sqlite3_bind_text(stmt, 3, step_id.ptr, @intCast(step_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 1, worker_id.ptr, @intCast(worker_id.len), SQLITE_STATIC); - if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { - return error.SqliteStepFailed; - } + if (c.sqlite3_step(stmt) != c.SQLITE_ROW) return 0; + return colInt(stmt, 0); } fn readStepRow(allocator: std.mem.Allocator, stmt: ?*c.sqlite3_stmt) !types.StepRow { @@ -1084,134 +1113,268 @@ pub const Store = struct { }; } - // ── Cycle State CRUD ───────────────────────────────────────────── + // ── Sub-workflow Helper ────────────────────────────────────────── - pub fn getCycleState(self: *Self, run_id: []const u8, cycle_key: []const u8) !?struct { iteration_count: i64, max_iterations: i64 } { - const sql = "SELECT iteration_count, max_iterations FROM cycle_state WHERE run_id = ? AND cycle_key = ?"; + pub fn updateStepInputJson(self: *Self, step_id: []const u8, input_json: []const u8) !void { + const sql = "UPDATE steps SET input_json = ? 
WHERE id = ?"; var stmt: ?*c.sqlite3_stmt = null; if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { return error.SqlitePrepareFailed; } defer _ = c.sqlite3_finalize(stmt); - _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); - _ = c.sqlite3_bind_text(stmt, 2, cycle_key.ptr, @intCast(cycle_key.len), SQLITE_STATIC); - - if (c.sqlite3_step(stmt) != c.SQLITE_ROW) return null; + _ = c.sqlite3_bind_text(stmt, 1, input_json.ptr, @intCast(input_json.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 2, step_id.ptr, @intCast(step_id.len), SQLITE_STATIC); - return .{ - .iteration_count = colInt(stmt, 0), - .max_iterations = colInt(stmt, 1), - }; + if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; + } } - pub fn upsertCycleState(self: *Self, run_id: []const u8, cycle_key: []const u8, iteration_count: i64, max_iterations: i64) !void { - const sql = "INSERT OR REPLACE INTO cycle_state (run_id, cycle_key, iteration_count, max_iterations) VALUES (?, ?, ?, ?)"; + pub fn updateStepChildRunId(self: *Self, step_id: []const u8, child_run_id: []const u8) !void { + const sql = "UPDATE steps SET child_run_id = ? 
WHERE id = ?"; var stmt: ?*c.sqlite3_stmt = null; if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { return error.SqlitePrepareFailed; } defer _ = c.sqlite3_finalize(stmt); - _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); - _ = c.sqlite3_bind_text(stmt, 2, cycle_key.ptr, @intCast(cycle_key.len), SQLITE_STATIC); - _ = c.sqlite3_bind_int64(stmt, 3, iteration_count); - _ = c.sqlite3_bind_int64(stmt, 4, max_iterations); + _ = c.sqlite3_bind_text(stmt, 1, child_run_id.ptr, @intCast(child_run_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 2, step_id.ptr, @intCast(step_id.len), SQLITE_STATIC); if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { return error.SqliteStepFailed; } } - // ── Chat Message CRUD ──────────────────────────────────────────── + // ── Workflow CRUD ───────────────────────────────────────────────── + + pub fn createWorkflow(self: *Self, id: []const u8, name: []const u8, definition_json: []const u8) !void { + return self.createWorkflowWithVersion(id, name, definition_json, 1); + } - pub fn insertChatMessage(self: *Self, run_id: []const u8, step_id: []const u8, round: i64, role: []const u8, worker_id: ?[]const u8, message: []const u8) !void { - const sql = "INSERT INTO chat_messages (run_id, step_id, round, role, worker_id, message, ts_ms) VALUES (?, ?, ?, ?, ?, ?, ?)"; + pub fn createWorkflowWithVersion(self: *Self, id: []const u8, name: []const u8, definition_json: []const u8, version: i64) !void { + const sql = "INSERT INTO workflows (id, name, definition_json, version, created_at_ms, updated_at_ms) VALUES (?, ?, ?, ?, ?, ?)"; var stmt: ?*c.sqlite3_stmt = null; if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { return error.SqlitePrepareFailed; } defer _ = c.sqlite3_finalize(stmt); - _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); - _ = c.sqlite3_bind_text(stmt, 2, step_id.ptr, @intCast(step_id.len), SQLITE_STATIC); - _ = 
c.sqlite3_bind_int64(stmt, 3, round); - _ = c.sqlite3_bind_text(stmt, 4, role.ptr, @intCast(role.len), SQLITE_STATIC); - bindTextOpt(stmt, 5, worker_id); - _ = c.sqlite3_bind_text(stmt, 6, message.ptr, @intCast(message.len), SQLITE_STATIC); - _ = c.sqlite3_bind_int64(stmt, 7, ids.nowMs()); + const now = ids.nowMs(); + _ = c.sqlite3_bind_text(stmt, 1, id.ptr, @intCast(id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 2, name.ptr, @intCast(name.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 3, definition_json.ptr, @intCast(definition_json.len), SQLITE_STATIC); + _ = c.sqlite3_bind_int64(stmt, 4, version); + _ = c.sqlite3_bind_int64(stmt, 5, now); + _ = c.sqlite3_bind_int64(stmt, 6, now); if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { return error.SqliteStepFailed; } } - pub fn getChatMessages(self: *Self, allocator: std.mem.Allocator, step_id: []const u8) ![]types.ChatMessageRow { - const sql = "SELECT id, run_id, step_id, round, role, worker_id, message, ts_ms FROM chat_messages WHERE step_id = ? 
ORDER BY round, id"; + pub fn getWorkflow(self: *Self, alloc: std.mem.Allocator, id: []const u8) !?types.WorkflowRow { + const sql = "SELECT id, name, definition_json, version, created_at_ms, updated_at_ms FROM workflows WHERE id = ?"; var stmt: ?*c.sqlite3_stmt = null; if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { return error.SqlitePrepareFailed; } defer _ = c.sqlite3_finalize(stmt); - _ = c.sqlite3_bind_text(stmt, 1, step_id.ptr, @intCast(step_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 1, id.ptr, @intCast(id.len), SQLITE_STATIC); + + if (c.sqlite3_step(stmt) != c.SQLITE_ROW) return null; + + return types.WorkflowRow{ + .id = try allocStr(alloc, stmt, 0), + .name = try allocStr(alloc, stmt, 1), + .definition_json = try allocStr(alloc, stmt, 2), + .version = colInt(stmt, 3), + .created_at_ms = colInt(stmt, 4), + .updated_at_ms = colInt(stmt, 5), + }; + } + + pub fn listWorkflows(self: *Self, alloc: std.mem.Allocator) ![]types.WorkflowRow { + const sql = "SELECT id, name, definition_json, version, created_at_ms, updated_at_ms FROM workflows ORDER BY created_at_ms DESC"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); - var list: std.ArrayListUnmanaged(types.ChatMessageRow) = .empty; + var list: std.ArrayListUnmanaged(types.WorkflowRow) = .empty; while (c.sqlite3_step(stmt) == c.SQLITE_ROW) { - try list.append(allocator, .{ - .id = colInt(stmt, 0), - .run_id = try allocStr(allocator, stmt, 1), - .step_id = try allocStr(allocator, stmt, 2), - .round = colInt(stmt, 3), - .role = try allocStr(allocator, stmt, 4), - .worker_id = try allocStrOpt(allocator, stmt, 5), - .message = try allocStr(allocator, stmt, 6), - .ts_ms = colInt(stmt, 7), + try list.append(alloc, .{ + .id = try allocStr(alloc, stmt, 0), + .name = try allocStr(alloc, stmt, 1), + .definition_json = try allocStr(alloc, stmt, 
2), + .version = colInt(stmt, 3), + .created_at_ms = colInt(stmt, 4), + .updated_at_ms = colInt(stmt, 5), }); } - return list.toOwnedSlice(allocator); + return list.toOwnedSlice(alloc); + } + + pub fn updateWorkflow(self: *Self, id: []const u8, name: []const u8, definition_json: []const u8) !void { + return self.updateWorkflowWithVersion(id, name, definition_json, null); } - // ── Saga State CRUD ────────────────────────────────────────────── + pub fn updateWorkflowWithVersion(self: *Self, id: []const u8, name: []const u8, definition_json: []const u8, version: ?i64) !void { + if (version) |v| { + const sql = "UPDATE workflows SET name = ?, definition_json = ?, version = ?, updated_at_ms = ? WHERE id = ?"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + _ = c.sqlite3_bind_text(stmt, 1, name.ptr, @intCast(name.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 2, definition_json.ptr, @intCast(definition_json.len), SQLITE_STATIC); + _ = c.sqlite3_bind_int64(stmt, 3, v); + _ = c.sqlite3_bind_int64(stmt, 4, ids.nowMs()); + _ = c.sqlite3_bind_text(stmt, 5, id.ptr, @intCast(id.len), SQLITE_STATIC); + + if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; + } + } else { + const sql = "UPDATE workflows SET name = ?, definition_json = ?, updated_at_ms = ? 
WHERE id = ?"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + _ = c.sqlite3_bind_text(stmt, 1, name.ptr, @intCast(name.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 2, definition_json.ptr, @intCast(definition_json.len), SQLITE_STATIC); + _ = c.sqlite3_bind_int64(stmt, 3, ids.nowMs()); + _ = c.sqlite3_bind_text(stmt, 4, id.ptr, @intCast(id.len), SQLITE_STATIC); + + if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; + } + } + } - pub fn insertSagaState(self: *Self, run_id: []const u8, saga_step_id: []const u8, body_step_id: []const u8, compensation_step_id: ?[]const u8) !void { - const sql = "INSERT INTO saga_state (run_id, saga_step_id, body_step_id, compensation_step_id, status) VALUES (?, ?, ?, ?, 'pending')"; + pub fn deleteWorkflow(self: *Self, id: []const u8) !void { + const sql = "DELETE FROM workflows WHERE id = ?"; var stmt: ?*c.sqlite3_stmt = null; if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { return error.SqlitePrepareFailed; } defer _ = c.sqlite3_finalize(stmt); - _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); - _ = c.sqlite3_bind_text(stmt, 2, saga_step_id.ptr, @intCast(saga_step_id.len), SQLITE_STATIC); - _ = c.sqlite3_bind_text(stmt, 3, body_step_id.ptr, @intCast(body_step_id.len), SQLITE_STATIC); - bindTextOpt(stmt, 4, compensation_step_id); + _ = c.sqlite3_bind_text(stmt, 1, id.ptr, @intCast(id.len), SQLITE_STATIC); if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { return error.SqliteStepFailed; } } - pub fn updateSagaState(self: *Self, run_id: []const u8, saga_step_id: []const u8, body_step_id: []const u8, status: []const u8) !void { - const sql = "UPDATE saga_state SET status = ? WHERE run_id = ? AND saga_step_id = ? 
AND body_step_id = ?"; + // ── Token Accounting ────────────────────────────────────────────── + + pub fn updateStepTokens(self: *Self, step_id: []const u8, input_tokens: i64, output_tokens: i64) !void { + const sql = "UPDATE steps SET input_tokens = ?, output_tokens = ?, total_tokens = ? WHERE id = ?"; var stmt: ?*c.sqlite3_stmt = null; if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { return error.SqlitePrepareFailed; } defer _ = c.sqlite3_finalize(stmt); - _ = c.sqlite3_bind_text(stmt, 1, status.ptr, @intCast(status.len), SQLITE_STATIC); + _ = c.sqlite3_bind_int64(stmt, 1, input_tokens); + _ = c.sqlite3_bind_int64(stmt, 2, output_tokens); + _ = c.sqlite3_bind_int64(stmt, 3, input_tokens + output_tokens); + _ = c.sqlite3_bind_text(stmt, 4, step_id.ptr, @intCast(step_id.len), SQLITE_STATIC); + + if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; + } + } + + pub fn updateRunTokens(self: *Self, run_id: []const u8, input_delta: i64, output_delta: i64) !void { + const sql = "UPDATE runs SET total_input_tokens = total_input_tokens + ?, total_output_tokens = total_output_tokens + ?, total_tokens = total_tokens + ? 
WHERE id = ?"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + _ = c.sqlite3_bind_int64(stmt, 1, input_delta); + _ = c.sqlite3_bind_int64(stmt, 2, output_delta); + _ = c.sqlite3_bind_int64(stmt, 3, input_delta + output_delta); + _ = c.sqlite3_bind_text(stmt, 4, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); + + if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; + } + } + + pub fn getRunTokens(self: *Self, run_id: []const u8) !struct { input: i64, output: i64, total: i64 } { + const sql = "SELECT COALESCE(total_input_tokens, 0), COALESCE(total_output_tokens, 0), COALESCE(total_tokens, 0) FROM runs WHERE id = ?"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); + + if (c.sqlite3_step(stmt) != c.SQLITE_ROW) { + return .{ .input = 0, .output = 0, .total = 0 }; + } + + return .{ + .input = colInt(stmt, 0), + .output = colInt(stmt, 1), + .total = colInt(stmt, 2), + }; + } + + // ── Checkpoint CRUD ─────────────────────────────────────────────── + + pub fn createCheckpoint(self: *Self, id: []const u8, run_id: []const u8, step_id: []const u8, parent_id: ?[]const u8, state_json: []const u8, completed_nodes_json: []const u8, version: i64, metadata_json: ?[]const u8) !void { + const sql = "INSERT INTO checkpoints (id, run_id, step_id, parent_id, state_json, completed_nodes_json, version, metadata_json, created_at_ms) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + _ = c.sqlite3_bind_text(stmt, 1, 
id.ptr, @intCast(id.len), SQLITE_STATIC); _ = c.sqlite3_bind_text(stmt, 2, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); - _ = c.sqlite3_bind_text(stmt, 3, saga_step_id.ptr, @intCast(saga_step_id.len), SQLITE_STATIC); - _ = c.sqlite3_bind_text(stmt, 4, body_step_id.ptr, @intCast(body_step_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 3, step_id.ptr, @intCast(step_id.len), SQLITE_STATIC); + bindTextOpt(stmt, 4, parent_id); + _ = c.sqlite3_bind_text(stmt, 5, state_json.ptr, @intCast(state_json.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 6, completed_nodes_json.ptr, @intCast(completed_nodes_json.len), SQLITE_STATIC); + _ = c.sqlite3_bind_int64(stmt, 7, version); + bindTextOpt(stmt, 8, metadata_json); + _ = c.sqlite3_bind_int64(stmt, 9, ids.nowMs()); if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { return error.SqliteStepFailed; } } - pub fn getSagaStates(self: *Self, allocator: std.mem.Allocator, run_id: []const u8, saga_step_id: []const u8) ![]types.SagaStateRow { - const sql = "SELECT run_id, saga_step_id, body_step_id, compensation_step_id, status FROM saga_state WHERE run_id = ? AND saga_step_id = ? 
ORDER BY rowid"; + pub fn getCheckpoint(self: *Self, alloc: std.mem.Allocator, id: []const u8) !?types.CheckpointRow { + const sql = "SELECT id, run_id, step_id, parent_id, state_json, completed_nodes_json, version, metadata_json, created_at_ms FROM checkpoints WHERE id = ?"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + _ = c.sqlite3_bind_text(stmt, 1, id.ptr, @intCast(id.len), SQLITE_STATIC); + + if (c.sqlite3_step(stmt) != c.SQLITE_ROW) return null; + + return try readCheckpointRow(alloc, stmt); + } + + pub fn listCheckpoints(self: *Self, alloc: std.mem.Allocator, run_id: []const u8) ![]types.CheckpointRow { + const sql = "SELECT id, run_id, step_id, parent_id, state_json, completed_nodes_json, version, metadata_json, created_at_ms FROM checkpoints WHERE run_id = ? ORDER BY version ASC"; var stmt: ?*c.sqlite3_stmt = null; if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { return error.SqlitePrepareFailed; @@ -1219,88 +1382,445 @@ pub const Store = struct { defer _ = c.sqlite3_finalize(stmt); _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); - _ = c.sqlite3_bind_text(stmt, 2, saga_step_id.ptr, @intCast(saga_step_id.len), SQLITE_STATIC); - var list: std.ArrayListUnmanaged(types.SagaStateRow) = .empty; + var list: std.ArrayListUnmanaged(types.CheckpointRow) = .empty; while (c.sqlite3_step(stmt) == c.SQLITE_ROW) { - try list.append(allocator, .{ - .run_id = try allocStr(allocator, stmt, 0), - .saga_step_id = try allocStr(allocator, stmt, 1), - .body_step_id = try allocStr(allocator, stmt, 2), - .compensation_step_id = try allocStrOpt(allocator, stmt, 3), - .status = try allocStr(allocator, stmt, 4), - }); + try list.append(alloc, try readCheckpointRow(alloc, stmt)); } - return list.toOwnedSlice(allocator); + return list.toOwnedSlice(alloc); } - // ── Sub-workflow 
Helper ────────────────────────────────────────── + pub fn getLatestCheckpoint(self: *Self, alloc: std.mem.Allocator, run_id: []const u8) !?types.CheckpointRow { + const sql = "SELECT id, run_id, step_id, parent_id, state_json, completed_nodes_json, version, metadata_json, created_at_ms FROM checkpoints WHERE run_id = ? ORDER BY version DESC LIMIT 1"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); - pub fn updateStepInputJson(self: *Self, step_id: []const u8, input_json: []const u8) !void { - const sql = "UPDATE steps SET input_json = ? WHERE id = ?"; + _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); + + if (c.sqlite3_step(stmt) != c.SQLITE_ROW) return null; + + return try readCheckpointRow(alloc, stmt); + } + + fn readCheckpointRow(alloc: std.mem.Allocator, stmt: ?*c.sqlite3_stmt) !types.CheckpointRow { + return .{ + .id = try allocStr(alloc, stmt, 0), + .run_id = try allocStr(alloc, stmt, 1), + .step_id = try allocStr(alloc, stmt, 2), + .parent_id = try allocStrOpt(alloc, stmt, 3), + .state_json = try allocStr(alloc, stmt, 4), + .completed_nodes_json = try allocStr(alloc, stmt, 5), + .version = colInt(stmt, 6), + .metadata_json = try allocStrOpt(alloc, stmt, 7), + .created_at_ms = colInt(stmt, 8), + }; + } + + // ── Agent Event CRUD ────────────────────────────────────────────── + + pub fn createAgentEvent(self: *Self, run_id: []const u8, step_id: []const u8, iteration: i64, tool: ?[]const u8, args_json: ?[]const u8, result_text: ?[]const u8, status: []const u8) !void { + const sql = "INSERT INTO agent_events (run_id, step_id, iteration, tool, args_json, result_text, status, created_at_ms) VALUES (?, ?, ?, ?, ?, ?, ?, ?)"; var stmt: ?*c.sqlite3_stmt = null; if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { return error.SqlitePrepareFailed; } defer _ = 
c.sqlite3_finalize(stmt); - _ = c.sqlite3_bind_text(stmt, 1, input_json.ptr, @intCast(input_json.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); _ = c.sqlite3_bind_text(stmt, 2, step_id.ptr, @intCast(step_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_int64(stmt, 3, iteration); + bindTextOpt(stmt, 4, tool); + bindTextOpt(stmt, 5, args_json); + bindTextOpt(stmt, 6, result_text); + _ = c.sqlite3_bind_text(stmt, 7, status.ptr, @intCast(status.len), SQLITE_STATIC); + _ = c.sqlite3_bind_int64(stmt, 8, ids.nowMs()); if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { return error.SqliteStepFailed; } } - pub fn updateStepChildRunId(self: *Self, step_id: []const u8, child_run_id: []const u8) !void { - const sql = "UPDATE steps SET child_run_id = ? WHERE id = ?"; + pub fn listAgentEvents(self: *Self, alloc: std.mem.Allocator, run_id: []const u8, step_id: []const u8) ![]types.AgentEventRow { + const sql = "SELECT id, run_id, step_id, iteration, tool, args_json, result_text, status, created_at_ms FROM agent_events WHERE run_id = ? AND step_id = ? 
ORDER BY id ASC"; var stmt: ?*c.sqlite3_stmt = null; if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { return error.SqlitePrepareFailed; } defer _ = c.sqlite3_finalize(stmt); - _ = c.sqlite3_bind_text(stmt, 1, child_run_id.ptr, @intCast(child_run_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); _ = c.sqlite3_bind_text(stmt, 2, step_id.ptr, @intCast(step_id.len), SQLITE_STATIC); + var list: std.ArrayListUnmanaged(types.AgentEventRow) = .empty; + while (c.sqlite3_step(stmt) == c.SQLITE_ROW) { + try list.append(alloc, .{ + .id = colInt(stmt, 0), + .run_id = try allocStr(alloc, stmt, 1), + .step_id = try allocStr(alloc, stmt, 2), + .iteration = colInt(stmt, 3), + .tool = try allocStrOpt(alloc, stmt, 4), + .args_json = try allocStrOpt(alloc, stmt, 5), + .result_text = try allocStrOpt(alloc, stmt, 6), + .status = try allocStr(alloc, stmt, 7), + .created_at_ms = colInt(stmt, 8), + }); + } + return list.toOwnedSlice(alloc); + } + + // ── Run State Management ────────────────────────────────────────── + + pub fn updateRunState(self: *Self, run_id: []const u8, state_json: []const u8) !void { + const sql = "UPDATE runs SET state_json = ?, updated_at_ms = ? 
WHERE id = ?"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + _ = c.sqlite3_bind_text(stmt, 1, state_json.ptr, @intCast(state_json.len), SQLITE_STATIC); + _ = c.sqlite3_bind_int64(stmt, 2, ids.nowMs()); + _ = c.sqlite3_bind_text(stmt, 3, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); + if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { return error.SqliteStepFailed; } } -}; -// ── Tests ───────────────────────────────────────────────────────────── + pub fn incrementCheckpointCount(self: *Self, run_id: []const u8) !void { + const sql = "UPDATE runs SET checkpoint_count = COALESCE(checkpoint_count, 0) + 1, updated_at_ms = ? WHERE id = ?"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); -test "Store: init and deinit" { - const allocator = std.testing.allocator; - var s = try Store.init(allocator, ":memory:"); - defer s.deinit(); -} + _ = c.sqlite3_bind_int64(stmt, 1, ids.nowMs()); + _ = c.sqlite3_bind_text(stmt, 2, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); -test "Store: insert and get worker" { - const allocator = std.testing.allocator; - var s = try Store.init(allocator, ":memory:"); - defer s.deinit(); - try s.insertWorker("w1", "http://localhost:3001", "tok", "webhook", null, "[\"coder\"]", 3, "config"); - const w = (try s.getWorker(allocator, "w1")).?; - defer allocator.free(w.id); - defer allocator.free(w.url); - defer allocator.free(w.token); - defer allocator.free(w.protocol); - if (w.model) |m| allocator.free(m); - defer allocator.free(w.tags_json); - defer allocator.free(w.source); - defer allocator.free(w.status); - try std.testing.expectEqualStrings("w1", w.id); - try std.testing.expectEqualStrings("http://localhost:3001", w.url); - try 
std.testing.expectEqualStrings("webhook", w.protocol); - try std.testing.expect(w.model == null); - try std.testing.expectEqual(@as(i64, 3), w.max_concurrent); -} + if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; + } + } -test "Store: insert and list workers" { - const allocator = std.testing.allocator; - var s = try Store.init(allocator, ":memory:"); + pub fn createRunWithState(self: *Self, id: []const u8, workflow_id: ?[]const u8, workflow_json: []const u8, input_json: []const u8, state_json: []const u8) !void { + return self.createRunWithStateAndStatus(id, workflow_id, workflow_json, input_json, state_json, "pending"); + } + + /// Create a run with explicit initial status. Use "running" to avoid the + /// race window between creating with "pending" and updating to "running". + pub fn createRunWithStateAndStatus(self: *Self, id: []const u8, workflow_id: ?[]const u8, workflow_json: []const u8, input_json: []const u8, state_json: []const u8, status: []const u8) !void { + const sql = "INSERT INTO runs (id, status, workflow_id, workflow_json, input_json, callbacks_json, state_json, created_at_ms, updated_at_ms) VALUES (?, ?, ?, ?, ?, '[]', ?, ?, ?)"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + const now = ids.nowMs(); + _ = c.sqlite3_bind_text(stmt, 1, id.ptr, @intCast(id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 2, status.ptr, @intCast(status.len), SQLITE_STATIC); + bindTextOpt(stmt, 3, workflow_id); + _ = c.sqlite3_bind_text(stmt, 4, workflow_json.ptr, @intCast(workflow_json.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 5, input_json.ptr, @intCast(input_json.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 6, state_json.ptr, @intCast(state_json.len), SQLITE_STATIC); + _ = c.sqlite3_bind_int64(stmt, 7, now); + _ = c.sqlite3_bind_int64(stmt, 8, now); + + if 
(c.sqlite3_step(stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; + } + } + + pub fn setParentRunId(self: *Self, run_id: []const u8, parent_run_id: []const u8) !void { + const sql = "UPDATE runs SET parent_run_id = ?, updated_at_ms = ? WHERE id = ?"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + _ = c.sqlite3_bind_text(stmt, 1, parent_run_id.ptr, @intCast(parent_run_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_int64(stmt, 2, ids.nowMs()); + _ = c.sqlite3_bind_text(stmt, 3, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); + + if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; + } + } + + pub fn setConfigJson(self: *Self, run_id: []const u8, config_json: []const u8) !void { + const sql = "UPDATE runs SET config_json = ?, updated_at_ms = ? WHERE id = ?"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + _ = c.sqlite3_bind_text(stmt, 1, config_json.ptr, @intCast(config_json.len), SQLITE_STATIC); + _ = c.sqlite3_bind_int64(stmt, 2, ids.nowMs()); + _ = c.sqlite3_bind_text(stmt, 3, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); + + if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; + } + } + + pub fn createForkedRun(self: *Self, id: []const u8, workflow_json: []const u8, state_json: []const u8, forked_from_run_id: []const u8, forked_from_checkpoint_id: []const u8) !void { + const sql = "INSERT INTO runs (id, status, workflow_json, input_json, callbacks_json, state_json, forked_from_run_id, forked_from_checkpoint_id, created_at_ms, updated_at_ms) VALUES (?, 'pending', ?, '{}', '[]', ?, ?, ?, ?, ?)"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + 
return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + const now = ids.nowMs(); + _ = c.sqlite3_bind_text(stmt, 1, id.ptr, @intCast(id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 2, workflow_json.ptr, @intCast(workflow_json.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 3, state_json.ptr, @intCast(state_json.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 4, forked_from_run_id.ptr, @intCast(forked_from_run_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 5, forked_from_checkpoint_id.ptr, @intCast(forked_from_checkpoint_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_int64(stmt, 6, now); + _ = c.sqlite3_bind_int64(stmt, 7, now); + + if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; + } + } + + // ── Pending State Injection CRUD ────────────────────────────────── + + pub fn createPendingInjection(self: *Self, run_id: []const u8, updates_json: []const u8, apply_after_step: ?[]const u8) !void { + const sql = "INSERT INTO pending_state_injections (run_id, updates_json, apply_after_step, created_at_ms) VALUES (?, ?, ?, ?)"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 2, updates_json.ptr, @intCast(updates_json.len), SQLITE_STATIC); + bindTextOpt(stmt, 3, apply_after_step); + _ = c.sqlite3_bind_int64(stmt, 4, ids.nowMs()); + + if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; + } + } + + pub fn consumePendingInjections(self: *Self, alloc: std.mem.Allocator, run_id: []const u8, completed_step: []const u8) ![]types.PendingInjectionRow { + // Select injections where apply_after_step matches the completed step or is NULL + const sql = "SELECT id, run_id, updates_json, apply_after_step, created_at_ms FROM 
pending_state_injections WHERE run_id = ? AND (apply_after_step IS NULL OR apply_after_step = ?) ORDER BY id ASC"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 2, completed_step.ptr, @intCast(completed_step.len), SQLITE_STATIC); + + var list: std.ArrayListUnmanaged(types.PendingInjectionRow) = .empty; + while (c.sqlite3_step(stmt) == c.SQLITE_ROW) { + try list.append(alloc, .{ + .id = colInt(stmt, 0), + .run_id = try allocStr(alloc, stmt, 1), + .updates_json = try allocStr(alloc, stmt, 2), + .apply_after_step = try allocStrOpt(alloc, stmt, 3), + .created_at_ms = colInt(stmt, 4), + }); + } + + const result = try list.toOwnedSlice(alloc); + + // Delete consumed injections + if (result.len > 0) { + const del_sql = "DELETE FROM pending_state_injections WHERE run_id = ? 
AND (apply_after_step IS NULL OR apply_after_step = ?)"; + var del_stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, del_sql, -1, &del_stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(del_stmt); + + _ = c.sqlite3_bind_text(del_stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(del_stmt, 2, completed_step.ptr, @intCast(completed_step.len), SQLITE_STATIC); + + if (c.sqlite3_step(del_stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; + } + } + + return result; + } + + pub fn discardPendingInjections(self: *Self, run_id: []const u8) !void { + const sql = "DELETE FROM pending_state_injections WHERE run_id = ?"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); + + if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; + } + } + + // ── Node Cache (Gap 3) ─────────────────────────────────────────── + + pub fn getCachedResult(self: *Self, alloc: std.mem.Allocator, cache_key: []const u8) !?[]const u8 { + const sql = "SELECT result_json, created_at_ms, ttl_ms FROM node_cache WHERE cache_key = ?"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + _ = c.sqlite3_bind_text(stmt, 1, cache_key.ptr, @intCast(cache_key.len), SQLITE_STATIC); + + if (c.sqlite3_step(stmt) != c.SQLITE_ROW) return null; + + const result_json = try allocStr(alloc, stmt, 0); + const created_at_ms = colInt(stmt, 1); + const ttl_ms = colIntOpt(stmt, 2); + + // Check expiration + if (ttl_ms) |ttl| { + const now_ms = ids.nowMs(); + if (now_ms - created_at_ms > ttl) { + // Expired — delete and return null + const 
del_sql = "DELETE FROM node_cache WHERE cache_key = ?"; + var del_stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, del_sql, -1, &del_stmt, null) == c.SQLITE_OK) { + _ = c.sqlite3_bind_text(del_stmt, 1, cache_key.ptr, @intCast(cache_key.len), SQLITE_STATIC); + _ = c.sqlite3_step(del_stmt); + _ = c.sqlite3_finalize(del_stmt); + } + alloc.free(result_json); + return null; + } + } + + return result_json; + } + + pub fn setCachedResult(self: *Self, cache_key: []const u8, node_name: []const u8, result_json: []const u8, ttl_ms: ?i64) !void { + const sql = "INSERT OR REPLACE INTO node_cache (cache_key, node_name, result_json, created_at_ms, ttl_ms) VALUES (?, ?, ?, ?, ?)"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + _ = c.sqlite3_bind_text(stmt, 1, cache_key.ptr, @intCast(cache_key.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 2, node_name.ptr, @intCast(node_name.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 3, result_json.ptr, @intCast(result_json.len), SQLITE_STATIC); + _ = c.sqlite3_bind_int64(stmt, 4, ids.nowMs()); + bindIntOpt(stmt, 5, ttl_ms); + + if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; + } + } + + // ── Pending Writes (Gap 4) ─────────────────────────────────────── + + pub fn savePendingWrite(self: *Self, run_id: []const u8, step_id: []const u8, channel: []const u8, value_json: []const u8) !void { + const sql = "INSERT INTO pending_writes (run_id, step_id, channel, value_json, created_at_ms) VALUES (?, ?, ?, ?, ?)"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 2, step_id.ptr, 
@intCast(step_id.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 3, channel.ptr, @intCast(channel.len), SQLITE_STATIC); + _ = c.sqlite3_bind_text(stmt, 4, value_json.ptr, @intCast(value_json.len), SQLITE_STATIC); + _ = c.sqlite3_bind_int64(stmt, 5, ids.nowMs()); + + if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; + } + } + + pub fn getPendingWrites(self: *Self, alloc: std.mem.Allocator, run_id: []const u8) ![]types.PendingWriteRow { + const sql = "SELECT id, run_id, step_id, channel, value_json, created_at_ms FROM pending_writes WHERE run_id = ? ORDER BY id ASC"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); + + var list: std.ArrayListUnmanaged(types.PendingWriteRow) = .empty; + while (c.sqlite3_step(stmt) == c.SQLITE_ROW) { + try list.append(alloc, .{ + .id = colInt(stmt, 0), + .run_id = try allocStr(alloc, stmt, 1), + .step_id = try allocStr(alloc, stmt, 2), + .channel = try allocStr(alloc, stmt, 3), + .value_json = try allocStr(alloc, stmt, 4), + .created_at_ms = colInt(stmt, 5), + }); + } + return list.toOwnedSlice(alloc); + } + + pub fn clearPendingWrites(self: *Self, run_id: []const u8) !void { + const sql = "DELETE FROM pending_writes WHERE run_id = ?"; + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(self.db, sql, -1, &stmt, null) != c.SQLITE_OK) { + return error.SqlitePrepareFailed; + } + defer _ = c.sqlite3_finalize(stmt); + + _ = c.sqlite3_bind_text(stmt, 1, run_id.ptr, @intCast(run_id.len), SQLITE_STATIC); + + if (c.sqlite3_step(stmt) != c.SQLITE_DONE) { + return error.SqliteStepFailed; + } + } +}; + +// ── Tests ───────────────────────────────────────────────────────────── + +test "Store: init and deinit" { + const allocator = std.testing.allocator; + var s = try 
Store.init(allocator, ":memory:"); + defer s.deinit(); +} + +test "Store: insert and get worker" { + const allocator = std.testing.allocator; + var s = try Store.init(allocator, ":memory:"); + defer s.deinit(); + try s.insertWorker("w1", "http://localhost:3001", "tok", "webhook", null, "[\"coder\"]", 3, "config"); + const w = (try s.getWorker(allocator, "w1")).?; + defer allocator.free(w.id); + defer allocator.free(w.url); + defer allocator.free(w.token); + defer allocator.free(w.protocol); + if (w.model) |m| allocator.free(m); + defer allocator.free(w.tags_json); + defer allocator.free(w.source); + defer allocator.free(w.status); + try std.testing.expectEqualStrings("w1", w.id); + try std.testing.expectEqualStrings("http://localhost:3001", w.url); + try std.testing.expectEqualStrings("webhook", w.protocol); + try std.testing.expect(w.model == null); + try std.testing.expectEqual(@as(i64, 3), w.max_concurrent); +} + +test "Store: insert and list workers" { + const allocator = std.testing.allocator; + var s = try Store.init(allocator, ":memory:"); defer s.deinit(); try s.insertWorker("w1", "http://localhost:3001", "tok", "webhook", null, "[]", 1, "config"); try s.insertWorker("w2", "http://localhost:3002", "tok", "webhook", null, "[]", 2, "registered"); @@ -1386,6 +1906,7 @@ test "Store: insert and get run" { allocator.free(run.id); if (run.idempotency_key) |ik| allocator.free(ik); allocator.free(run.status); + if (run.workflow_id) |wid| allocator.free(wid); allocator.free(run.workflow_json); allocator.free(run.input_json); allocator.free(run.callbacks_json); @@ -1421,6 +1942,7 @@ test "Store: transaction commit persists inserted run" { allocator.free(run.id); if (run.idempotency_key) |ik| allocator.free(ik); allocator.free(run.status); + if (run.workflow_id) |wid| allocator.free(wid); allocator.free(run.workflow_json); allocator.free(run.input_json); allocator.free(run.callbacks_json); @@ -1435,34 +1957,67 @@ test "Store: list runs with filter" { try 
s.insertRun("r1", null, "running", "{}", "{}", "[]"); try s.insertRun("r2", null, "pending", "{}", "{}", "[]"); try s.insertRun("r3", null, "running", "{}", "{}", "[]"); + try s.createWorkflow("wf_filter", "Filter WF", "{\"nodes\":{}}"); + try s.createRunWithState("r4", "wf_filter", "{\"nodes\":{}}", "{}", "{}"); - const running = try s.listRuns(allocator, "running", 100, 0); + const running = try s.listRuns(allocator, "running", null, 100, 0); defer { for (running) |r| { allocator.free(r.id); if (r.idempotency_key) |ik| allocator.free(ik); allocator.free(r.status); + if (r.workflow_id) |wid| allocator.free(wid); allocator.free(r.workflow_json); allocator.free(r.input_json); allocator.free(r.callbacks_json); + if (r.error_text) |et| allocator.free(et); + if (r.state_json) |sj| allocator.free(sj); + if (r.config_json) |cj| allocator.free(cj); + if (r.parent_run_id) |pid| allocator.free(pid); } allocator.free(running); } try std.testing.expectEqual(@as(usize, 2), running.len); - const all = try s.listRuns(allocator, null, 100, 0); + const all = try s.listRuns(allocator, null, null, 100, 0); defer { for (all) |r| { allocator.free(r.id); if (r.idempotency_key) |ik| allocator.free(ik); allocator.free(r.status); + if (r.workflow_id) |wid| allocator.free(wid); allocator.free(r.workflow_json); allocator.free(r.input_json); allocator.free(r.callbacks_json); + if (r.error_text) |et| allocator.free(et); + if (r.state_json) |sj| allocator.free(sj); + if (r.config_json) |cj| allocator.free(cj); + if (r.parent_run_id) |pid| allocator.free(pid); } allocator.free(all); } - try std.testing.expectEqual(@as(usize, 3), all.len); + try std.testing.expectEqual(@as(usize, 4), all.len); + + const filtered = try s.listRuns(allocator, null, "wf_filter", 100, 0); + defer { + for (filtered) |r| { + allocator.free(r.id); + if (r.idempotency_key) |ik| allocator.free(ik); + allocator.free(r.status); + if (r.workflow_id) |wid| allocator.free(wid); + allocator.free(r.workflow_json); + 
allocator.free(r.input_json); + allocator.free(r.callbacks_json); + if (r.error_text) |et| allocator.free(et); + if (r.state_json) |sj| allocator.free(sj); + if (r.config_json) |cj| allocator.free(cj); + if (r.parent_run_id) |pid| allocator.free(pid); + } + allocator.free(filtered); + } + try std.testing.expectEqual(@as(usize, 1), filtered.len); + try std.testing.expectEqualStrings("r4", filtered[0].id); + try std.testing.expectEqualStrings("wf_filter", filtered[0].workflow_id.?); } test "Store: update run status" { @@ -1476,6 +2031,7 @@ test "Store: update run status" { allocator.free(run.id); if (run.idempotency_key) |ik| allocator.free(ik); allocator.free(run.status); + if (run.workflow_id) |wid| allocator.free(wid); allocator.free(run.workflow_json); allocator.free(run.input_json); allocator.free(run.callbacks_json); @@ -1491,8 +2047,7 @@ test "Store: get active runs" { defer s.deinit(); try s.insertRun("r1", null, "running", "{}", "{}", "[]"); try s.insertRun("r2", null, "pending", "{}", "{}", "[]"); - try s.insertRun("r3", null, "paused", "{}", "{}", "[]"); - try s.insertRun("r4", null, "completed", "{}", "{}", "[]"); + try s.insertRun("r3", null, "completed", "{}", "{}", "[]"); const active = try s.getActiveRuns(allocator); defer { @@ -1500,40 +2055,18 @@ test "Store: get active runs" { allocator.free(r.id); if (r.idempotency_key) |ik| allocator.free(ik); allocator.free(r.status); + if (r.workflow_id) |wid| allocator.free(wid); allocator.free(r.workflow_json); allocator.free(r.input_json); allocator.free(r.callbacks_json); + if (r.error_text) |et| allocator.free(et); + if (r.state_json) |sj| allocator.free(sj); + if (r.config_json) |cj| allocator.free(cj); + if (r.parent_run_id) |pid| allocator.free(pid); } allocator.free(active); } - try std.testing.expectEqual(@as(usize, 2), active.len); -} - -test "Store: step deps and ready steps" { - const allocator = std.testing.allocator; - var s = try Store.init(allocator, ":memory:"); - defer s.deinit(); - - try 
s.insertRun("r1", null, "running", "{}", "{}", "[]"); - try s.insertStep("s1", "r1", "step1", "task", "ready", "{}", 1, null, null, null); - try s.insertStep("s2", "r1", "step2", "task", "ready", "{}", 1, null, null, null); - try s.insertStepDep("s2", "s1"); - - // s1 should be ready (no unsatisfied deps), s2 should NOT (depends on s1 which is 'ready' not 'completed') - const ready = try s.getReadySteps(allocator, "r1"); - defer { - for (ready) |step| { - allocator.free(step.id); - allocator.free(step.run_id); - allocator.free(step.def_step_id); - allocator.free(step.type); - allocator.free(step.status); - allocator.free(step.input_json); - } - allocator.free(ready); - } - try std.testing.expectEqual(@as(usize, 1), ready.len); - try std.testing.expectEqualStrings("s1", ready[0].id); + try std.testing.expectEqual(@as(usize, 1), active.len); } test "Store: count steps by status" { @@ -1679,137 +2212,444 @@ test "Store: get nonexistent step returns null" { try std.testing.expect(step == null); } -test "cycle state: upsert and get" { +test "updateStepChildRunId: sets child_run_id on step" { const allocator = std.testing.allocator; var s = try Store.init(allocator, ":memory:"); defer s.deinit(); - // Insert a run first (cycle_state references runs(id)) + // Create a run and step try s.insertRun("r1", null, "running", "{}", "{}", "[]"); + try s.insertRun("child_r1", null, "running", "{}", "{}", "[]"); + try s.insertStep("s1", "r1", "sub_wf", "sub_workflow", "running", "{}", 1, null, null, null); + + // Update child_run_id + try s.updateStepChildRunId("s1", "child_r1"); + + // Get step and verify child_run_id is set + const step = (try s.getStep(allocator, "s1")).?; + defer { + allocator.free(step.id); + allocator.free(step.run_id); + allocator.free(step.def_step_id); + allocator.free(step.type); + allocator.free(step.status); + allocator.free(step.input_json); + if (step.child_run_id) |crid| allocator.free(crid); + } + try std.testing.expectEqualStrings("child_r1", 
step.child_run_id.?); +} + +test "workflow CRUD" { + const allocator = std.testing.allocator; + var s = try Store.init(allocator, ":memory:"); + defer s.deinit(); + + // Create + try s.createWorkflow("wf1", "My Workflow", "{\"steps\":[]}"); - // Upsert cycle state - try s.upsertCycleState("r1", "loop_A", 1, 10); + // Get + const wf = (try s.getWorkflow(allocator, "wf1")).?; + defer { + allocator.free(wf.id); + allocator.free(wf.name); + allocator.free(wf.definition_json); + } + try std.testing.expectEqualStrings("wf1", wf.id); + try std.testing.expectEqualStrings("My Workflow", wf.name); + try std.testing.expectEqualStrings("{\"steps\":[]}", wf.definition_json); + try std.testing.expect(wf.created_at_ms > 0); + try std.testing.expect(wf.updated_at_ms > 0); + + // Update + try s.updateWorkflow("wf1", "Updated Workflow", "{\"steps\":[{\"id\":\"s1\"}]}"); + const wf2 = (try s.getWorkflow(allocator, "wf1")).?; + defer { + allocator.free(wf2.id); + allocator.free(wf2.name); + allocator.free(wf2.definition_json); + } + try std.testing.expectEqualStrings("Updated Workflow", wf2.name); + try std.testing.expectEqualStrings("{\"steps\":[{\"id\":\"s1\"}]}", wf2.definition_json); - // Get and verify values - const cs = (try s.getCycleState("r1", "loop_A")).?; - try std.testing.expectEqual(@as(i64, 1), cs.iteration_count); - try std.testing.expectEqual(@as(i64, 10), cs.max_iterations); + // List + try s.createWorkflow("wf2", "Second Workflow", "{}"); + const workflows = try s.listWorkflows(allocator); + defer { + for (workflows) |w| { + allocator.free(w.id); + allocator.free(w.name); + allocator.free(w.definition_json); + } + allocator.free(workflows); + } + try std.testing.expectEqual(@as(usize, 2), workflows.len); - // Upsert again with new iteration_count - try s.upsertCycleState("r1", "loop_A", 5, 10); + // Delete + try s.deleteWorkflow("wf1"); + const deleted = try s.getWorkflow(allocator, "wf1"); + try std.testing.expect(deleted == null); - // Verify updated value - const 
cs2 = (try s.getCycleState("r1", "loop_A")).?; - try std.testing.expectEqual(@as(i64, 5), cs2.iteration_count); - try std.testing.expectEqual(@as(i64, 10), cs2.max_iterations); + // Remaining list + const remaining = try s.listWorkflows(allocator); + defer { + for (remaining) |w| { + allocator.free(w.id); + allocator.free(w.name); + allocator.free(w.definition_json); + } + allocator.free(remaining); + } + try std.testing.expectEqual(@as(usize, 1), remaining.len); + try std.testing.expectEqualStrings("wf2", remaining[0].id); } -test "cycle state: get returns null for nonexistent" { +test "checkpoint lifecycle" { const allocator = std.testing.allocator; var s = try Store.init(allocator, ":memory:"); defer s.deinit(); - const cs = try s.getCycleState("no_run", "no_key"); - try std.testing.expect(cs == null); + // Create a run + try s.insertRun("r1", null, "running", "{}", "{}", "[]"); + + // Create checkpoints with parent chain + try s.createCheckpoint("cp1", "r1", "step_a", null, "{\"x\":1}", "[\"step_a\"]", 1, null); + try s.createCheckpoint("cp2", "r1", "step_b", "cp1", "{\"x\":2}", "[\"step_a\",\"step_b\"]", 2, "{\"note\":\"test\"}"); + try s.createCheckpoint("cp3", "r1", "step_c", "cp2", "{\"x\":3}", "[\"step_a\",\"step_b\",\"step_c\"]", 3, null); + + // Get single checkpoint + const cp1 = (try s.getCheckpoint(allocator, "cp1")).?; + defer { + allocator.free(cp1.id); + allocator.free(cp1.run_id); + allocator.free(cp1.step_id); + if (cp1.parent_id) |pid| allocator.free(pid); + allocator.free(cp1.state_json); + allocator.free(cp1.completed_nodes_json); + if (cp1.metadata_json) |mj| allocator.free(mj); + } + try std.testing.expectEqualStrings("cp1", cp1.id); + try std.testing.expectEqualStrings("r1", cp1.run_id); + try std.testing.expectEqualStrings("step_a", cp1.step_id); + try std.testing.expect(cp1.parent_id == null); + try std.testing.expectEqualStrings("{\"x\":1}", cp1.state_json); + try std.testing.expectEqual(@as(i64, 1), cp1.version); + try 
std.testing.expect(cp1.metadata_json == null); + + // Get checkpoint with parent and metadata + const cp2 = (try s.getCheckpoint(allocator, "cp2")).?; + defer { + allocator.free(cp2.id); + allocator.free(cp2.run_id); + allocator.free(cp2.step_id); + if (cp2.parent_id) |pid| allocator.free(pid); + allocator.free(cp2.state_json); + allocator.free(cp2.completed_nodes_json); + if (cp2.metadata_json) |mj| allocator.free(mj); + } + try std.testing.expectEqualStrings("cp1", cp2.parent_id.?); + try std.testing.expectEqualStrings("{\"note\":\"test\"}", cp2.metadata_json.?); + + // List checkpoints (ordered by version ASC) + const cps = try s.listCheckpoints(allocator, "r1"); + defer { + for (cps) |cp| { + allocator.free(cp.id); + allocator.free(cp.run_id); + allocator.free(cp.step_id); + if (cp.parent_id) |pid| allocator.free(pid); + allocator.free(cp.state_json); + allocator.free(cp.completed_nodes_json); + if (cp.metadata_json) |mj| allocator.free(mj); + } + allocator.free(cps); + } + try std.testing.expectEqual(@as(usize, 3), cps.len); + try std.testing.expectEqualStrings("cp1", cps[0].id); + try std.testing.expectEqualStrings("cp3", cps[2].id); + + // Get latest checkpoint + const latest = (try s.getLatestCheckpoint(allocator, "r1")).?; + defer { + allocator.free(latest.id); + allocator.free(latest.run_id); + allocator.free(latest.step_id); + if (latest.parent_id) |pid| allocator.free(pid); + allocator.free(latest.state_json); + allocator.free(latest.completed_nodes_json); + if (latest.metadata_json) |mj| allocator.free(mj); + } + try std.testing.expectEqualStrings("cp3", latest.id); + try std.testing.expectEqual(@as(i64, 3), latest.version); + + // Get nonexistent checkpoint + const none = try s.getCheckpoint(allocator, "nonexistent"); + try std.testing.expect(none == null); + + // Get latest for run with no checkpoints + const no_latest = try s.getLatestCheckpoint(allocator, "no_run"); + try std.testing.expect(no_latest == null); } -test "chat messages: insert and get 
ordered by round" { +test "agent events" { const allocator = std.testing.allocator; var s = try Store.init(allocator, ":memory:"); defer s.deinit(); + // Create a run try s.insertRun("r1", null, "running", "{}", "{}", "[]"); - try s.insertStep("s1", "r1", "chat_step", "group_chat", "running", "{}", 1, null, null, null); - // Insert messages with different rounds (out of order) - try s.insertChatMessage("r1", "s1", 2, "assistant", "w1", "round 2 message"); - try s.insertChatMessage("r1", "s1", 1, "user", null, "round 1 message"); - try s.insertChatMessage("r1", "s1", 1, "assistant", "w1", "round 1 reply"); + // Create agent events + try s.createAgentEvent("r1", "step_a", 1, "read_file", "{\"path\":\"foo.txt\"}", "contents here", "completed"); + try s.createAgentEvent("r1", "step_a", 2, "write_file", "{\"path\":\"bar.txt\"}", null, "completed"); + try s.createAgentEvent("r1", "step_a", 3, null, null, null, "thinking"); + try s.createAgentEvent("r1", "step_b", 1, "search", "{}", "results", "completed"); - // Verify getChatMessages returns them ordered by round, id - const msgs = try s.getChatMessages(allocator, "s1"); + // List by run+step + const events_a = try s.listAgentEvents(allocator, "r1", "step_a"); + defer { + for (events_a) |ev| { + allocator.free(ev.run_id); + allocator.free(ev.step_id); + if (ev.tool) |t| allocator.free(t); + if (ev.args_json) |a| allocator.free(a); + if (ev.result_text) |r| allocator.free(r); + allocator.free(ev.status); + } + allocator.free(events_a); + } + try std.testing.expectEqual(@as(usize, 3), events_a.len); + try std.testing.expectEqualStrings("read_file", events_a[0].tool.?); + try std.testing.expectEqual(@as(i64, 1), events_a[0].iteration); + try std.testing.expectEqualStrings("contents here", events_a[0].result_text.?); + try std.testing.expect(events_a[2].tool == null); + try std.testing.expectEqualStrings("thinking", events_a[2].status); + + // List different step + const events_b = try s.listAgentEvents(allocator, "r1", 
"step_b"); defer { - for (msgs) |m| { - allocator.free(m.run_id); - allocator.free(m.step_id); - allocator.free(m.role); - if (m.worker_id) |wid| allocator.free(wid); - allocator.free(m.message); - } - allocator.free(msgs); - } - try std.testing.expectEqual(@as(usize, 3), msgs.len); - // First two should be round 1 (ordered by id within round) - try std.testing.expectEqual(@as(i64, 1), msgs[0].round); - try std.testing.expectEqual(@as(i64, 1), msgs[1].round); - try std.testing.expectEqual(@as(i64, 2), msgs[2].round); - try std.testing.expectEqualStrings("round 1 message", msgs[0].message); - try std.testing.expectEqualStrings("round 1 reply", msgs[1].message); - try std.testing.expectEqualStrings("round 2 message", msgs[2].message); + for (events_b) |ev| { + allocator.free(ev.run_id); + allocator.free(ev.step_id); + if (ev.tool) |t| allocator.free(t); + if (ev.args_json) |a| allocator.free(a); + if (ev.result_text) |r| allocator.free(r); + allocator.free(ev.status); + } + allocator.free(events_b); + } + try std.testing.expectEqual(@as(usize, 1), events_b.len); + try std.testing.expectEqualStrings("search", events_b[0].tool.?); + + // Empty list for nonexistent + const empty = try s.listAgentEvents(allocator, "r1", "nonexistent"); + defer allocator.free(empty); + try std.testing.expectEqual(@as(usize, 0), empty.len); } -test "saga state: insert, update status, and get" { +test "pending state injections" { const allocator = std.testing.allocator; var s = try Store.init(allocator, ":memory:"); defer s.deinit(); + // Create a run try s.insertRun("r1", null, "running", "{}", "{}", "[]"); - try s.insertStep("saga1", "r1", "saga_def", "saga", "running", "{}", 1, null, null, null); - try s.insertStep("body1", "r1", "body_def1", "task", "pending", "{}", 1, null, "saga1", null); - try s.insertStep("body2", "r1", "body_def2", "task", "pending", "{}", 1, null, "saga1", null); - try s.insertStep("comp1", "r1", "comp_def1", "task", "pending", "{}", 1, null, "saga1", null); - // 
Insert saga states for body steps - try s.insertSagaState("r1", "saga1", "body1", "comp1"); - try s.insertSagaState("r1", "saga1", "body2", null); + // Create pending injections + try s.createPendingInjection("r1", "{\"counter\":5}", "step_a"); + try s.createPendingInjection("r1", "{\"flag\":true}", "step_b"); + try s.createPendingInjection("r1", "{\"immediate\":1}", null); // apply immediately (NULL apply_after_step) - // Update one to 'completed' - try s.updateSagaState("r1", "saga1", "body1", "completed"); + // Consume by step_a -- should get the step_a injection and the NULL one + const consumed_a = try s.consumePendingInjections(allocator, "r1", "step_a"); + defer { + for (consumed_a) |inj| { + allocator.free(inj.run_id); + allocator.free(inj.updates_json); + if (inj.apply_after_step) |s_a| allocator.free(s_a); + } + allocator.free(consumed_a); + } + try std.testing.expectEqual(@as(usize, 2), consumed_a.len); + try std.testing.expectEqualStrings("{\"counter\":5}", consumed_a[0].updates_json); + try std.testing.expectEqualStrings("{\"immediate\":1}", consumed_a[1].updates_json); + + // Consuming again for step_a should return empty (already consumed) + const consumed_again = try s.consumePendingInjections(allocator, "r1", "step_a"); + defer allocator.free(consumed_again); + try std.testing.expectEqual(@as(usize, 0), consumed_again.len); - // Verify getSagaStates returns correct statuses - const states = try s.getSagaStates(allocator, "r1", "saga1"); + // step_b injection should still be pending + const consumed_b = try s.consumePendingInjections(allocator, "r1", "step_b"); defer { - for (states) |st| { - allocator.free(st.run_id); - allocator.free(st.saga_step_id); - allocator.free(st.body_step_id); - if (st.compensation_step_id) |cid| allocator.free(cid); - allocator.free(st.status); - } - allocator.free(states); - } - try std.testing.expectEqual(@as(usize, 2), states.len); - try std.testing.expectEqualStrings("body1", states[0].body_step_id); - try 
std.testing.expectEqualStrings("completed", states[0].status); - try std.testing.expectEqualStrings("comp1", states[0].compensation_step_id.?); - try std.testing.expectEqualStrings("body2", states[1].body_step_id); - try std.testing.expectEqualStrings("pending", states[1].status); - try std.testing.expect(states[1].compensation_step_id == null); + for (consumed_b) |inj| { + allocator.free(inj.run_id); + allocator.free(inj.updates_json); + if (inj.apply_after_step) |s_a| allocator.free(s_a); + } + allocator.free(consumed_b); + } + try std.testing.expectEqual(@as(usize, 1), consumed_b.len); + try std.testing.expectEqualStrings("{\"flag\":true}", consumed_b[0].updates_json); + + // Test discard + try s.createPendingInjection("r1", "{\"discard_me\":true}", "step_c"); + try s.discardPendingInjections("r1"); + const after_discard = try s.consumePendingInjections(allocator, "r1", "step_c"); + defer allocator.free(after_discard); + try std.testing.expectEqual(@as(usize, 0), after_discard.len); } -test "updateStepChildRunId: sets child_run_id on step" { +test "run state management" { const allocator = std.testing.allocator; var s = try Store.init(allocator, ":memory:"); defer s.deinit(); - // Create a run and step - try s.insertRun("r1", null, "running", "{}", "{}", "[]"); - try s.insertRun("child_r1", null, "running", "{}", "{}", "[]"); - try s.insertStep("s1", "r1", "sub_wf", "sub_workflow", "running", "{}", 1, null, null, null); + // Create run with state + try s.createRunWithState("r1", null, "{\"steps\":[]}", "{\"input\":1}", "{\"counter\":0}"); + const run = (try s.getRun(allocator, "r1")).?; + defer { + allocator.free(run.id); + if (run.idempotency_key) |ik| allocator.free(ik); + allocator.free(run.status); + if (run.workflow_id) |wid| allocator.free(wid); + allocator.free(run.workflow_json); + allocator.free(run.input_json); + allocator.free(run.callbacks_json); + if (run.error_text) |et| allocator.free(et); + if (run.state_json) |sj| allocator.free(sj); + } + try 
std.testing.expectEqualStrings("r1", run.id); + try std.testing.expectEqualStrings("pending", run.status); + try std.testing.expectEqualStrings("{\"steps\":[]}", run.workflow_json); - // Update child_run_id - try s.updateStepChildRunId("s1", "child_r1"); + // Create run with workflow_id + try s.createWorkflow("wf1", "Test WF", "{\"steps\":[]}"); + try s.createRunWithState("r2", "wf1", "{\"steps\":[]}", "{}", "{}"); + const run2 = (try s.getRun(allocator, "r2")).?; + defer { + allocator.free(run2.id); + if (run2.idempotency_key) |ik| allocator.free(ik); + allocator.free(run2.status); + if (run2.workflow_id) |wid| allocator.free(wid); + allocator.free(run2.workflow_json); + allocator.free(run2.input_json); + allocator.free(run2.callbacks_json); + if (run2.error_text) |et| allocator.free(et); + if (run2.state_json) |sj| allocator.free(sj); + } + try std.testing.expectEqualStrings("r2", run2.id); + try std.testing.expectEqualStrings("wf1", run2.workflow_id.?); + + // Update run state + try s.updateRunState("r1", "{\"counter\":42}"); + + // Increment checkpoint count + try s.incrementCheckpointCount("r1"); + try s.incrementCheckpointCount("r1"); + + // Create forked run + try s.createCheckpoint("cp1", "r1", "step_a", null, "{}", "[]", 1, null); + try s.createForkedRun("r3", "{\"steps\":[]}", "{\"counter\":42}", "r1", "cp1"); + const forked = (try s.getRun(allocator, "r3")).?; + defer { + allocator.free(forked.id); + if (forked.idempotency_key) |ik| allocator.free(ik); + allocator.free(forked.status); + if (forked.workflow_id) |wid| allocator.free(wid); + allocator.free(forked.workflow_json); + allocator.free(forked.input_json); + allocator.free(forked.callbacks_json); + if (forked.error_text) |et| allocator.free(et); + if (forked.state_json) |sj| allocator.free(sj); + } + try std.testing.expectEqualStrings("r3", forked.id); + try std.testing.expectEqualStrings("pending", forked.status); +} - // Get step and verify child_run_id is set - const step = (try 
s.getStep(allocator, "s1")).?; +test "token accounting: update step and run tokens" { + const allocator = std.testing.allocator; + var s = try Store.init(allocator, ":memory:"); + defer s.deinit(); + + try s.createRunWithState("r-tok", null, "{}", "{}", "{}"); + try s.updateRunStatus("r-tok", "running", null); + try s.insertStep("s-tok", "r-tok", "task1", "task", "completed", "{}", 1, null, null, null); + + // Update step tokens + try s.updateStepTokens("s-tok", 100, 200); + + // Update run tokens + try s.updateRunTokens("r-tok", 100, 200); + + // Verify run tokens + const tokens = try s.getRunTokens("r-tok"); + try std.testing.expectEqual(@as(i64, 100), tokens.input); + try std.testing.expectEqual(@as(i64, 200), tokens.output); + try std.testing.expectEqual(@as(i64, 300), tokens.total); + + // Accumulate more tokens + try s.updateRunTokens("r-tok", 50, 75); + const tokens2 = try s.getRunTokens("r-tok"); + try std.testing.expectEqual(@as(i64, 150), tokens2.input); + try std.testing.expectEqual(@as(i64, 275), tokens2.output); + try std.testing.expectEqual(@as(i64, 425), tokens2.total); +} + +test "workflow version CRUD" { + const allocator = std.testing.allocator; + var s = try Store.init(allocator, ":memory:"); + defer s.deinit(); + + // Create workflow with default version (1) + try s.createWorkflow("wf1", "Test Workflow", "{\"nodes\":{}}"); + const wf1 = (try s.getWorkflow(allocator, "wf1")).?; defer { - allocator.free(step.id); - allocator.free(step.run_id); - allocator.free(step.def_step_id); - allocator.free(step.type); - allocator.free(step.status); - allocator.free(step.input_json); - if (step.child_run_id) |crid| allocator.free(crid); + allocator.free(wf1.id); + allocator.free(wf1.name); + allocator.free(wf1.definition_json); } - try std.testing.expectEqualStrings("child_r1", step.child_run_id.?); + try std.testing.expectEqual(@as(i64, 1), wf1.version); + + // Create workflow with explicit version + try s.createWorkflowWithVersion("wf2", "Versioned 
Workflow", "{\"nodes\":{}}", 5); + const wf2 = (try s.getWorkflow(allocator, "wf2")).?; + defer { + allocator.free(wf2.id); + allocator.free(wf2.name); + allocator.free(wf2.definition_json); + } + try std.testing.expectEqual(@as(i64, 5), wf2.version); + + // Update workflow with new version + try s.updateWorkflowWithVersion("wf2", "Updated", "{\"nodes\":{\"a\":{}}}", 6); + const wf3 = (try s.getWorkflow(allocator, "wf2")).?; + defer { + allocator.free(wf3.id); + allocator.free(wf3.name); + allocator.free(wf3.definition_json); + } + try std.testing.expectEqual(@as(i64, 6), wf3.version); + try std.testing.expectEqualStrings("Updated", wf3.name); + + // Update without changing version + try s.updateWorkflow("wf1", "Still v1", "{\"nodes\":{\"b\":{}}}"); + const wf4 = (try s.getWorkflow(allocator, "wf1")).?; + defer { + allocator.free(wf4.id); + allocator.free(wf4.name); + allocator.free(wf4.definition_json); + } + try std.testing.expectEqual(@as(i64, 1), wf4.version); + + // List workflows should include version + const workflows = try s.listWorkflows(allocator); + defer { + for (workflows) |w| { + allocator.free(w.id); + allocator.free(w.name); + allocator.free(w.definition_json); + } + allocator.free(workflows); + } + try std.testing.expectEqual(@as(usize, 2), workflows.len); } diff --git a/src/templates.zig b/src/templates.zig index d928522..0332f02 100644 --- a/src/templates.zig +++ b/src/templates.zig @@ -1,38 +1,39 @@ /// Template engine for prompt rendering. /// Resolves `{{...}}` expressions against workflow context. 
/// -/// Supported expressions: +/// Legacy Context + render(): /// - `{{input.X}}` -- look up key X in the workflow input JSON /// - `{{input.X.Y}}` -- nested object lookups inside workflow input JSON -/// - `{{steps.ID.output}}` -- output of a single completed step -/// - `{{steps.ID.outputs}}` -- JSON array of outputs from map/fan_out child steps /// - `{{item}}` -- current item string for map iterations -/// - `{{task.X}}` -- look up field X in the NullTickets task JSON (supports nested paths like `task.metadata.repo_url`) +/// - `{{task.X}}` -- look up field X in the NullTickets task JSON +/// - `{{attempt}}` -- current retry attempt number +/// +/// State-based renderTemplate(): +/// - `{{state.X}}` -- look up key X in the unified state JSON +/// - `{{state.X.Y}}` -- nested paths with optional [-1] array indexing +/// - `{{input.X}}` -- look up key X in the workflow input JSON +/// - `{{item}}` -- current item string for send iterations +/// - `{{store.ns.key}}` -- fetch NullTickets store entry value /// /// Conditional blocks: /// - `{% if %}...{% endif %}` /// - `{% if %}...{% else %}...{% endif %}` /// Conditionals are processed before expression substitution. /// Truthiness: non-null, non-empty, not "false", not "null" string values are truthy. 
- const std = @import("std"); // ── Context ─────────────────────────────────────────────────────────── pub const Context = struct { input_json: []const u8, // raw JSON string of workflow input - step_outputs: []const StepOutput, // completed step outputs - item: ?[]const u8, // current map item (null if not in map) - debate_responses: ?[]const u8 = null, // JSON array string for debate judge template - chat_history: ?[]const u8 = null, // formatted chat transcript for group_chat round_template - role: ?[]const u8 = null, // participant role for group_chat round_template + step_outputs: []const StepOutput, // completed step outputs (legacy, for tracker.zig) + item: ?[]const u8, // current item string (null if not in map/send) task_json: ?[]const u8 = null, // raw JSON string of NullTickets task data attempt: ?u32 = null, // current retry attempt number pub const StepOutput = struct { step_id: []const u8, output: ?[]const u8, // single output (for task steps) - outputs: ?[]const []const u8, // array of outputs (for fan_out/map parent) }; }; @@ -222,27 +223,6 @@ fn resolveExpression(allocator: std.mem.Allocator, expr: []const u8, ctx: Contex return error.ItemNotAvailable; } - if (std.mem.eql(u8, expr, "debate_responses")) { - if (ctx.debate_responses) |dr| { - return allocator.dupe(u8, dr) catch return error.OutOfMemory; - } - return allocator.dupe(u8, "[]") catch return error.OutOfMemory; - } - - if (std.mem.eql(u8, expr, "chat_history")) { - if (ctx.chat_history) |ch| { - return allocator.dupe(u8, ch) catch return error.OutOfMemory; - } - return allocator.dupe(u8, "") catch return error.OutOfMemory; - } - - if (std.mem.eql(u8, expr, "role")) { - if (ctx.role) |r| { - return allocator.dupe(u8, r) catch return error.OutOfMemory; - } - return allocator.dupe(u8, "") catch return error.OutOfMemory; - } - if (std.mem.eql(u8, expr, "attempt")) { if (ctx.attempt) |a| { return std.fmt.allocPrint(allocator, "{d}", .{a}) catch return error.OutOfMemory; @@ -292,7 +272,7 @@ fn 
resolveInputField(allocator: std.mem.Allocator, input_json: []const u8, field } fn resolveStepRef(allocator: std.mem.Allocator, rest: []const u8, step_outputs: []const Context.StepOutput) RenderError![]const u8 { - // rest is "ID.output" or "ID.outputs" + // rest is "ID.output" const dot_pos = std.mem.lastIndexOfScalar(u8, rest, '.') orelse return error.UnknownExpression; const step_id = rest[0..dot_pos]; const field = rest[dot_pos + 1 ..]; @@ -306,9 +286,6 @@ fn resolveStepRef(allocator: std.mem.Allocator, rest: []const u8, step_outputs: } return allocator.dupe(u8, "") catch return error.OutOfMemory; } - if (std.mem.eql(u8, field, "outputs")) { - return serializeOutputs(allocator, so.outputs); - } return error.UnknownExpression; } } @@ -336,38 +313,6 @@ fn resolveTaskField(allocator: std.mem.Allocator, task_json: []const u8, field_p return jsonValueToString(allocator, current); } -fn serializeOutputs(allocator: std.mem.Allocator, outputs: ?[]const []const u8) RenderError![]const u8 { - const items = outputs orelse { - return allocator.dupe(u8, "[]") catch return error.OutOfMemory; - }; - - var buf: std.ArrayListUnmanaged(u8) = .empty; - errdefer buf.deinit(allocator); - - buf.append(allocator, '[') catch return error.OutOfMemory; - for (items, 0..) 
|item, i| { - if (i > 0) { - buf.append(allocator, ',') catch return error.OutOfMemory; - } - // Write JSON-escaped string - buf.append(allocator, '"') catch return error.OutOfMemory; - for (item) |c| { - switch (c) { - '"' => buf.appendSlice(allocator, "\\\"") catch return error.OutOfMemory, - '\\' => buf.appendSlice(allocator, "\\\\") catch return error.OutOfMemory, - '\n' => buf.appendSlice(allocator, "\\n") catch return error.OutOfMemory, - '\r' => buf.appendSlice(allocator, "\\r") catch return error.OutOfMemory, - '\t' => buf.appendSlice(allocator, "\\t") catch return error.OutOfMemory, - else => buf.append(allocator, c) catch return error.OutOfMemory, - } - } - buf.append(allocator, '"') catch return error.OutOfMemory; - } - buf.append(allocator, ']') catch return error.OutOfMemory; - - return buf.toOwnedSlice(allocator) catch return error.OutOfMemory; -} - fn jsonValueToString(allocator: std.mem.Allocator, val: std.json.Value) RenderError![]const u8 { switch (val) { .string => |s| { @@ -403,7 +348,446 @@ fn jsonValueToString(allocator: std.mem.Allocator, val: std.json.Value) RenderEr } } -// ── Tests ───────────────────────────────────────────────────────────── +// ── New state-based template engine ─────────────────────────────────── + +const state_mod = @import("state.zig"); +const tracker_client = @import("tracker_client.zig"); +const Allocator = std.mem.Allocator; + +pub const StoreFetcher = *const fn ( + alloc: Allocator, + base_url: []const u8, + api_token: ?[]const u8, + namespace: []const u8, + key: []const u8, +) anyerror!?[]const u8; + +pub const StoreAccess = struct { + base_url: []const u8, + api_token: ?[]const u8 = null, + fetcher: StoreFetcher, +}; + +pub fn fetchStoreValueHttp( + alloc: Allocator, + base_url: []const u8, + api_token: ?[]const u8, + namespace: []const u8, + key: []const u8, +) !?[]const u8 { + var client = tracker_client.TrackerClient.init(alloc, base_url, api_token); + return client.storeGetValue(namespace, key); +} + +/// 
Strip surrounding double quotes from a JSON string value.
+/// `"hello"` -> `hello`, `42` -> `42`, `[1,2]` -> `[1,2]` (outer quotes only; embedded escapes are left as-is)
+fn stripJsonQuotes(s: []const u8) []const u8 {
+    if (s.len >= 2 and s[0] == '"' and s[s.len - 1] == '"') {
+        return s[1 .. s.len - 1];
+    }
+    return s;
+}
+
+/// Look up a value from a JSON blob by dotted path (no prefix stripping).
+/// E.g. lookupJsonPath(alloc, '{"topic":"AI"}', "topic") -> "AI"
+fn lookupJsonPath(alloc: Allocator, json_bytes: []const u8, path: []const u8) !?[]const u8 {
+    // Reuse state_mod.getStateValue but without "state." prefix.
+    // getStateValue strips "state." if present, otherwise uses path as-is.
+    return try state_mod.getStateValue(alloc, json_bytes, path);
+}
+
+/// Resolve a template expression (the text inside `{{ }}`) to a string value.
+/// Handles state.X, input.X, item, item.X, config.X, and store.ns.key expressions.
+fn resolveNewExpression(
+    alloc: Allocator,
+    expr: []const u8,
+    state_json: []const u8,
+    input_json: ?[]const u8,
+    item_json: ?[]const u8,
+    store_access: ?StoreAccess,
+) ![]const u8 {
+    if (std.mem.startsWith(u8, expr, "state.")) {
+        // Use getStateValue which handles "state." 
prefix, nested paths, [-1] indexing + const raw = try state_mod.getStateValue(alloc, state_json, expr); + if (raw) |r| { + // Strip quotes for strings; leave numbers/bools/arrays/objects as-is + const stripped = stripJsonQuotes(r); + if (stripped.ptr != r.ptr or stripped.len != r.len) { + // It was a quoted string — dupe the unquoted version and free the original + const result = alloc.dupe(u8, stripped) catch return error.OutOfMemory; + alloc.free(r); + return result; + } + return r; + } + return alloc.dupe(u8, "") catch return error.OutOfMemory; + } + + if (std.mem.startsWith(u8, expr, "input.")) { + const ij = input_json orelse { + return alloc.dupe(u8, "") catch return error.OutOfMemory; + }; + const field = expr["input.".len..]; + const raw = try lookupJsonPath(alloc, ij, field); + if (raw) |r| { + const stripped = stripJsonQuotes(r); + if (stripped.ptr != r.ptr or stripped.len != r.len) { + const result = alloc.dupe(u8, stripped) catch return error.OutOfMemory; + alloc.free(r); + return result; + } + return r; + } + return alloc.dupe(u8, "") catch return error.OutOfMemory; + } + + if (std.mem.eql(u8, expr, "item")) { + if (item_json) |ij| { + const stripped = stripJsonQuotes(ij); + return alloc.dupe(u8, stripped) catch return error.OutOfMemory; + } + return alloc.dupe(u8, "") catch return error.OutOfMemory; + } + + if (std.mem.startsWith(u8, expr, "item.")) { + const ij = item_json orelse { + return alloc.dupe(u8, "") catch return error.OutOfMemory; + }; + const field = expr["item.".len..]; + const raw = try lookupJsonPath(alloc, ij, field); + if (raw) |r| { + const stripped = stripJsonQuotes(r); + if (stripped.ptr != r.ptr or stripped.len != r.len) { + const result = alloc.dupe(u8, stripped) catch return error.OutOfMemory; + alloc.free(r); + return result; + } + return r; + } + return alloc.dupe(u8, "") catch return error.OutOfMemory; + } + + // {{config.X}} — alias for {{state.__config.X}} + if (std.mem.startsWith(u8, expr, "config.")) { + const config_path 
= try std.fmt.allocPrint(alloc, "state.__config.{s}", .{expr["config.".len..]}); + defer alloc.free(config_path); + const raw = try state_mod.getStateValue(alloc, state_json, config_path); + if (raw) |r| { + const stripped = stripJsonQuotes(r); + if (stripped.ptr != r.ptr or stripped.len != r.len) { + const result = alloc.dupe(u8, stripped) catch return error.OutOfMemory; + alloc.free(r); + return result; + } + return r; + } + return alloc.dupe(u8, "") catch return error.OutOfMemory; + } + + if (std.mem.startsWith(u8, expr, "store.")) { + const access = store_access orelse return error.StoreNotConfigured; + const store_expr = expr["store.".len..]; + const dot = std.mem.indexOfScalar(u8, store_expr, '.') orelse return error.InvalidStoreExpression; + const namespace = store_expr[0..dot]; + const key = store_expr[dot + 1 ..]; + if (namespace.len == 0 or key.len == 0) return error.InvalidStoreExpression; + + const raw = try access.fetcher(alloc, access.base_url, access.api_token, namespace, key); + if (raw) |r| { + const stripped = stripJsonQuotes(r); + if (stripped.ptr != r.ptr or stripped.len != r.len) { + const result = alloc.dupe(u8, stripped) catch return error.OutOfMemory; + alloc.free(r); + return result; + } + return r; + } + return alloc.dupe(u8, "") catch return error.OutOfMemory; + } + + // Unknown expression — return empty + return alloc.dupe(u8, "") catch return error.OutOfMemory; +} + +/// Check if a condition expression is truthy for the new template engine. 
+/// Truthy: non-null, non-empty, not "false", not "0", not "null", not empty array "[]" +fn isNewTruthy( + alloc: Allocator, + expr: []const u8, + state_json: []const u8, + input_json: ?[]const u8, + item_json: ?[]const u8, + store_access: ?StoreAccess, +) bool { + const value = resolveNewExpression(alloc, expr, state_json, input_json, item_json, store_access) catch return false; + defer alloc.free(value); + + if (value.len == 0) return false; + if (std.mem.eql(u8, value, "false")) return false; + if (std.mem.eql(u8, value, "0")) return false; + if (std.mem.eql(u8, value, "null")) return false; + if (std.mem.eql(u8, value, "[]")) return false; + return true; +} + +/// Process `{% if expr %}...{% endif %}` conditional blocks for the new engine. +fn processNewConditionals( + alloc: Allocator, + template: []const u8, + state_json: []const u8, + input_json: ?[]const u8, + item_json: ?[]const u8, + store_access: ?StoreAccess, +) ![]const u8 { + var result: std.ArrayListUnmanaged(u8) = .empty; + errdefer result.deinit(alloc); + + var pos: usize = 0; + + while (pos < template.len) { + if (std.mem.indexOfPos(u8, template, pos, "{%")) |open| { + result.appendSlice(alloc, template[pos..open]) catch return error.OutOfMemory; + + const after_open = open + 2; + const close = std.mem.indexOfPos(u8, template, after_open, "%}") orelse + return error.OutOfMemory; + const tag_content = std.mem.trim(u8, template[after_open..close], " \t\n\r"); + const after_tag = close + 2; + + if (std.mem.startsWith(u8, tag_content, "if ")) { + const expr = std.mem.trim(u8, tag_content["if ".len..], " \t\n\r"); + + // Find matching {% endif %} at this nesting level + var depth: usize = 0; + var scan: usize = after_tag; + var else_start: ?usize = null; + var else_end: ?usize = null; + var endif_start: ?usize = null; + var endif_end: ?usize = null; + + while (scan < template.len) { + if (std.mem.indexOfPos(u8, template, scan, "{%")) |inner_open| { + const inner_after = inner_open + 2; + const 
inner_close = std.mem.indexOfPos(u8, template, inner_after, "%}") orelse + return error.OutOfMemory; + const inner_tag = std.mem.trim(u8, template[inner_after..inner_close], " \t\n\r"); + const inner_after_tag = inner_close + 2; + + if (std.mem.startsWith(u8, inner_tag, "if ")) { + depth += 1; + scan = inner_after_tag; + } else if (std.mem.eql(u8, inner_tag, "else") and depth == 0) { + else_start = inner_open; + else_end = inner_after_tag; + scan = inner_after_tag; + } else if (std.mem.eql(u8, inner_tag, "endif")) { + if (depth == 0) { + endif_start = inner_open; + endif_end = inner_after_tag; + break; + } + depth -= 1; + scan = inner_after_tag; + } else { + scan = inner_after_tag; + } + } else { + break; + } + } + + if (endif_end == null) { + return error.OutOfMemory; + } + + const truthy = isNewTruthy(alloc, expr, state_json, input_json, item_json, store_access); + + if (truthy) { + const branch_end = else_start orelse endif_start.?; + const branch = template[after_tag..branch_end]; + const processed = try processNewConditionals(alloc, branch, state_json, input_json, item_json, store_access); + defer alloc.free(processed); + result.appendSlice(alloc, processed) catch return error.OutOfMemory; + } else { + if (else_end) |ee| { + const branch = template[ee..endif_start.?]; + const processed = try processNewConditionals(alloc, branch, state_json, input_json, item_json, store_access); + defer alloc.free(processed); + result.appendSlice(alloc, processed) catch return error.OutOfMemory; + } + } + + pos = endif_end.?; + } else { + result.appendSlice(alloc, template[open..after_tag]) catch return error.OutOfMemory; + pos = after_tag; + } + } else { + result.appendSlice(alloc, template[pos..]) catch return error.OutOfMemory; + break; + } + } + + return result.toOwnedSlice(alloc) catch return error.OutOfMemory; +} + +/// Render a template using the new state-based interpolation syntax. 
+/// +/// Supported expressions: +/// - `{{state.X}}` — state key value +/// - `{{state.X.Y}}` — nested state access +/// - `{{state.X[-1]}}` — last array element from state +/// - `{{input.X}}` — original input (read-only) +/// - `{{item}}` — current item in send context +/// - `{{item.X}}` — nested access on item +/// - `{% if state.X %}...{% endif %}` — conditionals +/// +/// Processing order: +/// 1. Process `{% if ... %}...{% endif %}` blocks +/// 2. Process `{{...}}` interpolations +pub fn renderTemplate( + alloc: Allocator, + template: []const u8, + state_json: []const u8, + input_json: ?[]const u8, + item_json: ?[]const u8, +) ![]const u8 { + return renderTemplateWithStore(alloc, template, state_json, input_json, item_json, null); +} + +pub fn renderTemplateWithStore( + alloc: Allocator, + template: []const u8, + state_json: []const u8, + input_json: ?[]const u8, + item_json: ?[]const u8, + store_access: ?StoreAccess, +) ![]const u8 { + // Phase 1: Process conditional blocks + const preprocessed = try processNewConditionals(alloc, template, state_json, input_json, item_json, store_access); + defer alloc.free(preprocessed); + + // Phase 2: Resolve {{expression}} substitutions + var result: std.ArrayListUnmanaged(u8) = .empty; + errdefer result.deinit(alloc); + + var pos: usize = 0; + + while (pos < preprocessed.len) { + if (std.mem.indexOfPos(u8, preprocessed, pos, "{{")) |open| { + result.appendSlice(alloc, preprocessed[pos..open]) catch return error.OutOfMemory; + + const after_open = open + 2; + if (std.mem.indexOfPos(u8, preprocessed, after_open, "}}")) |close| { + const raw_expr = preprocessed[after_open..close]; + const expr = std.mem.trim(u8, raw_expr, " \t\n\r"); + + const value = try resolveNewExpression(alloc, expr, state_json, input_json, item_json, store_access); + defer alloc.free(value); + + result.appendSlice(alloc, value) catch return error.OutOfMemory; + pos = close + 2; + } else { + // Unterminated — just append the rest as literal + 
result.appendSlice(alloc, preprocessed[pos..]) catch return error.OutOfMemory; + break; + } + } else { + result.appendSlice(alloc, preprocessed[pos..]) catch return error.OutOfMemory; + break; + } + } + + return result.toOwnedSlice(alloc) catch return error.OutOfMemory; +} + +// ── New template engine tests ───────────────────────────────────────── + +test "template state interpolation" { + const alloc = std.testing.allocator; + const s = "{\"name\":\"test\",\"count\":42}"; + const result = try renderTemplate(alloc, "Hello {{state.name}}, count={{state.count}}", s, null, null); + defer alloc.free(result); + try std.testing.expectEqualStrings("Hello test, count=42", result); +} + +test "template input interpolation" { + const alloc = std.testing.allocator; + const result = try renderTemplate(alloc, "Topic: {{input.topic}}", "{}", "{\"topic\":\"AI\"}", null); + defer alloc.free(result); + try std.testing.expectEqualStrings("Topic: AI", result); +} + +test "template item interpolation" { + const alloc = std.testing.allocator; + const result = try renderTemplate(alloc, "File: {{item.path}}", "{}", null, "{\"path\":\"main.py\"}"); + defer alloc.free(result); + try std.testing.expectEqualStrings("File: main.py", result); +} + +test "template conditional true" { + const alloc = std.testing.allocator; + const result = try renderTemplate(alloc, "{% if state.name %}Hi {{state.name}}{% endif %}", "{\"name\":\"Bob\"}", null, null); + defer alloc.free(result); + try std.testing.expectEqualStrings("Hi Bob", result); +} + +test "template conditional false" { + const alloc = std.testing.allocator; + const result = try renderTemplate(alloc, "{% if state.missing %}hidden{% endif %}visible", "{}", null, null); + defer alloc.free(result); + try std.testing.expectEqualStrings("visible", result); +} + +test "template no interpolation" { + const alloc = std.testing.allocator; + const result = try renderTemplate(alloc, "plain text", "{}", null, null); + defer alloc.free(result); + try 
std.testing.expectEqualStrings("plain text", result); +} + +fn mockStoreFetcher( + alloc: Allocator, + base_url: []const u8, + api_token: ?[]const u8, + namespace: []const u8, + key: []const u8, +) !?[]const u8 { + _ = base_url; + _ = api_token; + if (std.mem.eql(u8, namespace, "prefs") and std.mem.eql(u8, key, "theme")) { + return try alloc.dupe(u8, "\"dark\""); + } + return null; +} + +test "template store interpolation" { + const alloc = std.testing.allocator; + const result = try renderTemplateWithStore( + alloc, + "Theme: {{store.prefs.theme}}", + "{}", + null, + null, + .{ + .base_url = "http://example.test", + .fetcher = mockStoreFetcher, + }, + ); + defer alloc.free(result); + + try std.testing.expectEqualStrings("Theme: dark", result); +} + +test "template store interpolation errors without store access" { + const alloc = std.testing.allocator; + try std.testing.expectError( + error.StoreNotConfigured, + renderTemplateWithStore(alloc, "Theme: {{store.prefs.theme}}", "{}", null, null, null), + ); +} + +// ── Old template engine tests ───────────────────────────────────────── test "render literal text unchanged" { const allocator = std.testing.allocator; @@ -443,7 +827,7 @@ test "render step output" { const result = try render(allocator, "Result: {{steps.s1.output}}", .{ .input_json = "{}", .step_outputs = &.{ - .{ .step_id = "s1", .output = "found data", .outputs = null }, + .{ .step_id = "s1", .output = "found data" }, }, .item = null, }); @@ -451,22 +835,6 @@ test "render step output" { try std.testing.expectEqualStrings("Result: found data", result); } -test "render step outputs array" { - const allocator = std.testing.allocator; - const outputs: []const []const u8 = &.{ "result1", "result2" }; - const result = try render(allocator, "All: {{steps.s1.outputs}}", .{ - .input_json = "{}", - .step_outputs = &.{ - .{ .step_id = "s1", .output = null, .outputs = outputs }, - }, - .item = null, - }); - defer allocator.free(result); - // Should produce a JSON 
array like: ["result1","result2"] - try std.testing.expect(std.mem.indexOf(u8, result, "result1") != null); - try std.testing.expect(std.mem.indexOf(u8, result, "result2") != null); -} - test "render item in map context" { const allocator = std.testing.allocator; const result = try render(allocator, "Research: {{item}}", .{ @@ -575,43 +943,6 @@ test "item without map context returns error" { try std.testing.expectError(error.ItemNotAvailable, err); } -test "render debate_responses expression" { - const allocator = std.testing.allocator; - const result = try render(allocator, "Pick best:\n{{debate_responses}}", .{ - .input_json = "{}", - .step_outputs = &.{}, - .item = null, - .debate_responses = "[\"resp1\",\"resp2\"]", - }); - defer allocator.free(result); - try std.testing.expect(std.mem.indexOf(u8, result, "resp1") != null); - try std.testing.expect(std.mem.indexOf(u8, result, "resp2") != null); -} - -test "render chat_history and role expressions" { - const allocator = std.testing.allocator; - const result = try render(allocator, "Previous:\n{{chat_history}}\nYour role: {{role}}", .{ - .input_json = "{}", - .step_outputs = &.{}, - .item = null, - .chat_history = "Architect: design first", - .role = "Frontend Dev", - }); - defer allocator.free(result); - try std.testing.expectEqualStrings("Previous:\nArchitect: design first\nYour role: Frontend Dev", result); -} - -test "debate_responses defaults to empty array when not set" { - const allocator = std.testing.allocator; - const result = try render(allocator, "{{debate_responses}}", .{ - .input_json = "{}", - .step_outputs = &.{}, - .item = null, - }); - defer allocator.free(result); - try std.testing.expectEqualStrings("[]", result); -} - test "render task.title variable" { const allocator = std.testing.allocator; const result = try render(allocator, "Work on: {{task.title}}", .{ diff --git a/src/tracker.zig b/src/tracker.zig index 6c1805d..c61950b 100644 --- a/src/tracker.zig +++ b/src/tracker.zig @@ -257,6 
+257,14 @@ pub const Tracker = struct { _ = self.used_ports.swapRemove(port); } + /// Startup cleanup: remove all stale workspaces from a previous run. + /// Workspaces are ephemeral and will be recreated by hooks when tasks are + /// claimed again, so a clean slate on restart is safe. + pub fn startupCleanup(self: *Tracker) void { + log.info("startup: cleaning terminal workspaces", .{}); + workspace_mod.cleanAll(self.cfg.workspace.root); + } + /// Thread entry point — run the poll loop until shutdown is requested. pub fn run(self: *Tracker) void { log.info("tracker started (poll_interval={d}ms, agent_id={s})", .{ @@ -265,7 +273,7 @@ pub const Tracker = struct { }); // Startup cleanup: remove all stale workspaces from previous run - workspace_mod.cleanAll(self.cfg.workspace.root); + self.startupCleanup(); const poll_ns: u64 = @as(u64, self.cfg.poll_interval_ms) * std.time.ns_per_ms; @@ -378,6 +386,9 @@ pub const Tracker = struct { } /// Poll NullTickets for each workflow's claim_roles and claim available tasks. + // TODO(task14): When nulltickets schema changes are integrated, update WorkflowDef + // and pollAndClaim to handle the new workflow format (e.g. new claim fields, task + // shape, or execution modes introduced in the orchestration milestone). 
fn pollAndClaim(self: *Tracker, tick_alloc: std.mem.Allocator) void { const base_url = self.cfg.url orelse return; diff --git a/src/tracker_client.zig b/src/tracker_client.zig index 57b5c65..212e926 100644 --- a/src/tracker_client.zig +++ b/src/tracker_client.zig @@ -183,7 +183,8 @@ pub const TrackerClient = struct { const url = try std.fmt.allocPrint(self.allocator, "{s}/artifacts", .{self.base_url}); defer self.allocator.free(url); - const body = try std.fmt.allocPrint(self.allocator, + const body = try std.fmt.allocPrint( + self.allocator, "{{\"task_id\":{f},\"run_id\":{f},\"kind\":{f},\"uri\":{f},\"meta\":{s}}}", .{ std.json.fmt(task_id, .{}), @@ -226,6 +227,59 @@ pub const TrackerClient = struct { return result.body; } + pub fn storeGetValue(self: *TrackerClient, namespace: []const u8, key: []const u8) !?[]const u8 { + const namespace_enc = try encodePathSegment(self.allocator, namespace); + defer self.allocator.free(namespace_enc); + const key_enc = try encodePathSegment(self.allocator, key); + defer self.allocator.free(key_enc); + + const url = try std.fmt.allocPrint( + self.allocator, + "{s}/store/{s}/{s}", + .{ trimTrailingSlash(self.base_url), namespace_enc, key_enc }, + ); + defer self.allocator.free(url); + + const result = try self.httpRequest(url, .GET, null, null); + defer self.allocator.free(result.body); + + if (result.status_code == 404) return null; + if (result.status_code < 200 or result.status_code >= 300) return null; + + const parsed = std.json.parseFromSlice(std.json.Value, self.allocator, result.body, .{ + .allocate = .alloc_always, + .ignore_unknown_fields = true, + }) catch return null; + defer parsed.deinit(); + if (parsed.value != .object) return null; + + const value = parsed.value.object.get("value") orelse return null; + const value_json = try std.json.Stringify.valueAlloc(self.allocator, value, .{}); + return value_json; + } + + pub fn storePutValue(self: *TrackerClient, namespace: []const u8, key: []const u8, value_json: []const 
u8) !bool { + const namespace_enc = try encodePathSegment(self.allocator, namespace); + defer self.allocator.free(namespace_enc); + const key_enc = try encodePathSegment(self.allocator, key); + defer self.allocator.free(key_enc); + + const url = try std.fmt.allocPrint( + self.allocator, + "{s}/store/{s}/{s}", + .{ trimTrailingSlash(self.base_url), namespace_enc, key_enc }, + ); + defer self.allocator.free(url); + + const body = try std.fmt.allocPrint(self.allocator, "{{\"value\":{s}}}", .{value_json}); + defer self.allocator.free(body); + + const result = try self.httpRequest(url, .PUT, body, null); + defer self.allocator.free(result.body); + + return result.status_code >= 200 and result.status_code < 300; + } + fn httpRequest( self: *TrackerClient, url: []const u8, @@ -272,6 +326,36 @@ pub const TrackerClient = struct { } }; +fn trimTrailingSlash(url: []const u8) []const u8 { + if (url.len > 0 and url[url.len - 1] == '/') return url[0 .. url.len - 1]; + return url; +} + +fn encodePathSegment(allocator: std.mem.Allocator, value: []const u8) ![]const u8 { + var buf: std.ArrayListUnmanaged(u8) = .empty; + errdefer buf.deinit(allocator); + + for (value) |ch| { + if (isUnreserved(ch)) { + try buf.append(allocator, ch); + continue; + } + try buf.writer(allocator).print("%{X:0>2}", .{ch}); + } + + return try buf.toOwnedSlice(allocator); +} + +fn isUnreserved(ch: u8) bool { + return (ch >= 'A' and ch <= 'Z') or + (ch >= 'a' and ch <= 'z') or + (ch >= '0' and ch <= '9') or + ch == '-' or + ch == '_' or + ch == '.' 
or + ch == '~'; +} + fn parseTaskInfo(allocator: std.mem.Allocator, task_value: std.json.Value) !TaskInfo { if (task_value != .object) return error.InvalidTaskPayload; const obj = task_value.object; @@ -365,3 +449,11 @@ test "TrackerClient exposes optimistic transition support" { try std.testing.expect(@hasDecl(TrackerClient, "transition")); try std.testing.expect(@hasDecl(TrackerClient, "postArtifact")); } + +test "encodePathSegment percent-encodes reserved characters" { + const allocator = std.testing.allocator; + const encoded = try encodePathSegment(allocator, "team alpha/key"); + defer allocator.free(encoded); + + try std.testing.expectEqualStrings("team%20alpha%2Fkey", encoded); +} diff --git a/src/types.zig b/src/types.zig index fbe28c2..b4dd51a 100644 --- a/src/types.zig +++ b/src/types.zig @@ -7,10 +7,11 @@ const std = @import("std"); pub const RunStatus = enum { pending, running, - paused, + interrupted, completed, failed, cancelled, + forked, pub fn toString(self: RunStatus) []const u8 { return @tagName(self); @@ -31,7 +32,7 @@ pub const StepStatus = enum { completed, failed, skipped, - waiting_approval, + interrupted, pub fn toString(self: StepStatus) []const u8 { return @tagName(self); @@ -47,19 +48,12 @@ pub const StepStatus = enum { pub const StepType = enum { task, - fan_out, - map, - condition, - approval, - reduce, - loop, - sub_workflow, - wait, - router, + route, + interrupt, + agent, + send, transform, - saga, - debate, - group_chat, + subgraph, pub fn toString(self: StepType) []const u8 { return @tagName(self); @@ -153,6 +147,7 @@ pub const RunRow = struct { id: []const u8, idempotency_key: ?[]const u8, status: []const u8, + workflow_id: ?[]const u8 = null, workflow_json: []const u8, input_json: []const u8, callbacks_json: []const u8, @@ -161,6 +156,9 @@ pub const RunRow = struct { updated_at_ms: i64, started_at_ms: ?i64, ended_at_ms: ?i64, + state_json: ?[]const u8 = null, + config_json: ?[]const u8 = null, + parent_run_id: ?[]const u8 = 
null, }; pub const StepRow = struct { @@ -206,17 +204,6 @@ pub const ArtifactRow = struct { created_at_ms: i64, }; -pub const ChatMessageRow = struct { - id: i64, - run_id: []const u8, - step_id: []const u8, - round: i64, - role: []const u8, - worker_id: ?[]const u8, - message: []const u8, - ts_ms: i64, -}; - pub const TrackerRunRow = struct { task_id: []const u8, tracker_run_id: []const u8, @@ -238,12 +225,75 @@ pub const TrackerRunRow = struct { last_error_text: ?[]const u8, }; -pub const SagaStateRow = struct { +pub const WorkflowRow = struct { + id: []const u8, + name: []const u8, + definition_json: []const u8, + version: i64 = 1, + created_at_ms: i64, + updated_at_ms: i64, +}; + +pub const CheckpointRow = struct { + id: []const u8, run_id: []const u8, - saga_step_id: []const u8, - body_step_id: []const u8, - compensation_step_id: ?[]const u8, + step_id: []const u8, + parent_id: ?[]const u8, + state_json: []const u8, + completed_nodes_json: []const u8, + version: i64, + metadata_json: ?[]const u8, + created_at_ms: i64, +}; + +pub const AgentEventRow = struct { + id: i64, + run_id: []const u8, + step_id: []const u8, + iteration: i64, + tool: ?[]const u8, + args_json: ?[]const u8, + result_text: ?[]const u8, status: []const u8, + created_at_ms: i64, +}; + +pub const PendingInjectionRow = struct { + id: i64, + run_id: []const u8, + updates_json: []const u8, + apply_after_step: ?[]const u8, + created_at_ms: i64, +}; + +pub const PendingWriteRow = struct { + id: i64, + run_id: []const u8, + step_id: []const u8, + channel: []const u8, + value_json: []const u8, + created_at_ms: i64, +}; + +pub const ReducerType = enum { + last_value, + append, + merge, + add, + min, + max, + add_messages, + + pub fn toString(self: ReducerType) []const u8 { + return @tagName(self); + } + + pub fn fromString(s: []const u8) ?ReducerType { + inline for (@typeInfo(ReducerType).@"enum".fields) |f| { + if (std.mem.eql(u8, s, f.name)) return @enumFromInt(f.value); + } + return null; + } }; // 
── API Response Types ───────────────────────────────────────────────── @@ -275,17 +325,17 @@ test "RunStatus round-trip" { } test "StepStatus round-trip" { - const s = StepStatus.waiting_approval; + const s = StepStatus.interrupted; const name = s.toString(); - try std.testing.expectEqualStrings("waiting_approval", name); + try std.testing.expectEqualStrings("interrupted", name); const parsed = StepStatus.fromString(name); - try std.testing.expectEqual(StepStatus.waiting_approval, parsed.?); + try std.testing.expectEqual(StepStatus.interrupted, parsed.?); } test "StepType round-trip" { - const s = StepType.fan_out; - try std.testing.expectEqualStrings("fan_out", s.toString()); - try std.testing.expectEqual(StepType.fan_out, StepType.fromString("fan_out").?); + const s = StepType.route; + try std.testing.expectEqualStrings("route", s.toString()); + try std.testing.expectEqual(StepType.route, StepType.fromString("route").?); } test "WorkerStatus round-trip" { diff --git a/src/worker_protocol.zig b/src/worker_protocol.zig index d560d38..59be6a6 100644 --- a/src/worker_protocol.zig +++ b/src/worker_protocol.zig @@ -6,6 +6,7 @@ pub const Protocol = enum { openai_chat, mqtt, redis_stream, + a2a, }; pub fn parse(raw: []const u8) ?Protocol { @@ -14,26 +15,27 @@ pub fn parse(raw: []const u8) ?Protocol { if (std.mem.eql(u8, raw, "openai_chat")) return .openai_chat; if (std.mem.eql(u8, raw, "mqtt")) return .mqtt; if (std.mem.eql(u8, raw, "redis_stream")) return .redis_stream; + if (std.mem.eql(u8, raw, "a2a")) return .a2a; return null; } pub fn requiresModel(protocol: Protocol) bool { return switch (protocol) { .openai_chat => true, - .webhook, .api_chat, .mqtt, .redis_stream => false, + .webhook, .api_chat, .mqtt, .redis_stream, .a2a => false, }; } pub fn requiresExplicitPath(protocol: Protocol) bool { return switch (protocol) { .webhook => true, - .api_chat, .openai_chat, .mqtt, .redis_stream => false, + .api_chat, .openai_chat, .mqtt, .redis_stream, .a2a => false, }; } 
pub fn validateUrlForProtocol(url: []const u8, protocol: Protocol) bool { - // mqtt and redis_stream URLs are validated by their own parsers - if (protocol == .mqtt or protocol == .redis_stream) return true; + // mqtt, redis_stream, and a2a URLs are validated by their own parsers / have fixed paths + if (protocol == .mqtt or protocol == .redis_stream or protocol == .a2a) return true; if (!requiresExplicitPath(protocol)) return true; return hasExplicitPath(url); } @@ -47,6 +49,9 @@ pub fn buildRequestUrl( if (requiresExplicitPath(protocol) and !hasExplicitPath(trimmed)) { return error.WebhookUrlPathRequired; } + if (protocol == .a2a) { + return try std.fmt.allocPrint(allocator, "{s}/a2a", .{trimmed}); + } return try allocator.dupe(u8, trimmed); } @@ -140,6 +145,7 @@ test "parse protocol supports known values" { try std.testing.expectEqual(Protocol.webhook, parse("webhook").?); try std.testing.expectEqual(Protocol.api_chat, parse("api_chat").?); try std.testing.expectEqual(Protocol.openai_chat, parse("openai_chat").?); + try std.testing.expectEqual(Protocol.a2a, parse("a2a").?); try std.testing.expect(parse("unknown") == null); } @@ -173,11 +179,25 @@ test "validateUrlForProtocol enforces protocol-specific constraints" { try std.testing.expect(validateUrlForProtocol("http://localhost:42617/api/chat", .api_chat)); try std.testing.expect(validateUrlForProtocol("mqtt://broker:1883/topic", .mqtt)); try std.testing.expect(validateUrlForProtocol("redis://redis:6379/stream", .redis_stream)); + try std.testing.expect(validateUrlForProtocol("http://localhost:3000", .a2a)); +} + +test "buildRequestUrl appends /a2a for a2a protocol" { + const allocator = std.testing.allocator; + const url = try buildRequestUrl(allocator, "http://localhost:3000", .a2a); + defer allocator.free(url); + try std.testing.expectEqualStrings("http://localhost:3000/a2a", url); + + // Trailing slash is trimmed before appending /a2a + const url2 = try buildRequestUrl(allocator, "http://localhost:3000/", 
.a2a); + defer allocator.free(url2); + try std.testing.expectEqualStrings("http://localhost:3000/a2a", url2); } -test "parse supports mqtt and redis_stream" { +test "parse supports mqtt, redis_stream, and a2a" { try std.testing.expectEqual(Protocol.mqtt, parse("mqtt").?); try std.testing.expectEqual(Protocol.redis_stream, parse("redis_stream").?); + try std.testing.expectEqual(Protocol.a2a, parse("a2a").?); } test "parseMqttUrl extracts host, port, topic" { diff --git a/src/worker_response.zig b/src/worker_response.zig index 778109d..55110b0 100644 --- a/src/worker_response.zig +++ b/src/worker_response.zig @@ -1,11 +1,24 @@ const std = @import("std"); +pub const UsageInfo = struct { + input_tokens: i64 = 0, + output_tokens: i64 = 0, +}; + +pub const RateLimitData = struct { + remaining: i64 = 0, + limit: i64 = 0, + reset_ms: i64 = 0, +}; + pub const ParseResult = struct { output: []const u8, success: bool, error_text: ?[]const u8, async_pending: bool = false, correlation_id: ?[]const u8 = null, + usage: ?UsageInfo = null, + rate_limit: ?RateLimitData = null, }; pub const invalid_json_error = "worker response must be a JSON object"; @@ -34,6 +47,8 @@ pub fn parse(allocator: std.mem.Allocator, response_data: []const u8) !ParseResu .output = try allocator.dupe(u8, output), .success = true, .error_text = null, + .usage = extractUsage(obj), + .rate_limit = extractRateLimit(obj), }; } @@ -94,6 +109,53 @@ fn extractErrorMessage(allocator: std.mem.Allocator, obj: std.json.ObjectMap) !? 
return null; } +fn extractUsage(obj: std.json.ObjectMap) ?UsageInfo { + const usage_val = obj.get("usage") orelse return null; + if (usage_val != .object) return null; + const usage_obj = usage_val.object; + + var info = UsageInfo{}; + + // OpenAI format: prompt_tokens / completion_tokens + if (usage_obj.get("prompt_tokens")) |v| { + if (v == .integer) info.input_tokens = v.integer; + } + if (usage_obj.get("completion_tokens")) |v| { + if (v == .integer) info.output_tokens = v.integer; + } + + // A2A/generic format: input_tokens / output_tokens + if (usage_obj.get("input_tokens")) |v| { + if (v == .integer) info.input_tokens = v.integer; + } + if (usage_obj.get("output_tokens")) |v| { + if (v == .integer) info.output_tokens = v.integer; + } + + if (info.input_tokens == 0 and info.output_tokens == 0) return null; + return info; +} + +fn extractRateLimit(obj: std.json.ObjectMap) ?RateLimitData { + const rl_val = obj.get("rate_limit") orelse return null; + if (rl_val != .object) return null; + const rl_obj = rl_val.object; + + var info = RateLimitData{}; + if (rl_obj.get("remaining")) |v| { + if (v == .integer) info.remaining = v.integer; + } + if (rl_obj.get("limit")) |v| { + if (v == .integer) info.limit = v.integer; + } + if (rl_obj.get("reset_ms")) |v| { + if (v == .integer) info.reset_ms = v.integer; + } + + if (info.remaining == 0 and info.limit == 0) return null; + return info; +} + fn isAsyncAckWithoutOutput(obj: std.json.ObjectMap) bool { const status_val = obj.get("status") orelse return false; return status_val == .string and std.mem.eql(u8, status_val.string, "received"); @@ -165,3 +227,52 @@ test "parse rejects object without supported output fields" { try std.testing.expect(!result.success); try std.testing.expectEqualStrings(missing_output_error, result.error_text.?); } + +test "parse extracts usage info from OpenAI format" { + const allocator = std.testing.allocator; + const result = try parse( + allocator, + 
"{\"response\":\"done\",\"usage\":{\"prompt_tokens\":150,\"completion_tokens\":75}}", + ); + defer allocator.free(result.output); + try std.testing.expect(result.success); + try std.testing.expect(result.usage != null); + try std.testing.expectEqual(@as(i64, 150), result.usage.?.input_tokens); + try std.testing.expectEqual(@as(i64, 75), result.usage.?.output_tokens); +} + +test "parse extracts usage info from generic format" { + const allocator = std.testing.allocator; + const result = try parse( + allocator, + "{\"response\":\"done\",\"usage\":{\"input_tokens\":200,\"output_tokens\":100}}", + ); + defer allocator.free(result.output); + try std.testing.expect(result.success); + try std.testing.expect(result.usage != null); + try std.testing.expectEqual(@as(i64, 200), result.usage.?.input_tokens); + try std.testing.expectEqual(@as(i64, 100), result.usage.?.output_tokens); +} + +test "parse extracts rate limit info" { + const allocator = std.testing.allocator; + const result = try parse( + allocator, + "{\"response\":\"done\",\"rate_limit\":{\"remaining\":95,\"limit\":100,\"reset_ms\":1700000000000}}", + ); + defer allocator.free(result.output); + try std.testing.expect(result.success); + try std.testing.expect(result.rate_limit != null); + try std.testing.expectEqual(@as(i64, 95), result.rate_limit.?.remaining); + try std.testing.expectEqual(@as(i64, 100), result.rate_limit.?.limit); + try std.testing.expectEqual(@as(i64, 1700000000000), result.rate_limit.?.reset_ms); +} + +test "parse returns null usage when no usage field" { + const allocator = std.testing.allocator; + const result = try parse(allocator, "{\"response\":\"done\"}"); + defer allocator.free(result.output); + try std.testing.expect(result.success); + try std.testing.expect(result.usage == null); + try std.testing.expect(result.rate_limit == null); +} diff --git a/src/workflow_loader.zig b/src/workflow_loader.zig index 5f0fda4..f5d2fe0 100644 --- a/src/workflow_loader.zig +++ b/src/workflow_loader.zig 
@@ -1,4 +1,7 @@ const std = @import("std"); +const ids = @import("ids.zig"); +const Store = @import("store.zig").Store; +const log = std.log.scoped(.workflow_loader); // ── Types ───────────────────────────────────────────────────────────── @@ -100,6 +103,102 @@ test "loadWorkflows: supports absolute workflow directories" { try std.testing.expectEqualStrings("absolute", map.get("absolute").?.pipeline_id); } +// ── WorkflowWatcher ────────────────────────────────────────────────── + +pub const WorkflowWatcher = struct { + dir_path: []const u8, + store: *Store, + last_check_ms: i64, + file_hashes: std.StringHashMap(u64), + alloc: std.mem.Allocator, + + pub fn init(alloc: std.mem.Allocator, dir_path: []const u8, store: *Store) WorkflowWatcher { + return .{ + .dir_path = dir_path, + .store = store, + .last_check_ms = 0, + .file_hashes = std.StringHashMap(u64).init(alloc), + .alloc = alloc, + }; + } + + pub fn deinit(self: *WorkflowWatcher) void { + var it = self.file_hashes.iterator(); + while (it.next()) |entry| { + self.alloc.free(entry.key_ptr.*); + } + self.file_hashes.deinit(); + } + + /// Check for changed workflow files. Called periodically from engine tick. 
+ pub fn checkForChanges(self: *WorkflowWatcher) void { + const now = ids.nowMs(); + if (now - self.last_check_ms < 5000) return; // check every 5 seconds + self.last_check_ms = now; + + var dir = if (std.fs.path.isAbsolute(self.dir_path)) + std.fs.openDirAbsolute(self.dir_path, .{ .iterate = true }) catch return + else + std.fs.cwd().openDir(self.dir_path, .{ .iterate = true }) catch return; + defer dir.close(); + + var iter = dir.iterate(); + while (iter.next() catch null) |entry| { + if (entry.kind != .file) continue; + if (!std.mem.endsWith(u8, entry.name, ".json")) continue; + + const contents = dir.readFileAlloc(self.alloc, entry.name, 1024 * 1024) catch continue; + defer self.alloc.free(contents); + + // Compute FNV1a hash of content + const hash = std.hash.Fnv1a_64.hash(contents); + + // Check if hash changed + const existing = self.file_hashes.get(entry.name); + if (existing) |prev_hash| { + if (prev_hash == hash) continue; // unchanged + } + + // Parse and validate + const parsed = std.json.parseFromSlice(std.json.Value, self.alloc, contents, .{}) catch continue; + defer parsed.deinit(); + if (parsed.value != .object) continue; + + const obj = parsed.value.object; + + // Extract name and id + const wf_name = if (obj.get("name")) |v| (if (v == .string) v.string else null) else null; + const wf_id = if (obj.get("id")) |v| (if (v == .string) v.string else null) else null; + if (wf_id == null and wf_name == null) continue; + + const id = wf_id orelse wf_name.?; + const name = wf_name orelse wf_id.?; + + // Upsert into workflows table + // Try insert first; if it fails (duplicate id), update instead + self.store.createWorkflow(id, name, contents) catch { + self.store.updateWorkflow(id, name, contents) catch continue; + }; + + // Store hash (need to dupe the key since entry.name is transient) + const key_dupe = self.alloc.dupe(u8, entry.name) catch continue; + if (existing != null) { + // Free old key if we're replacing + if (self.file_hashes.fetchPut(key_dupe, 
hash) catch null) |old| { + self.alloc.free(old.key); + } + } else { + self.file_hashes.put(key_dupe, hash) catch { + self.alloc.free(key_dupe); + continue; + }; + } + + log.info("workflow {s} reloaded", .{id}); + } + } +}; + // ── getWorkflowForPipeline ──────────────────────────────────────────── pub fn getWorkflowForPipeline(map: *const WorkflowMap, pipeline_id: []const u8) ?*const WorkflowDef { @@ -299,3 +398,41 @@ test "parse workflow with continuation_prompt" { defer parsed.deinit(); try std.testing.expectEqualStrings("Continue: attempt #{{attempt}}", parsed.value.subprocess.continuation_prompt.?); } + +test "WorkflowWatcher: detects file changes" { + const allocator = std.testing.allocator; + var s = try Store.init(allocator, ":memory:"); + defer s.deinit(); + + var tmp = std.testing.tmpDir(.{}); + defer tmp.cleanup(); + + const dir_path = try tmp.dir.realpathAlloc(allocator, "."); + defer allocator.free(dir_path); + + var watcher = WorkflowWatcher.init(allocator, dir_path, &s); + defer watcher.deinit(); + + // Force last_check_ms to 0 so check runs immediately + watcher.last_check_ms = 0; + + // Write a workflow file + try tmp.dir.writeFile(.{ + .sub_path = "test_wf.json", + .data = + \\{"id":"wf-test","name":"Test WF","nodes":{}} + , + }); + + watcher.checkForChanges(); + + // Verify workflow was inserted + const wf = try s.getWorkflow(allocator, "wf-test"); + try std.testing.expect(wf != null); + allocator.free(wf.?.id); + allocator.free(wf.?.name); + allocator.free(wf.?.definition_json); + + // Verify hash was stored + try std.testing.expectEqual(@as(usize, 1), watcher.file_hashes.count()); +} diff --git a/src/workflow_validation.zig b/src/workflow_validation.zig index 3374e94..c5419fb 100644 --- a/src/workflow_validation.zig +++ b/src/workflow_validation.zig @@ -1,4 +1,7 @@ const std = @import("std"); +const Allocator = std.mem.Allocator; + +// ── Legacy validation (used by api.zig for POST /runs) ──────────────── pub const ValidateError = error{ 
StepMustBeObject, @@ -9,16 +12,6 @@ pub const ValidateError = error{ DependsOnItemNotString, DependsOnDuplicate, DependsOnUnknownStepId, - LoopBodyRequired, - SubWorkflowRequired, - WaitConditionRequired, - WaitDurationInvalid, - WaitUntilInvalid, - WaitSignalInvalid, - RouterRoutesRequired, - SagaBodyRequired, - DebateCountRequired, - GroupChatParticipantsRequired, RetryMustBeObject, MaxAttemptsMustBePositiveInteger, TimeoutMsMustBePositiveInteger, @@ -67,47 +60,9 @@ fn getJsonString(obj: std.json.ObjectMap, key: []const u8) ?[]const u8 { } fn validateStepTypeRules(step_type: []const u8, step_obj: std.json.ObjectMap) ValidateError!void { - if (std.mem.eql(u8, step_type, "loop") and step_obj.get("body") == null) { - return error.LoopBodyRequired; - } - if (std.mem.eql(u8, step_type, "sub_workflow") and step_obj.get("workflow") == null) { - return error.SubWorkflowRequired; - } - if (std.mem.eql(u8, step_type, "wait")) { - if (step_obj.get("duration_ms") == null and step_obj.get("until_ms") == null and step_obj.get("signal") == null) { - return error.WaitConditionRequired; - } - if (step_obj.get("duration_ms")) |duration_val| { - switch (duration_val) { - .integer => { - if (duration_val.integer < 0) return error.WaitDurationInvalid; - }, - else => return error.WaitDurationInvalid, - } - } - if (step_obj.get("until_ms")) |until_val| { - if (until_val != .integer or until_val.integer < 0) { - return error.WaitUntilInvalid; - } - } - if (step_obj.get("signal")) |signal_val| { - if (signal_val != .string or signal_val.string.len == 0) { - return error.WaitSignalInvalid; - } - } - } - if (std.mem.eql(u8, step_type, "router") and step_obj.get("routes") == null) { - return error.RouterRoutesRequired; - } - if (std.mem.eql(u8, step_type, "saga") and step_obj.get("body") == null) { - return error.SagaBodyRequired; - } - if (std.mem.eql(u8, step_type, "debate") and step_obj.get("count") == null) { - return error.DebateCountRequired; - } - if (std.mem.eql(u8, step_type, 
"group_chat") and step_obj.get("participants") == null) { - return error.GroupChatParticipantsRequired; - } + // No specific rules for current step types (task, route, interrupt, agent, send, transform, subgraph) + _ = step_type; + _ = step_obj; } fn validateDependsOnTypes(allocator: std.mem.Allocator, step_obj: std.json.ObjectMap) ValidateError!void { @@ -140,7 +95,505 @@ fn validateExecutionControls(step_obj: std.json.ObjectMap) ValidateError!void { } } -// ── Tests ───────────────────────────────────────────────────────────── +// ── New graph-based workflow validation ─────────────────────────────── + +pub const ValidationError = struct { + err_type: []const u8, + node: ?[]const u8, + key: ?[]const u8, + message: []const u8, +}; + +/// Validate a workflow definition JSON (new graph format). +/// Returns a slice of ValidationError; caller must free with alloc.free(). +/// Individual string fields inside each ValidationError point into the +/// parsed JSON tree (or are literals) and do not need separate freeing. 
+pub fn validate(alloc: Allocator, definition_json: []const u8) ![]ValidationError { + var errors: std.ArrayListUnmanaged(ValidationError) = .empty; + defer errors.deinit(alloc); + + const parsed = std.json.parseFromSlice(std.json.Value, alloc, definition_json, .{}) catch { + try errors.append(alloc, .{ + .err_type = "parse_error", + .node = null, + .key = null, + .message = "failed to parse workflow JSON", + }); + return errors.toOwnedSlice(alloc); + }; + defer parsed.deinit(); + + if (parsed.value != .object) { + try errors.append(alloc, .{ + .err_type = "parse_error", + .node = null, + .key = null, + .message = "workflow must be a JSON object", + }); + return errors.toOwnedSlice(alloc); + } + const root = parsed.value.object; + + // Extract nodes map + const nodes_val = root.get("nodes") orelse { + try errors.append(alloc, .{ + .err_type = "missing_field", + .node = null, + .key = "nodes", + .message = "workflow must have a 'nodes' object", + }); + return errors.toOwnedSlice(alloc); + }; + if (nodes_val != .object) { + try errors.append(alloc, .{ + .err_type = "missing_field", + .node = null, + .key = "nodes", + .message = "'nodes' must be an object", + }); + return errors.toOwnedSlice(alloc); + } + const nodes = nodes_val.object; + + // Extract edges array + const edges_val = root.get("edges") orelse { + try errors.append(alloc, .{ + .err_type = "missing_field", + .node = null, + .key = "edges", + .message = "workflow must have an 'edges' array", + }); + return errors.toOwnedSlice(alloc); + }; + if (edges_val != .array) { + try errors.append(alloc, .{ + .err_type = "missing_field", + .node = null, + .key = "edges", + .message = "'edges' must be an array", + }); + return errors.toOwnedSlice(alloc); + } + const edges = edges_val.array.items; + + // Extract state_schema (may be absent or empty object) + var state_schema: ?std.json.ObjectMap = null; + if (root.get("state_schema")) |ss_val| { + if (ss_val == .object) state_schema = ss_val.object; + } + + // --- 
Collect send target_nodes (exempt from reachability) --- + var send_targets = std.StringHashMap(void).init(alloc); + defer send_targets.deinit(); + var node_it = nodes.iterator(); + while (node_it.next()) |entry| { + const nobj = entry.value_ptr.*; + if (nobj != .object) continue; + const ntype = getJsonStringFromObj(nobj.object, "type") orelse continue; + if (std.mem.eql(u8, ntype, "send")) { + if (getJsonStringFromObj(nobj.object, "target_node")) |tn| { + try send_targets.put(tn, {}); + } + } + } + + // --- Check 1: nodes_in_edges_exist --- + // Build adjacency list while we're at it + // Edge source format: "node" or "node:route_value" + // We'll parse edge sources to get the actual node name + var edge_sources: std.ArrayListUnmanaged([]const u8) = .empty; + defer edge_sources.deinit(alloc); + var edge_targets: std.ArrayListUnmanaged([]const u8) = .empty; + defer edge_targets.deinit(alloc); + + for (edges) |edge_val| { + if (edge_val != .array or edge_val.array.items.len < 2) continue; + const src_raw = if (edge_val.array.items[0] == .string) edge_val.array.items[0].string else continue; + const tgt = if (edge_val.array.items[1] == .string) edge_val.array.items[1].string else continue; + + // Parse "node:route_value" -> node name + const src_node = edgeSourceNode(src_raw); + + try edge_sources.append(alloc, src_raw); + try edge_targets.append(alloc, tgt); + + // Check source node exists (skip __start__, __end__) + if (!isReserved(src_node)) { + if (!nodes.contains(src_node)) { + try errors.append(alloc, .{ + .err_type = "nodes_in_edges_exist", + .node = src_node, + .key = null, + .message = "edge source node does not exist in nodes map", + }); + } + } + // Check target node exists (skip __start__, __end__) + if (!isReserved(tgt)) { + if (!nodes.contains(tgt)) { + try errors.append(alloc, .{ + .err_type = "nodes_in_edges_exist", + .node = tgt, + .key = null, + .message = "edge target node does not exist in nodes map", + }); + } + } + } + + // --- Build 
reachability set from __start__ --- + // We do a BFS/DFS using static edges only (not send target_nodes). + var reachable = std.StringHashMap(void).init(alloc); + defer reachable.deinit(); + var queue: std.ArrayListUnmanaged([]const u8) = .empty; + defer queue.deinit(alloc); + + try reachable.put("__start__", {}); + try queue.append(alloc, "__start__"); + + var qi: usize = 0; + while (qi < queue.items.len) : (qi += 1) { + const current = queue.items[qi]; + for (edge_sources.items, edge_targets.items) |src_raw, tgt| { + const src_node = edgeSourceNode(src_raw); + if (std.mem.eql(u8, src_node, current) or std.mem.eql(u8, src_raw, current)) { + if (!reachable.contains(tgt)) { + try reachable.put(tgt, {}); + try queue.append(alloc, tgt); + } + } + } + } + + // --- Check 2: unreachable_node --- + node_it = nodes.iterator(); + while (node_it.next()) |entry| { + const nname = entry.key_ptr.*; + if (reachable.contains(nname)) continue; + // Exempt send target_nodes + if (send_targets.contains(nname)) continue; + try errors.append(alloc, .{ + .err_type = "unreachable_node", + .node = nname, + .key = null, + .message = "node is not reachable from __start__", + }); + } + + // --- Check 3: end_unreachable --- + // __end__ must be reachable from __start__ (simple check: it appears in + // reachable set, or at least one edge targets __end__). + // For leaf nodes that are not send_targets, there should be a path to __end__. + // We do a simplified check: __end__ must be in the reachable set. + if (!reachable.contains("__end__")) { + try errors.append(alloc, .{ + .err_type = "end_unreachable", + .node = null, + .key = null, + .message = "__end__ is not reachable from __start__", + }); + } + + // --- Check 4: unintentional_cycle --- + // Detect cycles via DFS. Edges from route nodes (src contains ':') back to + // earlier nodes are intentional. Other back-edges are cycles (errors). 
+ { + const CycleState = enum { unvisited, in_stack, done }; + var cycle_state = std.StringHashMap(CycleState).init(alloc); + defer cycle_state.deinit(); + + // Initialize all known nodes + node_it = nodes.iterator(); + while (node_it.next()) |entry| { + try cycle_state.put(entry.key_ptr.*, .unvisited); + } + try cycle_state.put("__start__", .unvisited); + try cycle_state.put("__end__", .unvisited); + + // We need to track which src_raw produced the edge to know if it's a route edge + // Build adjacency: node -> list of (tgt, src_raw_is_route) + const EdgeInfo = struct { tgt: []const u8, from_route: bool }; + var adj = std.StringHashMap(std.ArrayListUnmanaged(EdgeInfo)).init(alloc); + defer { + var adj_it = adj.iterator(); + while (adj_it.next()) |e| e.value_ptr.deinit(alloc); + adj.deinit(); + } + + for (edge_sources.items, edge_targets.items) |src_raw, tgt| { + const src_node = edgeSourceNode(src_raw); + const is_route_edge = std.mem.indexOfScalar(u8, src_raw, ':') != null; + const res = try adj.getOrPut(src_node); + if (!res.found_existing) { + res.value_ptr.* = .empty; + } + try res.value_ptr.append(alloc, .{ .tgt = tgt, .from_route = is_route_edge }); + } + + // Iterative DFS + var visited_for_dfs = std.StringHashMap(CycleState).init(alloc); + defer visited_for_dfs.deinit(); + + // Initialize + var cs_it = cycle_state.iterator(); + while (cs_it.next()) |e| { + try visited_for_dfs.put(e.key_ptr.*, .unvisited); + } + + var dfs_nodes: std.ArrayListUnmanaged([]const u8) = .empty; + defer dfs_nodes.deinit(alloc); + var cs_it2 = cycle_state.iterator(); + while (cs_it2.next()) |e| { + try dfs_nodes.append(alloc, e.key_ptr.*); + } + + for (dfs_nodes.items) |start_node| { + const s = visited_for_dfs.get(start_node) orelse .unvisited; + if (s != .unvisited) continue; + + // DFS iterative with path tracking + var path = std.StringHashMap(void).init(alloc); + defer path.deinit(); + + const DfsEntry = struct { node: []const u8, child_idx: usize }; + var stack: 
std.ArrayListUnmanaged(DfsEntry) = .empty; + defer stack.deinit(alloc); + + try stack.append(alloc, .{ .node = start_node, .child_idx = 0 }); + try path.put(start_node, {}); + visited_for_dfs.put(start_node, .in_stack) catch {}; + + while (stack.items.len > 0) { + const top = &stack.items[stack.items.len - 1]; + const neighbors = adj.get(top.node); + if (neighbors == null or top.child_idx >= neighbors.?.items.len) { + // Done with this node + _ = path.remove(top.node); + visited_for_dfs.put(top.node, .done) catch {}; + _ = stack.pop(); + continue; + } + const neighbor = neighbors.?.items[top.child_idx]; + top.child_idx += 1; + + const tgt = neighbor.tgt; + const from_route = neighbor.from_route; + + // Skip reserved endpoints for cycle detection + if (isReserved(tgt)) continue; + + const tgt_state = visited_for_dfs.get(tgt) orelse .unvisited; + if (tgt_state == .in_stack) { + // Back edge found — cycle + if (!from_route) { + // Report cycle error only once per target + var already_reported = false; + for (errors.items) |e| { + if (std.mem.eql(u8, e.err_type, "unintentional_cycle") and + e.node != null and std.mem.eql(u8, e.node.?, tgt)) + { + already_reported = true; + break; + } + } + if (!already_reported) { + try errors.append(alloc, .{ + .err_type = "unintentional_cycle", + .node = tgt, + .key = null, + .message = "cycle detected: non-route edge creates a cycle", + }); + } + } + // Intentional route cycle — skip + } else if (tgt_state == .unvisited) { + visited_for_dfs.put(tgt, .in_stack) catch {}; + try path.put(tgt, {}); + try stack.append(alloc, .{ .node = tgt, .child_idx = 0 }); + } + // .done: already processed, no cycle through this path + } + } + } + + // --- Check 5: undefined_state_key --- + if (state_schema) |schema| { + node_it = nodes.iterator(); + while (node_it.next()) |entry| { + const nname = entry.key_ptr.*; + const nval = entry.value_ptr.*; + if (nval != .object) continue; + const nobj = nval.object; + + // Check prompt field + if 
(getJsonStringFromObj(nobj, "prompt")) |prompt| { + try checkStateRefs(alloc, &errors, schema, nname, prompt); + } + // Check message field (interrupt) + if (getJsonStringFromObj(nobj, "message")) |msg| { + try checkStateRefs(alloc, &errors, schema, nname, msg); + } + } + } + + // --- Check 6: invalid_route_target --- + node_it = nodes.iterator(); + while (node_it.next()) |entry| { + const nname = entry.key_ptr.*; + const nval = entry.value_ptr.*; + if (nval != .object) continue; + const nobj = nval.object; + const ntype = getJsonStringFromObj(nobj, "type") orelse continue; + if (!std.mem.eql(u8, ntype, "route")) continue; + + const routes_val = nobj.get("routes") orelse continue; + if (routes_val != .object) continue; + var routes_it = routes_val.object.iterator(); + while (routes_it.next()) |re| { + const target = if (re.value_ptr.* == .string) re.value_ptr.*.string else continue; + if (!nodes.contains(target)) { + try errors.append(alloc, .{ + .err_type = "invalid_route_target", + .node = nname, + .key = re.key_ptr.*, + .message = "route target node does not exist", + }); + } + if (!hasRouteEdge(edge_sources.items, edge_targets.items, nname, re.key_ptr.*, target)) { + try errors.append(alloc, .{ + .err_type = "missing_route_edge", + .node = nname, + .key = re.key_ptr.*, + .message = "route key is declared in routes but has no matching conditional edge", + }); + } + } + + if (getJsonStringFromObj(nobj, "default")) |default_route| { + if (!routes_val.object.contains(default_route)) { + try errors.append(alloc, .{ + .err_type = "invalid_route_default", + .node = nname, + .key = "default", + .message = "route default must reference a declared routes key", + }); + } + } + } + + // --- Check 7: invalid_send_target --- + node_it = nodes.iterator(); + while (node_it.next()) |entry| { + const nname = entry.key_ptr.*; + const nval = entry.value_ptr.*; + if (nval != .object) continue; + const nobj = nval.object; + const ntype = getJsonStringFromObj(nobj, "type") orelse 
continue; + if (!std.mem.eql(u8, ntype, "send")) continue; + + if (getJsonStringFromObj(nobj, "target_node")) |tn| { + if (!nodes.contains(tn)) { + try errors.append(alloc, .{ + .err_type = "invalid_send_target", + .node = nname, + .key = "target_node", + .message = "send target_node does not exist in nodes map", + }); + } + } + } + + // The errors list contains slices pointing into `parsed` which will be + // freed by `defer parsed.deinit()`. We need to copy all strings into + // alloc-owned memory before returning. + const result = try copyErrors(alloc, errors.items); + return result; +} + +// ── Helpers ─────────────────────────────────────────────────────────── + +fn isReserved(name: []const u8) bool { + return std.mem.eql(u8, name, "__start__") or std.mem.eql(u8, name, "__end__"); +} + +/// Given a raw edge source like "node:route_value", return "node". +/// If no colon, returns the whole string. +fn edgeSourceNode(src_raw: []const u8) []const u8 { + if (std.mem.indexOfScalar(u8, src_raw, ':')) |colon_pos| { + return src_raw[0..colon_pos]; + } + return src_raw; +} + +fn hasRouteEdge(edge_sources: []const []const u8, edge_targets: []const []const u8, node_name: []const u8, route_key: []const u8, target: []const u8) bool { + for (edge_sources, edge_targets) |src_raw, edge_target| { + if (!std.mem.eql(u8, edge_target, target)) continue; + const colon_pos = std.mem.indexOfScalar(u8, src_raw, ':') orelse continue; + if (!std.mem.eql(u8, src_raw[0..colon_pos], node_name)) continue; + if (std.mem.eql(u8, src_raw[colon_pos + 1 ..], route_key)) return true; + } + return false; +} + +fn getJsonStringFromObj(obj: std.json.ObjectMap, key: []const u8) ?[]const u8 { + const val = obj.get(key) orelse return null; + if (val == .string) return val.string; + return null; +} + +/// Scan `text` for {{state.KEY}} references and check them against schema. 
+fn checkStateRefs( + alloc: Allocator, + errors: *std.ArrayListUnmanaged(ValidationError), + schema: std.json.ObjectMap, + node_name: []const u8, + text: []const u8, +) !void { + var pos: usize = 0; + while (pos < text.len) { + // Find "{{" + const open = std.mem.indexOfPos(u8, text, pos, "{{") orelse break; + const close = std.mem.indexOfPos(u8, text, open + 2, "}}") orelse break; + const expr = text[open + 2 .. close]; + pos = close + 2; + + // Check if it's "state.KEY" + if (std.mem.startsWith(u8, expr, "state.")) { + const key = expr["state.".len..]; + if (key.len > 0 and !schema.contains(key)) { + // No copy is made at this site: `node_name` and `key` are + // slices into the parsed JSON tree and would dangle after + // parsed.deinit(). That is safe only because copyErrors() + // deep-copies every string in the error list before + // validate() returns its result to the caller. + try errors.append(alloc, .{ + .err_type = "undefined_state_key", + .node = node_name, + .key = key, + .message = "state key referenced in template is not defined in state_schema", + }); + } + } + } +} + +/// Deep-copy all strings in the error list into alloc-owned memory. +/// This is needed because the source strings point into a parsed JSON tree +/// that will be freed after validate() returns. +fn copyErrors(alloc: Allocator, src: []const ValidationError) ![]ValidationError { + const result = try alloc.alloc(ValidationError, src.len); + for (src, 0..) 
|e, i| { + result[i] = .{ + .err_type = try alloc.dupe(u8, e.err_type), + .node = if (e.node) |n| try alloc.dupe(u8, n) else null, + .key = if (e.key) |k| try alloc.dupe(u8, k) else null, + .message = try alloc.dupe(u8, e.message), + }; + } + return result; +} + +// ── Tests: legacy ───────────────────────────────────────────────────── test "validateStepsForCreateRun: valid workflow" { const allocator = std.testing.allocator; @@ -249,58 +702,6 @@ test "validateStepsForCreateRun: rejects duplicate depends_on item" { try std.testing.expectError(error.DependsOnDuplicate, validateStepsForCreateRun(allocator, parsed.value.array.items)); } -test "validateStepsForCreateRun: rejects missing sub_workflow workflow field" { - const allocator = std.testing.allocator; - const payload = - \\[ - \\ {"id":"sw","type":"sub_workflow"} - \\] - ; - - const parsed = try std.json.parseFromSlice(std.json.Value, allocator, payload, .{}); - defer parsed.deinit(); - try std.testing.expectError(error.SubWorkflowRequired, validateStepsForCreateRun(allocator, parsed.value.array.items)); -} - -test "validateStepsForCreateRun: rejects missing saga body field" { - const allocator = std.testing.allocator; - const payload = - \\[ - \\ {"id":"sg","type":"saga"} - \\] - ; - - const parsed = try std.json.parseFromSlice(std.json.Value, allocator, payload, .{}); - defer parsed.deinit(); - try std.testing.expectError(error.SagaBodyRequired, validateStepsForCreateRun(allocator, parsed.value.array.items)); -} - -test "validateStepsForCreateRun: rejects missing debate count field" { - const allocator = std.testing.allocator; - const payload = - \\[ - \\ {"id":"db","type":"debate","prompt_template":"x"} - \\] - ; - - const parsed = try std.json.parseFromSlice(std.json.Value, allocator, payload, .{}); - defer parsed.deinit(); - try std.testing.expectError(error.DebateCountRequired, validateStepsForCreateRun(allocator, parsed.value.array.items)); -} - -test "validateStepsForCreateRun: rejects missing 
group_chat participants field" { - const allocator = std.testing.allocator; - const payload = - \\[ - \\ {"id":"gc","type":"group_chat","prompt_template":"x"} - \\] - ; - - const parsed = try std.json.parseFromSlice(std.json.Value, allocator, payload, .{}); - defer parsed.deinit(); - try std.testing.expectError(error.GroupChatParticipantsRequired, validateStepsForCreateRun(allocator, parsed.value.array.items)); -} - test "validateStepsForCreateRun: rejects non-object retry field" { const allocator = std.testing.allocator; const payload = @@ -340,41 +741,122 @@ test "validateStepsForCreateRun: rejects non-positive timeout_ms" { try std.testing.expectError(error.TimeoutMsMustBePositiveInteger, validateStepsForCreateRun(allocator, parsed.value.array.items)); } -test "validateStepsForCreateRun: rejects invalid wait duration string" { - const allocator = std.testing.allocator; - const payload = - \\[ - \\ {"id":"w","type":"wait","duration_ms":"abc"} - \\] +// ── Tests: new graph validation ──────────────────────────────────────── + +test "validate valid simple workflow" { + const alloc = std.testing.allocator; + const wf = + \\{"state_schema":{"msg":{"type":"string","reducer":"last_value"}},"nodes":{"a":{"type":"task","prompt":"{{state.msg}}"}},"edges":[["__start__","a"],["a","__end__"]]} ; + const errors = try validate(alloc, wf); + defer { + for (errors) |e| { + alloc.free(e.err_type); + if (e.node) |n| alloc.free(n); + if (e.key) |k| alloc.free(k); + alloc.free(e.message); + } + alloc.free(errors); + } + try std.testing.expectEqual(@as(usize, 0), errors.len); +} - const parsed = try std.json.parseFromSlice(std.json.Value, allocator, payload, .{}); - defer parsed.deinit(); - try std.testing.expectError(error.WaitDurationInvalid, validateStepsForCreateRun(allocator, parsed.value.array.items)); +test "validate unreachable node" { + const alloc = std.testing.allocator; + const wf = + 
\\{"state_schema":{},"nodes":{"a":{"type":"task","prompt":"x"},"orphan":{"type":"task","prompt":"y"}},"edges":[["__start__","a"],["a","__end__"]]} + ; + const errors = try validate(alloc, wf); + defer { + for (errors) |e| { + alloc.free(e.err_type); + if (e.node) |n| alloc.free(n); + if (e.key) |k| alloc.free(k); + alloc.free(e.message); + } + alloc.free(errors); + } + try std.testing.expect(errors.len > 0); + try std.testing.expectEqualStrings("unreachable_node", errors[0].err_type); } -test "validateStepsForCreateRun: rejects negative wait duration" { - const allocator = std.testing.allocator; - const payload = - \\[ - \\ {"id":"w","type":"wait","duration_ms":-1} - \\] +test "validate undefined state key" { + const alloc = std.testing.allocator; + const wf = + \\{"state_schema":{"msg":{"type":"string","reducer":"last_value"}},"nodes":{"a":{"type":"task","prompt":"{{state.typo}}"}},"edges":[["__start__","a"],["a","__end__"]]} ; + const errors = try validate(alloc, wf); + defer { + for (errors) |e| { + alloc.free(e.err_type); + if (e.node) |n| alloc.free(n); + if (e.key) |k| alloc.free(k); + alloc.free(e.message); + } + alloc.free(errors); + } + try std.testing.expect(errors.len > 0); + try std.testing.expectEqualStrings("undefined_state_key", errors[0].err_type); +} - const parsed = try std.json.parseFromSlice(std.json.Value, allocator, payload, .{}); - defer parsed.deinit(); - try std.testing.expectError(error.WaitDurationInvalid, validateStepsForCreateRun(allocator, parsed.value.array.items)); +test "validate send target exempt from reachability" { + const alloc = std.testing.allocator; + const wf = + \\{"state_schema":{"items":{"type":"array","reducer":"last_value"},"results":{"type":"array","reducer":"append"}},"nodes":{"s":{"type":"send","items_key":"state.items","target_node":"worker","output_key":"results"},"worker":{"type":"task","prompt":"do work"}},"edges":[["__start__","s"],["s","__end__"]]} + ; + const errors = try validate(alloc, wf); + defer { + for 
(errors) |e| { + alloc.free(e.err_type); + if (e.node) |n| alloc.free(n); + if (e.key) |k| alloc.free(k); + alloc.free(e.message); + } + alloc.free(errors); + } + try std.testing.expectEqual(@as(usize, 0), errors.len); } -test "validateStepsForCreateRun: rejects invalid wait signal type" { - const allocator = std.testing.allocator; - const payload = - \\[ - \\ {"id":"w","type":"wait","signal":1} - \\] +test "validate invalid route target" { + const alloc = std.testing.allocator; + const wf = + \\{"state_schema":{"x":{"type":"string","reducer":"last_value"}},"nodes":{"r":{"type":"route","input":"state.x","routes":{"a":"nonexistent"}}},"edges":[["__start__","r"],["r:a","nonexistent"]]} ; + const errors = try validate(alloc, wf); + defer { + for (errors) |e| { + alloc.free(e.err_type); + if (e.node) |n| alloc.free(n); + if (e.key) |k| alloc.free(k); + alloc.free(e.message); + } + alloc.free(errors); + } + // Should have error about nonexistent node (either in route target or edge target) + try std.testing.expect(errors.len > 0); +} - const parsed = try std.json.parseFromSlice(std.json.Value, allocator, payload, .{}); - defer parsed.deinit(); - try std.testing.expectError(error.WaitSignalInvalid, validateStepsForCreateRun(allocator, parsed.value.array.items)); +test "validate route requires matching conditional edges for declared routes" { + const alloc = std.testing.allocator; + const wf = + \\{"state_schema":{"x":{"type":"string","reducer":"last_value"}},"nodes":{"r":{"type":"route","input":"state.x","routes":{"yes":"approved"},"default":"yes"},"approved":{"type":"task","prompt":"approve"}},"edges":[["__start__","r"],["r:no","approved"],["approved","__end__"]]} + ; + const errors = try validate(alloc, wf); + defer { + for (errors) |e| { + alloc.free(e.err_type); + if (e.node) |n| alloc.free(n); + if (e.key) |k| alloc.free(k); + alloc.free(e.message); + } + alloc.free(errors); + } + var found_missing_route_edge = false; + for (errors) |err| { + if (std.mem.eql(u8, 
err.err_type, "missing_route_edge")) { + found_missing_route_edge = true; + break; + } + } + try std.testing.expect(found_missing_route_edge); } diff --git a/src/workspace.zig b/src/workspace.zig index c8b0d39..b7ecd9b 100644 --- a/src/workspace.zig +++ b/src/workspace.zig @@ -14,6 +14,52 @@ pub fn sanitizeId(allocator: std.mem.Allocator, id: []const u8) ![]const u8 { return buf; } +/// Validate that a workspace path is safely contained within the workspace root. +/// Returns true if the canonical workspace_path starts with the canonical root +/// and contains no invalid characters. Returns false if a symlink escape or +/// directory traversal is detected. +pub fn validateWorkspacePath(allocator: std.mem.Allocator, workspace_root: []const u8, workspace_path: []const u8) bool { + // Check for invalid characters (\n, \r, \0) in the raw path + for (workspace_path) |ch| { + if (ch == '\n' or ch == '\r' or ch == 0) { + log.warn("workspace path contains invalid character: {s}", .{workspace_path}); + return false; + } + } + + // Canonicalize both paths (resolves symlinks) + const canon_root = std.fs.cwd().realpathAlloc(allocator, workspace_root) catch { + log.warn("workspace: cannot resolve root {s}", .{workspace_root}); + return false; + }; + defer allocator.free(canon_root); + + const canon_path = std.fs.cwd().realpathAlloc(allocator, workspace_path) catch { + log.warn("workspace: cannot resolve path {s}", .{workspace_path}); + return false; + }; + defer allocator.free(canon_path); + + // Check that canonical workspace_path starts with canonical workspace_root + if (!std.mem.startsWith(u8, canon_path, canon_root)) { + log.warn("workspace path escape detected: {s} is not under {s}", .{ canon_path, canon_root }); + return false; + } + + // Ensure there's a separator after the root (not just a prefix match on a longer name) + if (canon_path.len > canon_root.len and canon_path[canon_root.len] != std.fs.path.sep) { + log.warn("workspace path escape detected: {s} is not under 
{s}", .{ canon_path, canon_root }); + return false; + } + + return true; +} + +/// Sanitize a directory name by replacing any character not in [A-Za-z0-9._-] +/// with '_'. This prevents directory traversal via task identifiers. +/// Alias for sanitizeId — same logic, exported under the canonical name. +pub const sanitizeDirectoryName = sanitizeId; + /// An isolated workspace directory for a single task. pub const Workspace = struct { root: []const u8, @@ -45,6 +91,13 @@ pub const Workspace = struct { return err; }; + // Validate the created path is safely under the workspace root + if (!validateWorkspacePath(allocator, root, path)) { + log.warn("workspace: path validation failed for {s}, refusing to use", .{path}); + allocator.free(path); + return error.PathValidationFailed; + } + // If the directory already had contents it was not freshly created var dir = try std.fs.cwd().openDir(path, .{ .iterate = true }); defer dir.close(); @@ -114,7 +167,11 @@ pub fn cleanAll(root: []const u8) void { /// Returns true when the command exits with code 0, false otherwise. /// Times out after `timeout_ms` milliseconds (the child is killed on timeout). 
pub fn runHook(allocator: std.mem.Allocator, command: []const u8, cwd: []const u8, timeout_ms: u64) !bool { - const argv = [_][]const u8{ "/bin/sh", "-lc", command }; + const native = @import("builtin").os.tag; + const argv = if (native == .windows) + [_][]const u8{ "cmd.exe", "/C", command } + else + [_][]const u8{ "/bin/sh", "-lc", command }; var child = std.process.Child.init(&argv, allocator); child.cwd = cwd; @@ -215,6 +272,9 @@ test "Workspace create and remove" { } test "runHook executes shell command" { + const native = @import("builtin").os.tag; + if (native == .windows) return error.SkipZigTest; + const allocator = std.testing.allocator; var tmp = std.testing.tmpDir(.{}); defer tmp.cleanup(); @@ -233,6 +293,9 @@ test "runHook executes shell command" { } test "runHook returns false for failing command" { + const native = @import("builtin").os.tag; + if (native == .windows) return error.SkipZigTest; + const allocator = std.testing.allocator; var tmp = std.testing.tmpDir(.{}); defer tmp.cleanup(); @@ -262,3 +325,38 @@ test "cleanAll removes all subdirectories" { try std.testing.expectError(error.FileNotFound, tmp.dir.openDir("task-001", .{})); try std.testing.expectError(error.FileNotFound, tmp.dir.openDir("task-002", .{})); } + +test "validateWorkspacePath accepts safe path" { + const allocator = std.testing.allocator; + var tmp = std.testing.tmpDir(.{}); + defer tmp.cleanup(); + + const root = try tmp.dir.realpathAlloc(allocator, "."); + defer allocator.free(root); + + // Create a subdirectory + try tmp.dir.makeDir("safe-task"); + const sub_path = try std.fs.path.join(allocator, &.{ root, "safe-task" }); + defer allocator.free(sub_path); + + try std.testing.expect(validateWorkspacePath(allocator, root, sub_path)); +} + +test "validateWorkspacePath rejects path outside root" { + const allocator = std.testing.allocator; + var tmp = std.testing.tmpDir(.{}); + defer tmp.cleanup(); + + const root = try tmp.dir.realpathAlloc(allocator, "."); + defer 
allocator.free(root); + + // /tmp is definitely not under the test temp dir + try std.testing.expect(!validateWorkspacePath(allocator, root, "/tmp")); +} + +test "sanitizeDirectoryName replaces invalid chars" { + const allocator = std.testing.allocator; + const result = try sanitizeDirectoryName(allocator, "../../etc/passwd"); + defer allocator.free(result); + try std.testing.expectEqualStrings(".._.._etc_passwd", result); +}