From 3e008e2048ca4ed9379c52a4aacbcd6cf95ea9d5 Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Thu, 19 Feb 2026 13:50:36 +0000 Subject: [PATCH 1/9] feat: add coordinator/delegate expert separation Introduce a formal distinction between coordinator and delegate experts based on naming convention (@scope/name for delegates). Enforce delegation rules via Zod schema validation and provide type-specific meta-prompts. - Add expert-type utilities for classification and delegation validation - Split meta-prompt: coordinators get parallelism/task-splitting guidance - Validate delegation rules in expertSchema via superRefine - Rename E2E and root config delegates to @coordinator/name convention - Remove old examples directory Co-Authored-By: Claude Opus 4.6 --- .changeset/coordinator-delegate-separation.md | 8 + e2e/experts/delegate-chain.toml | 16 +- e2e/experts/mixed-tools.toml | 8 +- e2e/experts/parallel-delegate.toml | 12 +- e2e/perstack-cli/continue.test.ts | 2 +- e2e/perstack-cli/delegate.test.ts | 10 +- e2e/perstack-cli/interactive.test.ts | 2 +- examples/README.md | 53 --- examples/bug-finder/README.md | 76 ---- examples/bug-finder/perstack.toml | 88 ---- examples/github-issue-bot/README.md | 150 ------- .../github-issue-bot/checkpoint-filter.ts | 128 ------ examples/github-issue-bot/issue-bot.yml | 50 --- examples/github-issue-bot/perstack.toml | 51 --- examples/gmail-assistant/.gitignore | 3 - examples/gmail-assistant/README.md | 276 ------------- examples/gmail-assistant/filter.ts | 124 ------ examples/gmail-assistant/perstack.toml | 380 ------------------ packages/core/src/index.ts | 1 + packages/core/src/schemas/expert.test.ts | 35 ++ packages/core/src/schemas/expert.ts | 18 +- packages/core/src/schemas/runtime.ts | 4 +- packages/core/src/utils/expert-type.test.ts | 119 ++++++ packages/core/src/utils/expert-type.ts | 87 ++++ .../src/messages/instruction-message.ts | 30 +- .../states/calling-delegates.test.ts | 72 ++-- packages/runtime/test/run-params.ts | 2 +- packages/skill-manager/src/skill-manager.ts | 3 + 28 files changed, 361 insertions(+), 1447 deletions(-) create mode 100644 .changeset/coordinator-delegate-separation.md delete mode 100644 examples/README.md delete mode 100644 examples/bug-finder/README.md delete mode 100644 examples/bug-finder/perstack.toml delete mode 100644 examples/github-issue-bot/README.md delete mode 100644 examples/github-issue-bot/checkpoint-filter.ts delete mode 100644 examples/github-issue-bot/issue-bot.yml delete mode 100644 examples/github-issue-bot/perstack.toml delete mode 100644 examples/gmail-assistant/.gitignore delete mode 100644 examples/gmail-assistant/README.md delete mode 100644 examples/gmail-assistant/filter.ts delete mode 100644 examples/gmail-assistant/perstack.toml create mode 100644 packages/core/src/utils/expert-type.test.ts create mode 100644 packages/core/src/utils/expert-type.ts diff --git a/.changeset/coordinator-delegate-separation.md b/.changeset/coordinator-delegate-separation.md new file mode 100644 index 00000000..f25304e6 --- /dev/null +++ b/.changeset/coordinator-delegate-separation.md @@ -0,0 +1,8 @@ +--- +"@perstack/core": patch +"@perstack/runtime": patch +"@perstack/skill-manager": patch +"@perstack/installer": patch +--- + +Add coordinator/delegate expert separation with naming convention, delegation rule enforcement, and type-specific meta-prompts diff --git a/e2e/experts/delegate-chain.toml b/e2e/experts/delegate-chain.toml index 466b0497..deb099fc 100644 --- a/e2e/experts/delegate-chain.toml +++ b/e2e/experts/delegate-chain.toml @@ -9,10 +9,10 @@ envPath = [".env", ".env.local"] version = "1.0.0" description = "E2E test expert for delegate chain" instruction = """ -1. Delegate to "e2e-delegate-level1" with "test" +1. Delegate to "level1" with "test" 2. When done, call attemptCompletion """ -delegates = ["e2e-delegate-level1"] +delegates = ["@e2e-delegate-chain/level1"] [experts."e2e-delegate-chain".skills."@perstack/base"] type = "mcpStdioSkill" @@ -20,29 +20,29 @@ command = "npx" packageName = "@perstack/base" pick = ["attemptCompletion"] -[experts."e2e-delegate-level1"] +[experts."@e2e-delegate-chain/level1"] version = "1.0.0" description = "First level delegate expert" instruction = """ -1. Delegate to "e2e-delegate-level2" with "test" +1. Delegate to "level2" with "test" 2. When done, call attemptCompletion """ -delegates = ["e2e-delegate-level2"] +delegates = ["@e2e-delegate-chain/level2"] -[experts."e2e-delegate-level1".skills."@perstack/base"] +[experts."@e2e-delegate-chain/level1".skills."@perstack/base"] type = "mcpStdioSkill" command = "npx" packageName = "@perstack/base" pick = ["attemptCompletion"] -[experts."e2e-delegate-level2"] +[experts."@e2e-delegate-chain/level2"] version = "1.0.0" description = "Second level delegate expert" instruction = """ Call attemptCompletion. """ -[experts."e2e-delegate-level2".skills."@perstack/base"] +[experts."@e2e-delegate-chain/level2".skills."@perstack/base"] type = "mcpStdioSkill" command = "npx" packageName = "@perstack/base" diff --git a/e2e/experts/mixed-tools.toml b/e2e/experts/mixed-tools.toml index c304e74f..399b379b 100644 --- a/e2e/experts/mixed-tools.toml +++ b/e2e/experts/mixed-tools.toml @@ -11,10 +11,10 @@ description = "E2E test expert for mixed tool calls (MCP + Delegate + Interactiv instruction = """ Call ALL THREE tools in ONE response: 1. web_search_exa with query "test" -2. e2e-helper with query "test" +2. helper with query "test" 3. askUser with question "confirm?" """ -delegates = ["e2e-helper"] +delegates = ["@e2e-mixed-tools/helper"] [experts."e2e-mixed-tools".skills."exa"] type = "mcpStdioSkill" @@ -38,14 +38,14 @@ command = "npx" packageName = "@perstack/base" pick = ["attemptCompletion", "think"] -[experts."e2e-helper"] +[experts."@e2e-mixed-tools/helper"] version = "1.0.0" description = "E2E test helper expert" instruction = """ Call attemptCompletion. """ -[experts."e2e-helper".skills."@perstack/base"] +[experts."@e2e-mixed-tools/helper".skills."@perstack/base"] type = "mcpStdioSkill" command = "npx" packageName = "@perstack/base" diff --git a/e2e/experts/parallel-delegate.toml b/e2e/experts/parallel-delegate.toml index 584064dd..e07789e5 100644 --- a/e2e/experts/parallel-delegate.toml +++ b/e2e/experts/parallel-delegate.toml @@ -9,10 +9,10 @@ envPath = [".env", ".env.local"] version = "1.0.0" description = "E2E test expert for parallel delegation" instruction = """ -1. In ONE response, delegate to BOTH "e2e-delegate-math" and "e2e-delegate-text" with "test" +1. In ONE response, delegate to BOTH "math" and "text" with "test" 2. When both return, call attemptCompletion """ -delegates = ["e2e-delegate-math", "e2e-delegate-text"] +delegates = ["@e2e-parallel-delegate/math", "@e2e-parallel-delegate/text"] [experts."e2e-parallel-delegate".skills."@perstack/base"] type = "mcpStdioSkill" @@ -20,27 +20,27 @@ command = "npx" packageName = "@perstack/base" pick = ["attemptCompletion", "think"] -[experts."e2e-delegate-math"] +[experts."@e2e-parallel-delegate/math"] version = "1.0.0" description = "Math delegate expert" instruction = """ Call attemptCompletion. """ -[experts."e2e-delegate-math".skills."@perstack/base"] +[experts."@e2e-parallel-delegate/math".skills."@perstack/base"] type = "mcpStdioSkill" command = "npx" packageName = "@perstack/base" pick = ["attemptCompletion"] -[experts."e2e-delegate-text"] +[experts."@e2e-parallel-delegate/text"] version = "1.0.0" description = "Text processing delegate expert" instruction = """ Call attemptCompletion. """ -[experts."e2e-delegate-text".skills."@perstack/base"] +[experts."@e2e-parallel-delegate/text".skills."@perstack/base"] type = "mcpStdioSkill" command = "npx" packageName = "@perstack/base" diff --git a/e2e/perstack-cli/continue.test.ts b/e2e/perstack-cli/continue.test.ts index 4f64442f..64f44da7 100644 --- a/e2e/perstack-cli/continue.test.ts +++ b/e2e/perstack-cli/continue.test.ts @@ -100,7 +100,7 @@ describe.concurrent("Continue Job", () => { const delegateToolCalls = callToolsEvents.flatMap((e) => { const toolCalls = (e as { toolCalls?: { toolName: string }[] }).toolCalls ?? [] // Delegate tools have the same name as the expert key (e.g., "e2e-delegate-math") - return toolCalls.filter((tc) => tc.toolName.startsWith("e2e-delegate-")) + return toolCalls.filter((tc) => ["math", "text"].includes(tc.toolName)) }) expect(delegateToolCalls.length).toBe(2) const initialCompleteCount = getEventSequence(initialResult.events).filter( diff --git a/e2e/perstack-cli/delegate.test.ts b/e2e/perstack-cli/delegate.test.ts index 12155805..a1ef4975 100644 --- a/e2e/perstack-cli/delegate.test.ts +++ b/e2e/perstack-cli/delegate.test.ts @@ -54,11 +54,11 @@ describe("Delegate to Expert", () => { expect(controlFlow).toEqual([ "startRun:e2e-delegate-chain", "stopRunByDelegate:e2e-delegate-chain", - "startRun:e2e-delegate-level1", - "stopRunByDelegate:e2e-delegate-level1", - "startRun:e2e-delegate-level2", - "completeRun:e2e-delegate-level2", - "completeRun:e2e-delegate-level1", + "startRun:@e2e-delegate-chain/level1", + "stopRunByDelegate:@e2e-delegate-chain/level1", + "startRun:@e2e-delegate-chain/level2", + "completeRun:@e2e-delegate-chain/level2", + "completeRun:@e2e-delegate-chain/level1", "completeRun:e2e-delegate-chain", ]) diff --git a/e2e/perstack-cli/interactive.test.ts b/e2e/perstack-cli/interactive.test.ts index fc16654c..17745517 100644 --- a/e2e/perstack-cli/interactive.test.ts +++ b/e2e/perstack-cli/interactive.test.ts @@ -25,7 +25,7 @@ const LLM_TIMEOUT = 180000 describe("Interactive Input", () => { /** * Verifies mixed tool call processing order and checkpoint states. - * Expert calls 3 tools in parallel: web_search_exa, e2e-helper, askUser. + * Expert calls 3 tools in parallel: web_search_exa, helper, askUser. */ it("should handle mixed tool calls with delegate and interactive stop", async () => { const cmdResult = await runCli( diff --git a/examples/README.md b/examples/README.md deleted file mode 100644 index 97c35a77..00000000 --- a/examples/README.md +++ /dev/null @@ -1,53 +0,0 @@ -# Examples - -Examples of Perstack Experts demonstrating capabilities. - -## Available Examples - -| Example | Description | Registry | -| --------------------------------------- | --------------------------------------------------------- | ---------------------------- | -| [bug-finder](./bug-finder/) | Codebase analyzer for finding potential bugs | Local only | -| [github-issue-bot](./github-issue-bot/) | Automated issue responder with real-time activity logging | `@perstack/github-issue-bot` | -| [gmail-assistant](./gmail-assistant/) | Email assistant with Gmail search and local knowledge | Local only | - -## Quick Start - -### Using from Registry - -```bash -# Run published Expert directly -npx perstack run @perstack/github-issue-bot "Answer issue #123" -``` - -### Running Locally - -```bash -cd examples/github-issue-bot -perstack run @perstack/github-issue-bot "Answer issue #123" -``` - -## Example Structure - -Each example contains: - -``` -examples// -├── perstack.toml # Expert definition -├── README.md # Setup and usage guide -└── *.ts # Supporting scripts (if any) -``` - -## Publishing Your Own - -```bash -cd examples/ -export PERSTACK_API_KEY=your-key -perstack publish -``` - -## Contributing - -1. Create directory under `examples/` -2. Add `perstack.toml` with Expert definition -3. Add `README.md` with setup instructions -4. Update this README table diff --git a/examples/bug-finder/README.md b/examples/bug-finder/README.md deleted file mode 100644 index a4699ac6..00000000 --- a/examples/bug-finder/README.md +++ /dev/null @@ -1,76 +0,0 @@ -# Bug Finder - -Codebase analyzer that systematically finds potential bugs through code review. - -| | | -| ------------ | --------------------------------- | -| **Purpose** | Find potential bugs in codebases | -| **Expert** | `bug-finder` | -| **Skills** | `@perstack/base` only | -| **Sandbox** | Local runtime | -| **Registry** | Local only | - -## Quick Start - -### Local Usage - -```bash -cd examples/bug-finder -export ANTHROPIC_API_KEY=your-key - -npx perstack start bug-finder "Find 3 bugs in src/" -``` - -## Bug Categories - -The Expert looks for these common bug patterns: - -| Category | Examples | -| -------------------- | --------------------------------------------- | -| Logic errors | Incorrect conditions, wrong operators | -| Unhandled edge cases | null/undefined, empty arrays, boundary values | -| Type safety issues | Type coercion, any types, missing null checks | -| Resource leaks | Unclosed connections, missing cleanup | -| Race conditions | async/await issues, shared state | -| Off-by-one errors | Array indices, loop bounds | -| Error handling gaps | Swallowed errors, missing try-catch | -| Security issues | Injection, path traversal, unsanitized input | - -## Output Format - -Each bug is reported with: - -``` -## Bug #N: [Brief Title] -- **File**: path/to/file.ts:LINE -- **Severity**: Critical/High/Medium/Low -- **Description**: What the bug is -- **Impact**: What could go wrong -- **Fix**: How to fix it -``` - -## Example Queries - -| Query | What happens | -| ------------------------------------ | ----------------------------------- | -| "Find 3 bugs in src/" | Finds top 3 bugs in src directory | -| "Find security issues in lib/" | Focuses on security vulnerabilities | -| "Analyze auth.ts for bugs" | Reviews a specific file | -| "Find race conditions in async code" | Looks for specific bug category | - -## Files - -| File | Purpose | -| --------------- | ------------------ | -| `perstack.toml` | Expert definition | -| `README.md` | This documentation | - - - - - - - - - - diff --git a/examples/bug-finder/perstack.toml b/examples/bug-finder/perstack.toml deleted file mode 100644 index 963f168a..00000000 --- a/examples/bug-finder/perstack.toml +++ /dev/null @@ -1,88 +0,0 @@ -model = "claude-sonnet-4-5" - -[provider] -providerName = "anthropic" - -[experts."bug-finder"] -version = "1.0.0" -description = "Systematically analyzes codebase to find potential bugs" -instruction = """ -You are a senior software engineer specialized in finding bugs in codebases. - -## Your Task - -When given a target directory and a number of bugs to find, systematically analyze the code to identify potential issues. - -## Workflow - -1. **Understand the structure** - - Use `listDirectory` to explore the target directory - - Identify source files (*.ts, *.js, *.tsx, *.jsx, etc.) - - Prioritize files that contain business logic over config files - -2. **Analyze systematically** - - Read files using `readTextFile` - - Look for common bug patterns: - - Logic errors (incorrect conditions, wrong operators) - - Edge cases not handled (null/undefined, empty arrays, boundary values) - - Type safety issues (type coercion, any types, missing null checks) - - Resource leaks (unclosed connections, missing cleanup) - - Race conditions (async/await issues, shared state) - - Off-by-one errors (array indices, loop bounds) - - Error handling gaps (swallowed errors, missing try-catch) - - Security issues (injection, path traversal, unsanitized input) - -3. **Use `think` tool** - - Before reporting, use the think tool to organize your findings - - Prioritize bugs by severity (critical, high, medium, low) - -4. **Report findings** - - Use `attemptCompletion` with a structured report - - Include for each bug: - - File path and line number(s) - - Bug description - - Severity level - - Potential impact - - Suggested fix - -## Output Format - -Report each bug with: -``` -## Bug #N: [Brief Title] -- **File**: path/to/file.ts:LINE -- **Severity**: Critical/High/Medium/Low -- **Description**: What the bug is -- **Impact**: What could go wrong -- **Fix**: How to fix it -``` - -## Guidelines - -- Focus on real bugs, not style issues -- Provide actionable findings with specific line numbers -- Prioritize high-impact bugs -- Be thorough but efficient -""" - -[experts."bug-finder".skills."@perstack/base"] -type = "mcpStdioSkill" -command = "npx" -packageName = "@perstack/base" -pick = [ - "listDirectory", - "readTextFile", - "getFileInfo", - "think", - "attemptCompletion", -] - - - - - - - - - - diff --git a/examples/github-issue-bot/README.md b/examples/github-issue-bot/README.md deleted file mode 100644 index 37c36e72..00000000 --- a/examples/github-issue-bot/README.md +++ /dev/null @@ -1,150 +0,0 @@ -# GitHub Issue Bot - -Automated GitHub issue responder that reads your codebase to answer questions. - -| | | -| ------------ | -------------------------------------------- | -| **Purpose** | Answer GitHub issues by reading actual code | -| **Expert** | `@perstack/github-issue-bot` | -| **Skills** | `@perstack/base` only | -| **Sandbox** | GitHub Actions | -| **Trigger** | Issue opened / `@perstack-issue-bot` mention | -| **Registry** | Published | - -## Quick Start - -### 1. Copy Files - -Copy these files to your repository: - -``` -your-repo/ -├── .github/ -│ └── workflows/ -│ └── issue-bot.yml ← from this directory -└── scripts/ - └── checkpoint-filter.ts ← from this directory -``` - -### 2. Add Dependencies - -Add to your `package.json`: - -```json -{ - "devDependencies": { - "tsx": "^4.0.0" - } -} -``` - -### 3. Update Workflow Path - -In `issue-bot.yml`, update the path to match your structure: - -```yaml -npx perstack run @perstack/github-issue-bot "..." 2>&1 | npx tsx scripts/checkpoint-filter.ts -``` - -### 4. Add Secret - -Go to **Settings → Secrets and variables → Actions → New repository secret** - -Add `ANTHROPIC_API_KEY` with your Anthropic API key. - -### 5. Done! - -- New issues automatically get a response -- Comment `@perstack-issue-bot ` for follow-up questions - ---- - -## How checkpoint-filter.ts Works - -`checkpoint-filter.ts` is a working example of processing Perstack's event stream. - -### Event Stream - -When you run `perstack run`, it outputs JSON events to stdout: - -```json -{"type":"callTool","toolCall":{"toolName":"think","args":{"thought":"..."}}} -{"type":"callTool","toolCall":{"toolName":"readTextFile","args":{"path":"..."}}} -{"type":"completeRun","text":"Final answer here"} -``` - -### Processing Events - -The filter reads stdin line by line, parses JSON, and formats human-readable output: - -```typescript -function formatEvent(event: Record): string | null { - const type = event.type as string - switch (type) { - case "callTool": { - const toolCall = event.toolCall as Record - const toolName = toolCall?.toolName as string - const args = toolCall?.args as Record - if (toolName === "think") { - return `💭 ${args?.thought}` - } - if (toolName === "readTextFile") { - return `📖 Reading: ${args?.path}` - } - // ... more tool handlers - } - case "completeRun": { - finalAnswer = event.text as string - return "✅ Done" - } - } -} -``` - -### Event Types - -| Event Type | Description | Example Output | -| ------------- | ----------------------------- | ----------------------- | -| `callTool` | Tool invocation | `📖 Reading: src/app.ts` | -| `completeRun` | Run completed with final text | `✅ Done` | - -### Tool-specific Formatting - -| Tool Name | Icon | Output | -| ------------------- | ---- | --------------------------- | -| `think` | 💭 | Thought content (truncated) | -| `readTextFile` | 📖 | File path | -| `listDirectory` | 📁 | Directory path | -| `exec` | ⚡ | Command + args | -| `attemptCompletion` | ✨ | "Generating answer..." | - ---- - -## Trigger Conditions - -| Event | Trigger | -| ----------------- | ---------------------------------- | -| New issue created | Automatic | -| Comment | Must contain `@perstack-issue-bot` | - -## Customization - -Edit the workflow file to change the model: - -```yaml -cat < perstack.toml -model = "gpt-4o" # or claude-sonnet-4-5, etc. -[provider] -providerName = "openai" # or anthropic -EOF -``` - -For OpenAI, add `OPENAI_API_KEY` secret instead. - -## Files - -| File | Purpose | -| ---------------------- | ---------------------------------------- | -| `issue-bot.yml` | GitHub Actions workflow | -| `checkpoint-filter.ts` | Event stream processor (working example) | -| `perstack.toml` | Expert definition | diff --git a/examples/github-issue-bot/checkpoint-filter.ts b/examples/github-issue-bot/checkpoint-filter.ts deleted file mode 100644 index 06d184fb..00000000 --- a/examples/github-issue-bot/checkpoint-filter.ts +++ /dev/null @@ -1,128 +0,0 @@ -#!/usr/bin/env npx tsx -import * as readline from "node:readline" - -const COMMENT_ID = process.env.COMMENT_ID -const GITHUB_REPO = process.env.GITHUB_REPO -const GH_TOKEN = process.env.GH_TOKEN - -if (!COMMENT_ID || !GITHUB_REPO || !GH_TOKEN) { - console.error("Missing required env vars: COMMENT_ID, GITHUB_REPO, GH_TOKEN") - process.exit(1) -} - -const logs: string[] = [] -let finalAnswer = "" -let updatePending = false - -async function updateComment() { - if (updatePending) return - updatePending = true - await new Promise((resolve) => setTimeout(resolve, 500)) - updatePending = false - let body: string - if (finalAnswer) { - body = `${finalAnswer} - ---- - -
-🤖 Bot Activity - -\`\`\` -${logs.join("\n")} -\`\`\` - -
` - } else { - body = `
-🤖 Processing... - -\`\`\` -${logs.join("\n")} -\`\`\` - -
` - } - try { - const res = await fetch( - `https://api.github.com/repos/${GITHUB_REPO}/issues/comments/${COMMENT_ID}`, - { - method: "PATCH", - headers: { - Authorization: `Bearer ${GH_TOKEN}`, - "Content-Type": "application/json", - Accept: "application/vnd.github+json", - }, - body: JSON.stringify({ body }), - }, - ) - if (!res.ok) { - console.error("Failed to update comment:", await res.text()) - } - } catch (e) { - console.error("Error updating comment:", e) - } -} - -function formatEvent(event: Record): string | null { - const type = event.type as string - switch (type) { - case "callTool": { - const toolCall = event.toolCall as Record - const toolName = toolCall?.toolName as string - const args = toolCall?.args as Record - if (toolName === "think") { - const thought = args?.thought as string - if (thought) return `💭 ${thought.slice(0, 100)}${thought.length > 100 ? "..." : ""}` - } - if (toolName === "todo") { - return `📋 Updating todo list...` - } - if (toolName === "readTextFile") { - const path = args?.path as string - if (path) return `📖 Reading: ${path}` - } - if (toolName === "exec") { - const command = args?.command as string - const cmdArgs = args?.args as string[] - if (command) return `⚡ Exec: ${command} ${(cmdArgs || []).slice(0, 3).join(" ")}` - } - if (toolName === "attemptCompletion") { - return "✨ Generating answer..." - } - return null - } - case "completeRun": { - const text = (event as Record).text as string - if (text) finalAnswer = text - return "✅ Done" - } - default: - return null - } -} - -const rl = readline.createInterface({ - input: process.stdin, - output: process.stdout, - terminal: false, -}) - -rl.on("line", (line) => { - process.stdout.write(`${line}\n`) - try { - const event = JSON.parse(line) as Record - const formatted = formatEvent(event) - if (formatted) { - logs.push(formatted) - updateComment() - } - } catch { - // Not JSON, pass through - } -}) - -rl.on("close", async () => { - logs.push("---") - await updateComment() -}) diff --git a/examples/github-issue-bot/issue-bot.yml b/examples/github-issue-bot/issue-bot.yml deleted file mode 100644 index 4d7c8047..00000000 --- a/examples/github-issue-bot/issue-bot.yml +++ /dev/null @@ -1,50 +0,0 @@ -name: Issue Bot -on: - issue_comment: - types: [created] - issues: - types: [opened] -jobs: - respond: - if: | - (github.event_name == 'issues') || - (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@perstack-issue-bot')) - runs-on: ubuntu-latest - permissions: - issues: write - contents: read - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-node@v4 - with: - node-version: "22" - - name: Add reaction to indicate processing - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - if [ "${{ github.event_name }}" = "issue_comment" ]; then - gh api repos/${{ github.repository }}/issues/comments/${{ github.event.comment.id }}/reactions -f content=eyes - else - gh api repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/reactions -f content=eyes - fi - - name: Post processing status - id: status_comment - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - COMMENT_URL=$(gh issue comment ${{ github.event.issue.number }} --body "🤖 Starting...") - COMMENT_ID=$(echo "$COMMENT_URL" | grep -oE '[0-9]+$') - echo "comment_id=$COMMENT_ID" >> $GITHUB_OUTPUT - - name: Run Issue Bot - env: - ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - GITHUB_REPO: ${{ github.repository }} - ISSUE_NUMBER: ${{ github.event.issue.number }} - COMMENT_ID: ${{ steps.status_comment.outputs.comment_id }} - run: | - echo 'model = "claude-sonnet-4-5"' > perstack.toml - echo '[provider]' >> perstack.toml - echo 'providerName = "anthropic"' >> perstack.toml - npm install -g tsx - npx perstack run @perstack/github-issue-bot "Answer issue #$ISSUE_NUMBER" 2>&1 | tsx scripts/checkpoint-filter.ts diff --git a/examples/github-issue-bot/perstack.toml b/examples/github-issue-bot/perstack.toml deleted file mode 100644 index 069f4442..00000000 --- a/examples/github-issue-bot/perstack.toml +++ /dev/null @@ -1,51 +0,0 @@ -model = "claude-sonnet-4-5" - -[provider] -providerName = "anthropic" - -[experts."@perstack/github-issue-bot"] -version = "1.0.0" -description = "Answers GitHub issues by reading codebase" -instruction = """ -You are a GitHub issue support bot that answers questions by reading the actual codebase. - -## Important: Do NOT try to post comments - -Your job is ONLY to generate an answer. The system will automatically post your answer to GitHub. -Do NOT use `gh issue comment` or call GitHub APIs to post comments. - -## Getting the Question - -Use `gh` CLI to get issue information: - -1. First, get the latest comments to find what the user is asking: - `gh api repos/perstack-ai/perstack/issues/$ISSUE_NUMBER/comments --jq '.[-5:]'` - -2. Look for the LATEST comment that contains "@perstack-issue-bot" - that is the question you need to answer. - -3. If no comments with @perstack-issue-bot, the question is the issue body itself: - `gh issue view $ISSUE_NUMBER --repo perstack-ai/perstack` - -## Workflow - -1. Fetch the latest comments to find the question (look for @perstack-issue-bot mention) -2. Understand what the user is asking -3. Explore the codebase using `listDirectory` and `readTextFile` to find relevant code -4. Generate a helpful answer based on actual code (not guesses) -5. Call `attemptCompletion` with your answer - the system will post it automatically - -## Guidelines - -- Always read the actual code before answering -- Keep answers concise and actionable -- Include code snippets or file references when helpful -- If you can't find the answer, say so honestly -- Be friendly but professional -- DO NOT try to post to GitHub yourself -""" - -[experts."@perstack/github-issue-bot".skills."@perstack/base"] -type = "mcpStdioSkill" -command = "npx" -packageName = "@perstack/base" -requiredEnv = ["GH_TOKEN", "GITHUB_REPO", "ISSUE_NUMBER"] diff --git a/examples/gmail-assistant/.gitignore b/examples/gmail-assistant/.gitignore deleted file mode 100644 index 9cbd8c37..00000000 --- a/examples/gmail-assistant/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -drafts/ -.setup-step -perstack/ diff --git a/examples/gmail-assistant/README.md b/examples/gmail-assistant/README.md deleted file mode 100644 index 25ea5513..00000000 --- a/examples/gmail-assistant/README.md +++ /dev/null @@ -1,276 +0,0 @@ -# Gmail Assistant - -AI-powered email assistant that searches Gmail, explores context, and composes optimal replies. - -| | | -| ------------ | -------------------------------------------------------------------------------------------------------------------------- | -| **Purpose** | Reply to emails with context from inbox and local knowledge | -| **Expert** | `email-assistant` | -| **Skills** | [@gongrzhe/server-gmail-autoauth-mcp](https://www.npmjs.com/package/@gongrzhe/server-gmail-autoauth-mcp), `@perstack/base` | -| **Sandbox** | Local execution | -| **Trigger** | User query like "Reply to John's email about the project" | -| **Registry** | Not published (requires Gmail OAuth setup) | - -## Architecture - -``` -┌─────────────────────────────────────────────────────────────────┐ -│ email-assistant (Coordinator) │ -│ ├── Receives user query: "Reply to John's email" │ -│ ├── Delegates to inbox-searcher to find the email │ -│ ├── Delegates to knowledge-finder to gather context │ -│ └── Delegates to email-composer to write the reply │ -└─────────────────────────────────────────────────────────────────┘ - │ - ┌─────────────────────┼─────────────────────┐ - ▼ ▼ ▼ -┌───────────────┐ ┌────────────────┐ ┌───────────────┐ -│ inbox-searcher│ │ knowledge-finder│ │ email-composer│ -│ │ │ │ │ │ -│ Gmail MCP │ │ @perstack/base │ │ Gmail MCP + │ -│ (search/read) │ │ (file ops) │ │ @perstack/base│ -└───────────────┘ └────────────────┘ └───────────────┘ -``` - -## Quick Start - -### Option A: Guided Setup (Recommended) - -Use the setup assistant to guide you through each step: - -```bash -cd my-workspace -cp /path/to/examples/gmail-assistant/perstack.toml . -export ANTHROPIC_API_KEY=your-key - -npx perstack run setup-assistant "Start setup" -``` - -The assistant will: -1. Tell you what to do in Google Cloud Console -2. Wait for you to complete it - -After completing the Google Cloud setup, run: -```bash -npx perstack run setup-assistant "Done" -``` - -The assistant will guide you through authentication. Repeat until complete. - ---- - -### Option B: Manual Setup - -#### 1. Google Cloud Setup - -1. Go to [Google Cloud Console](https://console.cloud.google.com/) -2. Create a new project or select an existing one -3. Enable the **Gmail API**: - - Go to **APIs & Services** → **Library** - - Search for "Gmail API" and enable it -4. Configure **OAuth consent screen**: - - Go to **APIs & Services** → **OAuth consent screen** - - Select **External** user type - - Fill in App name, User support email, Developer contact - - Add your email as a **test user** -5. Create **OAuth credentials**: - - Go to **APIs & Services** → **Credentials** - - Click **Create Credentials** → **OAuth client ID** - - Select **Desktop app** - - Download the JSON file -6. Place credentials: - ```bash - mkdir -p ~/.gmail-mcp - mv ~/Downloads/client_secret_*.json ~/.gmail-mcp/gcp-oauth.keys.json - ``` - -#### 2. Authenticate - -```bash -npx -y @gongrzhe/server-gmail-autoauth-mcp auth -``` - -A browser opens. Sign in with your Google account and authorize access. - -#### 3. Run the Assistant - -```bash -cd my-workspace -export ANTHROPIC_API_KEY=your-key -npx perstack run email-assistant "Reply to John's email about the Q4 report" 2>&1 | npx tsx filter.ts -``` - ---- - -## Example Queries - -| Query | What happens | -| ---------------------------------------------- | -------------------------------------------------------- | -| "Reply to John's email about the project" | Searches for emails from John mentioning "project" | -| "Answer the latest email from hr@company.com" | Finds the most recent email from HR | -| "Respond to the meeting invite from yesterday" | Searches for recent meeting-related emails | -| "会計士からのメールに返信したい" | Searches for emails from accountant (Japanese supported) | - -## Output - -The assistant creates a **Gmail draft** in the same thread as the original email: - -1. Filter shows clickable URL: `🔗 https://mail.google.com/mail/u/0/#inbox/{threadId}` -2. Click to open the draft directly in Gmail -3. Review, edit if needed, and send - -A local copy is also saved to `./drafts/` for reference. - ---- - -## Expert Details - -### email-assistant - -The coordinator Expert that orchestrates the workflow: - -1. Parses user query to understand intent -2. Delegates email search to `inbox-searcher` -3. Delegates knowledge gathering to `knowledge-finder` -4. Delegates reply composition to `email-composer` -5. Creates Gmail draft and local file backup - -### inbox-searcher - -Gmail search specialist: - -| Tool | Purpose | -| --------------- | ----------------------------- | -| `search_emails` | Find emails matching criteria | -| `get_email` | Retrieve full email content | -| `get_thread` | Get conversation context | - -Returns **Thread ID** and **Message ID** for proper reply threading. - -### knowledge-finder - -Local filesystem search for context enrichment. - -**Setup:** Place knowledge files in your workspace directory: - -``` -my-workspace/ -├── perstack.toml -├── context/ # Conversation history (auto-populated) -│ └── john_proposal.md -│ └── accountant_tax.md -├── notes/ # Meeting notes, project info -│ └── project-x.md -├── docs/ # Company docs, policies -│ └── company-info.md -├── contracts/ # PDFs, agreements -│ └── engagement.pdf -└── contacts/ # Contact info, org charts - └── vendors.txt -``` - -The `context/` directory is special — it stores conversation history automatically. -The assistant searches these files to add relevant context to your replies. - -| Tool | Purpose | -| --------------- | -------------------------------- | -| `listDirectory` | Explore workspace structure | -| `readTextFile` | Read text, markdown, PDF content | -| `getFileInfo` | Get file metadata | - -### email-composer - -Reply composition, Gmail draft creation, and context management: - -| Tool | Purpose | -| ----------------- | --------------------------------- | -| `draft_email` | Create Gmail draft in thread | -| `writeTextFile` | Save local backup | -| `editTextFile` | Append to context files | -| `createDirectory` | Create drafts/context directories | - -**Context Management:** After composing a reply, the assistant appends a summary to `context/{person}_{topic}.md`. This builds up conversation history over time, so future replies have full context. - ---- - -## Security Notes - -- OAuth credentials stored in `~/.gmail-mcp/` -- Never commit credentials to version control -- **Drafts only** - emails are not sent automatically -- Review drafts in Gmail before sending - -## Troubleshooting - -### OAuth Keys Not Found - -``` -Error: OAuth keys not found -``` - -Ensure `gcp-oauth.keys.json` exists in `~/.gmail-mcp/`. - -### Authentication Failed - -``` -Error: Token refresh failed -``` - -Re-authenticate: - -```bash -rm ~/.gmail-mcp/credentials.json -npx -y @gongrzhe/server-gmail-autoauth-mcp auth -``` - -### Port Already in Use - -``` -Error: listen EADDRINUSE: address already in use :::3000 -``` - -Stop the process using port 3000 (often Docker/dev servers), then retry. - -### No Emails Found - -- Check search query syntax -- Verify Gmail API is enabled -- Ensure your email is added as test user in OAuth consent screen - ---- - -## Files - -| File | Purpose | -| --------------- | ----------------------------------- | -| `perstack.toml` | Expert definitions | -| `filter.ts` | Output filter for readable progress | -| `README.md` | This documentation | - -## Output Filter - -The `filter.ts` script shows real-time agent activity: - -``` -[inbox-searcher] 📧 Searching: from:john@example.com -[inbox-searcher] 📨 Reading email... -[knowledge-finder] 📁 Listing: docs/ -[email-composer] 📝 Creating Gmail draft to john@example.com (thread: abc123) -[email-composer] ✅ Draft saved: r1234567890 -[email-composer] 🔗 https://mail.google.com/mail/u/0/#inbox/abc123 -[email-composer] ✅ Done -``` - -Click the URL to open the draft directly in Gmail. - -To see raw JSON output without filter: - -```bash -npx perstack run email-assistant "your query" -``` - -## References - -- [@gongrzhe/server-gmail-autoauth-mcp](https://www.npmjs.com/package/@gongrzhe/server-gmail-autoauth-mcp) - Gmail MCP server -- [Gmail API Documentation](https://developers.google.com/gmail/api) -- [Perstack Documentation](https://perstack.ai) diff --git a/examples/gmail-assistant/filter.ts b/examples/gmail-assistant/filter.ts deleted file mode 100644 index 01d0777a..00000000 --- a/examples/gmail-assistant/filter.ts +++ /dev/null @@ -1,124 +0,0 @@ -#!/usr/bin/env npx tsx -import * as readline from "node:readline" - -const pendingDrafts = new Map() - -function formatEvent(event: Record): string | null { - const type = event.type as string - const expertKey = event.expertKey as string - switch (type) { - case "callTool": { - const toolCall = event.toolCall as Record - const toolName = toolCall?.toolName as string - const args = toolCall?.args as Record - if (toolName === "think") { - const thought = args?.thought as string - if (thought) - return `[${expertKey}] 💭 ${thought.slice(0, 120)}${thought.length > 120 ? "..." : ""}` - } - if (toolName === "search_emails") { - const query = args?.query as string - if (query) return `[${expertKey}] 📧 Searching: ${query}` - } - if (toolName === "read_email" || toolName === "get_email") { - return `[${expertKey}] 📨 Reading email...` - } - if (toolName === "draft_email") { - const toolCallId = (event.toolCall as Record)?.id as string - const to = args?.to as string | string[] - const threadId = (args?.threadId ?? args?.thread_id) as string - const toStr = Array.isArray(to) ? to[0] : to - if (threadId && toolCallId) pendingDrafts.set(toolCallId, threadId) - let msg = `[${expertKey}] 📝 Creating Gmail draft${toStr ? ` to ${toStr}` : ""}` - if (threadId) msg += ` (thread: ${threadId})` - return msg - } - if (toolName === "send_email") { - const to = args?.to as string - return `[${expertKey}] 📤 Sending email${to ? ` to ${to}` : ""}...` - } - if (toolName === "readTextFile") { - const path = args?.path as string - if (path) return `[${expertKey}] 📖 Reading: ${path}` - } - if (toolName === "writeTextFile") { - const path = args?.path as string - if (path) return `[${expertKey}] ✏️ Writing: ${path}` - } - if (toolName === "editTextFile") { - const path = args?.path as string - if (path) return `[${expertKey}] ✏️ Editing: ${path}` - } - if (toolName === "attemptCompletion") { - return `[${expertKey}] ✨ Completing...` - } - if ( - toolName === "inbox-searcher" || - toolName === "knowledge-finder" || - toolName === "email-composer" - ) { - const query = args?.query as string - if (query) return `[${expertKey}] 🔀 Delegating to ${toolName}: ${query.slice(0, 80)}...` - } - return `[${expertKey}] 🔧 ${toolName}` - } - case "resolveToolResult": { - const toolResult = event.toolResult as Record - const toolName = toolResult?.toolName as string - const toolCallId = toolResult?.id as string - if (toolName === "draft_email") { - const result = toolResult?.result as Array> - const text = result?.[0]?.text as string - const threadId = pendingDrafts.get(toolCallId) - if (threadId) pendingDrafts.delete(toolCallId) - if (text) { - const match = text.match(/ID: (r[\d-]+)/) - if (match) { - console.log(`[${expertKey}] ✅ Draft saved: ${match[1]}`) - if (threadId) { - console.log(`[${expertKey}] 🔗 https://mail.google.com/mail/u/0/#inbox/${threadId}`) - } else { - console.log(`[${expertKey}] 🔗 https://mail.google.com/mail/u/0/#drafts`) - } - } - } - } - return null - } - case "completeRun": { - const text = (event as Record).text as string - if (text) { - console.log(`\n${"=".repeat(60)}`) - console.log("FINAL RESULT:") - console.log("=".repeat(60)) - console.log(text) - console.log("=".repeat(60)) - } - return `[${expertKey}] ✅ Done` - } - case "startRun": - return `[${expertKey}] 🚀 Starting...` - default: - return null - } -} - -const rl = readline.createInterface({ - input: process.stdin, - output: process.stdout, - terminal: false, -}) - -rl.on("line", (line) => { - try { - const event = JSON.parse(line) as Record - const formatted = formatEvent(event) - if (formatted) { - console.log(formatted) - } - } catch { - if (line.startsWith("[MCP]")) { - console.log(line) - } - } -}) diff --git a/examples/gmail-assistant/perstack.toml b/examples/gmail-assistant/perstack.toml deleted file mode 100644 index 3b9497ee..00000000 --- a/examples/gmail-assistant/perstack.toml +++ /dev/null @@ -1,380 +0,0 @@ -model = "claude-sonnet-4-5" - -[provider] -providerName = "anthropic" - -[experts."setup-assistant"] -version = "1.0.0" -description = "Guides users through Gmail MCP Server setup with state tracking and Q&A support" -instruction = """ -You are a friendly setup assistant for Gmail MCP Server (@gongrzhe/server-gmail-autoauth-mcp). -You help users set up Gmail integration AND answer their questions. - -## Handling User Messages - -Understand user intent from context: -- Question about setup → Answer it, then show current step -- Reporting completion → Advance to next step -- Starting fresh → Show current step status -- Confusion or frustration → Offer clarification and encouragement - -Use your judgment. Don't rely on keyword matching. - -## State Management - -Progress tracked in `.setup-step` file: -1. Read `.setup-step` (default "1" if not exists) -2. When user reports completion → write incremented step number - -## Setup Steps (3 steps) - -### Step 1: Google Cloud Setup + Credentials -Tell user to complete ALL of these in ONE session (A through E): - ---- -**Go to https://console.cloud.google.com/ and do the following:** - -**A. Create/Select Project** -- Click project dropdown (top left) → New Project → Create -- Or select an existing project - -**B. Enable Gmail API** -- Go to: APIs & Services → Library -- Search "Gmail API" → Click → Enable - -**C. Configure OAuth Consent Screen** (Do this BEFORE creating credentials!) -- Go to: APIs & Services → OAuth consent screen -- User Type: External → Create -- App name: anything (e.g., "Gmail Assistant") -- User support email: your email -- Developer contact: your email -- Save and Continue -- Scopes: Add these manually or skip for now -- Test users: Add your Gmail address -- Save - -**D. Create OAuth Credentials** -- Go to: APIs & Services → Credentials -- Create Credentials → OAuth client ID -- Application type: Desktop app -- Name: anything -- Create → **Download JSON** - -**E. Place the JSON file** -``` -mkdir -p ~/.gmail-mcp -mv ~/Downloads/client_secret_*.json ~/.gmail-mcp/gcp-oauth.keys.json -``` - -⚠️ **Step 1 is complete only after running the commands in E!** ---- - -### Step 2: First Authentication -Tell user to run this command: -``` -npx -y @gongrzhe/server-gmail-autoauth-mcp auth -``` -Then: -1. Browser opens automatically -2. Sign in with your Google account (the test user you added) -3. Click "Continue" (ignore "unverified app" warning) -4. Allow all permissions -5. See "Authentication successful" → Done! - -### Step 3: Complete! -Show: -``` -🎉 Setup complete! - -You can now use the email assistant: -npx perstack start email-assistant "Find email from john@example.com and write a reply" -``` - -## Common Questions - -Q: "What is OAuth consent screen?" -A: It's Google's way of asking users to approve app access. For personal use, configure as "External" with yourself as test user. - -Q: "Why External user type?" -A: Internal requires Google Workspace. External works for any Google account, just add yourself as test user. - -Q: "App is unverified warning?" -A: Normal for development. Click "Continue" to proceed. Only you (test user) can use it anyway. - -Q: "Where is the JSON file?" -A: After creating OAuth credentials, a download dialog appears. Check ~/Downloads/ for client_secret_*.json - -Q: "Authentication failed?" -A: Make sure you added your email as test user in OAuth consent screen. - -## Output Format - -``` -## Gmail MCP Server Setup - -⏳ Step 1: Google Cloud Setup ← Current -⬚ Step 2: First Authentication -⬚ Step 3: Complete! - -[Detailed instructions or answer to question] - ---- -After completing this step: `npx perstack start setup-assistant "Done"` -``` -""" - -[experts."setup-assistant".skills."@perstack/base"] -type = "mcpStdioSkill" -command = "npx" -packageName = "@perstack/base" -pick = [ - "getFileInfo", - "readTextFile", - "writeTextFile", - "think", - "attemptCompletion", -] - -[experts."email-assistant"] -version = "1.0.0" -description = "Assists with email replies by searching Gmail, exploring related emails, finding relevant knowledge, and composing optimal responses" -instruction = """ -You are an email assistant that helps users compose the best possible reply emails. - -## Workflow - -When the user asks to reply to an email (e.g., "Find email from John and write a reply"): - -1. **Find the target email** - - Delegate to `inbox-searcher` to find the email matching the user's description - - Get the email content, sender info, and thread context - -2. **Gather context** - - Delegate to `knowledge-finder` to search for relevant information in the workspace - - This includes project files, notes, previous correspondence, and any relevant documents - -3. **Compose the reply** - - Delegate to `email-composer` to create the optimal reply - - The composer will use all gathered context to write a professional, appropriate response - -4. **Present the result** - - Show the composed email to the user - - Save the draft to `./drafts/` directory for easy access - -## Guidelines - -- Always search for context before composing -- Consider the tone and formality of the original email -- Include relevant information from the knowledge base -- Be concise but thorough -""" -delegates = ["inbox-searcher", "knowledge-finder", "email-composer"] - -[experts."inbox-searcher"] -version = "1.0.0" -description = "Searches Gmail inbox to find specific emails and related thread messages" -instruction = """ -You are an inbox search specialist. Your job is to find emails in Gmail based on user descriptions. - -## Capabilities - -- Search by sender name or email address -- Search by subject keywords -- Search by date range -- Find related emails in the same thread -- Get full email content and metadata - -## Search Syntax - -Use Gmail's search operators: -- `from:john@example.com` - emails from a specific sender -- `to:mary@example.com` - emails sent to a specific recipient -- `subject:"meeting notes"` - emails with specific text in the subject -- `has:attachment` - emails with attachments -- `after:2024/01/01` - emails received after a date -- `before:2024/02/01` - emails received before a date -- `is:unread` - unread emails -- `label:work` - emails with a specific label - -Combine operators: `from:john@example.com after:2024/01/01 has:attachment` - -## Workflow - -1. Parse the search criteria from the query -2. Use `search_emails` to find matching emails -3. If multiple matches, identify the most relevant one -4. Use `get_email` to retrieve the full email content -5. Use `get_thread` if conversation context is needed -6. Return structured information including: - - Subject - - Sender (name and email) - - Recipients - - Date - - Full body content - - Thread messages (if any) - - Attachments (if any) - -## Output Format - -Provide a clear summary of the found email with all relevant details. -IMPORTANT: Always include these IDs for reply handling: -- **Thread ID**: Required for replies to appear in the same conversation -- **Message ID**: The specific email being replied to - -If no email is found, suggest alternative search terms. -""" - -[experts."inbox-searcher".skills."@perstack/base"] -type = "mcpStdioSkill" -command = "npx" -packageName = "@perstack/base" -pick = ["think", "attemptCompletion"] - -[experts."inbox-searcher".skills."gmail"] -type = "mcpStdioSkill" -description = "Gmail API access for searching and reading emails" -command = "npx" -args = ["-y", "@gongrzhe/server-gmail-autoauth-mcp"] -rule = "Use search_emails to find emails, get_email to read full content, get_thread for conversation context" - -[experts."knowledge-finder"] -version = "1.0.0" -description = "Searches the local filesystem for relevant knowledge, documents, and context" -instruction = """ -You are a knowledge search specialist. Your job is to find relevant information in the workspace. - -## Search Strategy - -1. Start with `listDirectory` to understand the workspace structure -2. Look for relevant directories (in priority order): - - `context/` - **MOST IMPORTANT**: Past conversation history organized by person/topic - - `docs/` or `documentation/` for project documentation - - `notes/` for personal notes and meeting records - - `projects/` for project-specific information - - `knowledge/` or `kb/` for knowledge base files -3. Use `readTextFile` to examine promising files -4. Search for keywords related to the email context - -## Context Directory Structure - -The `context/` directory contains conversation history files named by person and topic: -- `context/田中さん_見積もり.md` - Past exchanges with Tanaka-san about estimates -- `context/税理士_確定申告.md` - Tax accountant conversations about filing - -**Always check `context/` first** - it contains the most relevant past conversation history. - -## File Types to Consider - -- Markdown files (.md) - documentation and notes -- Text files (.txt) - general notes -- JSON/YAML files - structured data -- Code files - if technical context is needed - -## Output Format - -Provide a summary of relevant information found: -- File paths where information was found -- Key facts and context extracted -- Relevant quotes or sections -- Connections to the email topic -- **Past conversation history** from context files (if found) -""" - -[experts."knowledge-finder".skills."@perstack/base"] -type = "mcpStdioSkill" -command = "npx" -packageName = "@perstack/base" -pick = [ - "listDirectory", - "readTextFile", - "getFileInfo", - "think", - "attemptCompletion", -] - -[experts."email-composer"] -version = "1.0.0" -description = "Composes professional email replies based on context and gathered information" -instruction = """ -You are an expert email composer. Your job is to write the optimal reply email. - -## Inputs You Receive - -1. Original email content and metadata -2. Thread context (previous messages) -3. Relevant knowledge from the workspace - -## Composition Guidelines - -### Tone Matching -- Match the formality level of the original email -- Professional but not stiff for business emails -- Friendly but clear for casual exchanges - -### Structure -- Clear greeting appropriate to the relationship -- Direct response to the main points -- Additional relevant information from knowledge base -- Professional closing - -### Content -- Address all questions or requests in the original email -- Include specific details from the knowledge base when relevant -- Be concise but complete -- Avoid unnecessary pleasantries - -## Output - -1. **Save as Gmail Draft (REQUIRED)** - - Use `draft_email` to create a draft in Gmail - - ALWAYS include `thread_id` from inbox-searcher to keep the reply in the same conversation - - This allows the user to review and send from Gmail - -2. **Save context for future reference (REQUIRED)** - - Create/update a file in `context/` directory - - Filename: `{person}_{topic}.md` (e.g., `田中さん_見積もり.md`, `税理士_確定申告.md`) - - Use simple names the user would naturally search for - - **Append** to existing file if it exists, or create new one - - Format: - ``` - ## {date} - {brief subject} - - **From:** {sender} - **Summary:** {1-2 sentence summary of their message} - - **My Reply:** - {summary of what I replied} - - --- - ``` - - This builds up conversation history over time - -3. **Also save draft to local file** - - Save a copy to `drafts/` directory for reference - -Provide the complete email: -- Subject line (with Re: prefix if appropriate) -- Body text -- Thread ID used (for verification) -- Context file updated (path) -""" - -[experts."email-composer".skills."@perstack/base"] -type = "mcpStdioSkill" -command = "npx" -packageName = "@perstack/base" -pick = [ - "writeTextFile", - "readTextFile", - "editTextFile", - "createDirectory", - "think", - "attemptCompletion", -] - -[experts."email-composer".skills."gmail"] -type = "mcpStdioSkill" -description = "Gmail API access for creating drafts" -command = "npx" -args = ["-y", "@gongrzhe/server-gmail-autoauth-mcp"] -rule = "Use draft_email with thread_id to create a reply draft in the same conversation. Never send directly - always create draft first." diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 95001d0a..81c4d897 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -24,4 +24,5 @@ export * from "./schemas/usage.js" export * from "./utils/activity.js" export * from "./utils/env-filter.js" export * from "./utils/event-filter.js" +export * from "./utils/expert-type.js" export * from "./utils/zod-error.js" diff --git a/packages/core/src/schemas/expert.test.ts b/packages/core/src/schemas/expert.test.ts index e5c689e4..8b9f41ca 100644 --- a/packages/core/src/schemas/expert.test.ts +++ b/packages/core/src/schemas/expert.test.ts @@ -118,4 +118,39 @@ describe("@perstack/core: expertSchema", () => { }) expect(result.description).toBe("") }) + + it("accepts valid delegation (coordinator -> own scope delegate)", () => { + const result = expertSchema.parse({ + key: "game-producer", + name: "game-producer", + version: "1.0.0", + instruction: "Test instruction", + delegates: ["@game-producer/designer"], + }) + expect(result.delegates).toEqual(["@game-producer/designer"]) + }) + + it("rejects self-delegation", () => { + expect(() => + expertSchema.parse({ + key: "game-producer", + name: "game-producer", + version: "1.0.0", + instruction: "Test instruction", + delegates: ["game-producer"], + }), + ).toThrow(ZodError) + }) + + it("rejects out-of-scope delegation", () => { + expect(() => + expertSchema.parse({ + key: "game-producer", + name: "game-producer", + version: "1.0.0", + instruction: "Test instruction", + delegates: ["@other-coordinator/expert"], + }), + ).toThrow(ZodError) + }) }) diff --git a/packages/core/src/schemas/expert.ts b/packages/core/src/schemas/expert.ts index 1b96cd2a..b0b6b2f4 100644 --- a/packages/core/src/schemas/expert.ts +++ b/packages/core/src/schemas/expert.ts @@ -6,6 +6,7 @@ import { maxExpertNameLength, tagNameRegex, } from "../constants/constants.js" +import { validateAllDelegations } from "../utils/expert-type.js" import type { AnthropicProviderSkill, ProviderToolOptions } from "./provider-tools.js" import { anthropicProviderSkillSchema, providerToolOptionsSchema } from "./provider-tools.js" import type { RuntimeVersion } from "./runtime-version.js" @@ -49,7 +50,11 @@ type SkillWithoutName = | Omit | Omit -export const expertSchema = z.object({ +/** + * Base object schema for Expert. Use this for `.omit()` / `.pick()` operations. + * For parsing with delegation validation, use `expertSchema` instead. + */ +export const expertBaseSchema = z.object({ key: z.string().regex(expertKeyRegex).min(1), name: z.string().regex(expertNameRegex).min(1).max(maxExpertNameLength), version: z.string().regex(expertVersionRegex), @@ -103,3 +108,14 @@ export const expertSchema = z.object({ providerSkills: z.array(anthropicProviderSkillSchema).optional(), providerToolOptions: providerToolOptionsSchema, }) + +/** + * Expert schema with delegation rule validation. + * Rejects self-delegation, out-of-scope delegates, and delegate-to-own-coordinator. + */ +export const expertSchema = expertBaseSchema.superRefine((data, ctx) => { + const errors = validateAllDelegations(data.key, data.delegates) + for (const error of errors) { + ctx.addIssue({ code: z.ZodIssueCode.custom, message: error, path: ["delegates"] }) + } +}) diff --git a/packages/core/src/schemas/runtime.ts b/packages/core/src/schemas/runtime.ts index 58fc2dcc..62c7511d 100644 --- a/packages/core/src/schemas/runtime.ts +++ b/packages/core/src/schemas/runtime.ts @@ -11,7 +11,7 @@ import { PerstackError } from "../errors.js" import type { Checkpoint } from "./checkpoint.js" import { checkpointSchema } from "./checkpoint.js" import type { Expert } from "./expert.js" -import { expertSchema } from "./expert.js" +import { expertBaseSchema, expertSchema } from "./expert.js" import type { ExpertMessage, InstructionMessage, @@ -203,7 +203,7 @@ export const runParamsSchema = z.object({ .optional(), }), experts: z - .record(z.string().min(1).regex(expertKeyRegex), expertSchema.omit({ key: true })) + .record(z.string().min(1).regex(expertKeyRegex), expertBaseSchema.omit({ key: true })) .optional() .default({}) .transform((experts) => diff --git a/packages/core/src/utils/expert-type.test.ts b/packages/core/src/utils/expert-type.test.ts new file mode 100644 index 00000000..234bfb6f --- /dev/null +++ b/packages/core/src/utils/expert-type.test.ts @@ -0,0 +1,119 @@ +import { describe, expect, it } from "vitest" +import { + getExpertScope, + getExpertShortName, + getExpertType, + isCoordinatorExpert, + isDelegateExpert, + validateAllDelegations, + validateDelegation, +} from "./expert-type.js" + +describe("expert type classification", () => { + it("classifies coordinator experts", () => { + expect(getExpertType("game-producer")).toBe("coordinator") + expect(isCoordinatorExpert("game-producer")).toBe(true) + expect(isDelegateExpert("game-producer")).toBe(false) + }) + + it("classifies delegate experts", () => { + expect(getExpertType("@game-producer/designer")).toBe("delegate") + expect(isCoordinatorExpert("@game-producer/designer")).toBe(false) + expect(isDelegateExpert("@game-producer/designer")).toBe(true) + }) +}) + +describe("getExpertScope", () => { + it("returns coordinator name as its own scope", () => { + expect(getExpertScope("game-producer")).toBe("game-producer") + }) + + it("extracts scope from delegate name", () => { + expect(getExpertScope("@game-producer/designer")).toBe("game-producer") + }) +}) + +describe("getExpertShortName", () => { + it("returns coordinator name unchanged", () => { + expect(getExpertShortName("game-producer")).toBe("game-producer") + }) + + it("extracts short name from delegate name", () => { + expect(getExpertShortName("@game-producer/designer")).toBe("designer") + }) +}) + +describe("validateDelegation", () => { + describe("valid cases", () => { + it("coordinator -> own scope delegate", () => { + expect(validateDelegation("game-producer", "@game-producer/designer")).toBeNull() + }) + + it("coordinator -> other coordinator", () => { + expect(validateDelegation("game-producer", "other-coordinator")).toBeNull() + }) + + it("delegate -> sibling delegate", () => { + expect(validateDelegation("@game-producer/designer", "@game-producer/programmer")).toBeNull() + }) + + it("delegate -> other coordinator", () => { + expect(validateDelegation("@game-producer/designer", "other-coordinator")).toBeNull() + }) + }) + + describe("invalid cases", () => { + it("self-delegation (coordinator)", () => { + expect(validateDelegation("game-producer", "game-producer")).toBe( + 'Expert "game-producer" cannot delegate to itself', + ) + }) + + it("self-delegation (delegate)", () => { + expect(validateDelegation("@game-producer/designer", "@game-producer/designer")).toBe( + 'Expert "@game-producer/designer" cannot delegate to itself', + ) + }) + + it("coordinator -> out-of-scope delegate", () => { + expect(validateDelegation("game-producer", "@other-coordinator/expert")).toBe( + 'Expert "game-producer" cannot delegate to out-of-scope delegate "@other-coordinator/expert"', + ) + }) + + it("delegate -> own coordinator", () => { + expect(validateDelegation("@game-producer/designer", "game-producer")).toBe( + 'Delegate "@game-producer/designer" cannot delegate to its own coordinator "game-producer"', + ) + }) + + it("delegate -> out-of-scope delegate", () => { + expect(validateDelegation("@game-producer/designer", "@other/expert")).toBe( + 'Expert "@game-producer/designer" cannot delegate to out-of-scope delegate "@other/expert"', + ) + }) + }) +}) + +describe("validateAllDelegations", () => { + it("returns empty array for all valid delegations", () => { + expect( + validateAllDelegations("game-producer", [ + "@game-producer/designer", + "@game-producer/programmer", + "other-coordinator", + ]), + ).toEqual([]) + }) + + it("returns errors for invalid delegations", () => { + const errors = validateAllDelegations("game-producer", [ + "@game-producer/designer", + "game-producer", + "@other/expert", + ]) + expect(errors).toHaveLength(2) + expect(errors[0]).toContain("cannot delegate to itself") + expect(errors[1]).toContain("out-of-scope delegate") + }) +}) diff --git a/packages/core/src/utils/expert-type.ts b/packages/core/src/utils/expert-type.ts new file mode 100644 index 00000000..3de34291 --- /dev/null +++ b/packages/core/src/utils/expert-type.ts @@ -0,0 +1,87 @@ +export type ExpertType = "coordinator" | "delegate" + +export function getExpertType(expertName: string): ExpertType { + return expertName.startsWith("@") ? "delegate" : "coordinator" +} + +export function isCoordinatorExpert(expertName: string): boolean { + return getExpertType(expertName) === "coordinator" +} + +export function isDelegateExpert(expertName: string): boolean { + return getExpertType(expertName) === "delegate" +} + +/** + * Returns the scope of an expert. + * - Coordinator "game-producer" -> "game-producer" + * - Delegate "@game-producer/designer" -> "game-producer" + */ +export function getExpertScope(expertName: string): string { + if (isDelegateExpert(expertName)) { + const withoutAt = expertName.slice(1) + const slashIndex = withoutAt.indexOf("/") + return slashIndex === -1 ? withoutAt : withoutAt.slice(0, slashIndex) + } + return expertName +} + +/** + * Returns the short name of an expert. + * - Coordinator "game-producer" -> "game-producer" + * - Delegate "@game-producer/designer" -> "designer" + */ +export function getExpertShortName(expertName: string): string { + if (isDelegateExpert(expertName)) { + const slashIndex = expertName.indexOf("/") + return slashIndex === -1 ? expertName : expertName.slice(slashIndex + 1) + } + return expertName +} + +/** + * Validates whether a delegation from source to target is allowed. + * Returns null if valid, an error message string if invalid. + * + * Rules: + * - No self-delegation + * - If target is a delegate (@scope/name), source must be in the same scope + * - A delegate cannot delegate to its own coordinator + */ +export function validateDelegation(source: string, target: string): string | null { + if (source === target) { + return `Expert "${source}" cannot delegate to itself` + } + + const sourceScope = getExpertScope(source) + const targetIsDelegate = isDelegateExpert(target) + + if (targetIsDelegate) { + const targetScope = getExpertScope(target) + if (sourceScope !== targetScope) { + return `Expert "${source}" cannot delegate to out-of-scope delegate "${target}"` + } + } + + // A delegate cannot delegate to its own coordinator + if (isDelegateExpert(source) && isCoordinatorExpert(target) && target === sourceScope) { + return `Delegate "${source}" cannot delegate to its own coordinator "${target}"` + } + + return null +} + +/** + * Validates all delegations for an expert. + * Returns an array of error messages (empty if all valid). + */ +export function validateAllDelegations(expertName: string, delegates: string[]): string[] { + const errors: string[] = [] + for (const delegate of delegates) { + const error = validateDelegation(expertName, delegate) + if (error) { + errors.push(error) + } + } + return errors +} diff --git a/packages/runtime/src/messages/instruction-message.ts b/packages/runtime/src/messages/instruction-message.ts index f3062207..c05f266b 100644 --- a/packages/runtime/src/messages/instruction-message.ts +++ b/packages/runtime/src/messages/instruction-message.ts @@ -1,8 +1,8 @@ import { createId } from "@paralleldrive/cuid2" -import type { Expert, InstructionMessage } from "@perstack/core" +import { type Expert, type InstructionMessage, isCoordinatorExpert } from "@perstack/core" import { dedent } from "ts-dedent" -function getMetaInstruction(startedAt: number): string { +function getDelegateMetaInstruction(startedAt: number): string { return dedent` Call tools iteratively to complete the user's task. When the task is complete, call attemptCompletion with a result parameter containing your final response. @@ -14,11 +14,35 @@ function getMetaInstruction(startedAt: number): string { ` } +function getCoordinatorMetaInstruction(startedAt: number): string { + return dedent` + You are a Coordinator. Your role is to plan, split tasks, and delegate. Never execute concrete tasks yourself. + + Delegation best practices: + - Parallelism: When there are N independent tasks, delegate them simultaneously in a single response. + - Task splitting: Distribute N tasks across M Experts. Do not send all tasks to a single Expert. + - Specificity: Include context, constraints, and expected output format in every delegation. No vague delegations. + - Planning as delegation: If planning is needed, delegate to a planner Expert. If none exists, create one with createExpert, then addDelegate, then delegate. + - Creating Experts: If no suitable delegate exists, use createExpert to create a specialized Expert, then addDelegate, then delegate to it. + + When the task is complete, call attemptCompletion with a result parameter containing your final response. + When you cannot help, call attemptCompletion without a result. + + Environment: + - Current time: ${new Date(startedAt).toISOString()} + - Working directory: ${process.cwd()} + ` +} + export function createInstructionMessage(expert: Expert, startedAt: number): InstructionMessage { + const metaInstruction = isCoordinatorExpert(expert.name) + ? getCoordinatorMetaInstruction(startedAt) + : getDelegateMetaInstruction(startedAt) + const instruction = dedent` You are Perstack, an AI expert that tackles tasks requested by users by utilizing all available tools. - ${getMetaInstruction(startedAt)} + ${metaInstruction} --- diff --git a/packages/runtime/src/state-machine/states/calling-delegates.test.ts b/packages/runtime/src/state-machine/states/calling-delegates.test.ts index a46d6b06..d8e436d5 100644 --- a/packages/runtime/src/state-machine/states/calling-delegates.test.ts +++ b/packages/runtime/src/state-machine/states/calling-delegates.test.ts @@ -70,19 +70,19 @@ describe("@perstack/runtime: StateMachineLogic['CallingDelegates']", () => { pendingToolCalls: [ { id: "tc_123", - skillName: "@perstack/math-expert", - toolName: "@perstack/math-expert", + skillName: "@test-expert/math", + toolName: "@test-expert/math", args: { query: "Calculate 2 + 2" }, }, ], partialToolResults: [], }) const skillManager = createMockSkillManager({ - "@perstack/math-expert": createMockAdapter({ - name: "@perstack/math-expert", + "@test-expert/math": createMockAdapter({ + name: "@test-expert/math", type: "delegate", - expert: { key: "@perstack/math-expert", name: "@perstack/math-expert", version: "1.0.0" }, - tools: [{ name: "@perstack/math-expert", description: "Math calculations" }], + expert: { key: "@test-expert/math", name: "@test-expert/math", version: "1.0.0" }, + tools: [{ name: "@test-expert/math", description: "Math calculations" }], }), }) await expect( @@ -108,12 +108,12 @@ describe("@perstack/runtime: StateMachineLogic['CallingDelegates']", () => { delegateTo: [ { expert: { - key: "@perstack/math-expert", - name: "@perstack/math-expert", + key: "@test-expert/math", + name: "@test-expert/math", version: "1.0.0", }, toolCallId: "tc_123", - toolName: "@perstack/math-expert", + toolName: "@test-expert/math", query: "Calculate 2 + 2", }, ], @@ -170,19 +170,19 @@ describe("@perstack/runtime: StateMachineLogic['CallingDelegates']", () => { pendingToolCalls: [ { id: "tc_123", - skillName: "@perstack/math-expert", - toolName: "@perstack/math-expert", + skillName: "@test-expert/math", + toolName: "@test-expert/math", args: { query: "Calculate 2 + 2" }, }, ], partialToolResults: [], }) const skillManager = createMockSkillManager({ - "@perstack/math-expert": createMockAdapter({ - name: "@perstack/math-expert", + "@test-expert/math": createMockAdapter({ + name: "@test-expert/math", type: "delegate", expert: undefined, - tools: [{ name: "@perstack/math-expert", description: "Math calculations" }], + tools: [{ name: "@test-expert/math", description: "Math calculations" }], }), }) await expect( @@ -194,7 +194,7 @@ describe("@perstack/runtime: StateMachineLogic['CallingDelegates']", () => { skillManager, llmExecutor: mockLLMExecutor, }), - ).rejects.toThrow('Delegation error: skill manager "@perstack/math-expert" not found') + ).rejects.toThrow('Delegation error: skill manager "@test-expert/math" not found') }) it("throws error when query is undefined", async () => { @@ -204,19 +204,19 @@ describe("@perstack/runtime: StateMachineLogic['CallingDelegates']", () => { pendingToolCalls: [ { id: "tc_123", - skillName: "@perstack/math-expert", - toolName: "@perstack/math-expert", + skillName: "@test-expert/math", + toolName: "@test-expert/math", args: { query: undefined }, }, ], partialToolResults: [], }) const skillManager = createMockSkillManager({ - "@perstack/math-expert": createMockAdapter({ - name: "@perstack/math-expert", + "@test-expert/math": createMockAdapter({ + name: "@test-expert/math", type: "delegate", - expert: { key: "@perstack/math-expert", name: "@perstack/math-expert", version: "1.0.0" }, - tools: [{ name: "@perstack/math-expert", description: "Math calculations" }], + expert: { key: "@test-expert/math", name: "@test-expert/math", version: "1.0.0" }, + tools: [{ name: "@test-expert/math", description: "Math calculations" }], }), }) await expect( @@ -238,8 +238,8 @@ describe("@perstack/runtime: StateMachineLogic['CallingDelegates']", () => { pendingToolCalls: [ { id: "tc_1", - skillName: "@perstack/math-expert", - toolName: "@perstack/math-expert", + skillName: "@test-expert/math", + toolName: "@test-expert/math", args: { query: "Calculate 2 + 2" }, }, { @@ -252,11 +252,11 @@ describe("@perstack/runtime: StateMachineLogic['CallingDelegates']", () => { partialToolResults: [], }) const skillManager = createMockSkillManager({ - "@perstack/math-expert": createMockAdapter({ - name: "@perstack/math-expert", + "@test-expert/math": createMockAdapter({ + name: "@test-expert/math", type: "delegate", - expert: { key: "@perstack/math-expert", name: "@perstack/math-expert", version: "1.0.0" }, - tools: [{ name: "@perstack/math-expert", description: "Math calculations" }], + expert: { key: "@test-expert/math", name: "@test-expert/math", version: "1.0.0" }, + tools: [{ name: "@test-expert/math", description: "Math calculations" }], }), "@perstack/text-expert": createMockAdapter({ name: "@perstack/text-expert", @@ -278,12 +278,12 @@ describe("@perstack/runtime: StateMachineLogic['CallingDelegates']", () => { expect(result.checkpoint.delegateTo).toEqual([ { expert: { - key: "@perstack/math-expert", - name: "@perstack/math-expert", + key: "@test-expert/math", + name: "@test-expert/math", version: "1.0.0", }, toolCallId: "tc_1", - toolName: "@perstack/math-expert", + toolName: "@test-expert/math", query: "Calculate 2 + 2", }, { @@ -307,8 +307,8 @@ describe("@perstack/runtime: StateMachineLogic['CallingDelegates']", () => { pendingToolCalls: [ { id: "tc_1", - skillName: "@perstack/math-expert", - toolName: "@perstack/math-expert", + skillName: "@test-expert/math", + toolName: "@test-expert/math", args: { query: "Calculate 2 + 2" }, }, { @@ -328,11 +328,11 @@ describe("@perstack/runtime: StateMachineLogic['CallingDelegates']", () => { ], }) const skillManager = createMockSkillManager({ - "@perstack/math-expert": createMockAdapter({ - name: "@perstack/math-expert", + "@test-expert/math": createMockAdapter({ + name: "@test-expert/math", type: "delegate", - expert: { key: "@perstack/math-expert", name: "@perstack/math-expert", version: "1.0.0" }, - tools: [{ name: "@perstack/math-expert", description: "Math calculations" }], + expert: { key: "@test-expert/math", name: "@test-expert/math", version: "1.0.0" }, + tools: [{ name: "@test-expert/math", description: "Math calculations" }], }), "@perstack/base": createMockAdapter({ name: "@perstack/base", diff --git a/packages/runtime/test/run-params.ts b/packages/runtime/test/run-params.ts index 3caeaacf..26376803 100644 --- a/packages/runtime/test/run-params.ts +++ b/packages/runtime/test/run-params.ts @@ -38,7 +38,7 @@ export function createRunSetting(overrides: Partial = version: "1.0.0", instruction: "You can delegate tasks to other experts.", skills: {}, - delegates: ["@perstack/math-expert"], + delegates: ["@test-expert/math"], tags: [], }, }, diff --git a/packages/skill-manager/src/skill-manager.ts b/packages/skill-manager/src/skill-manager.ts index 7114fcc2..5aedeeae 100644 --- a/packages/skill-manager/src/skill-manager.ts +++ b/packages/skill-manager/src/skill-manager.ts @@ -5,6 +5,7 @@ import { type McpSseSkill, type McpStdioSkill, type ToolDefinition, + validateDelegation, } from "@perstack/core" import { InMemoryBaseSkillAdapter } from "./adapters/in-memory-base-adapter.js" import { LockfileSkillAdapter } from "./adapters/lockfile-adapter.js" @@ -246,6 +247,8 @@ export class SkillManager { }, removeSkill: (name) => sm.removeSkill(name), addDelegate: async (key) => { + const delegationError = validateDelegation(expert.name, key) + if (delegationError) throw new Error(delegationError) const delegateExpert = experts[key] if (!delegateExpert) throw new Error(`Expert "${key}" not found`) await sm.addDelegate(delegateExpert) From a18522cd63b6915a3b1eb7ba0df70a2d8d489e6c Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Thu, 19 Feb 2026 14:03:05 +0000 Subject: [PATCH 2/9] refactor: split create-expert into coordinator + planner/definition-writer delegates Replace the monolithic expert with a coordinator that orchestrates two focused delegates: @expert/planner (designs architectures) and @expert/definition-writer (writes and tests perstack.toml definitions). Each delegate embeds deep Perstack best practices in its instruction. Co-Authored-By: Claude Opus 4.6 --- apps/create-expert/perstack.toml | 289 +++++++++++++++++++------------ 1 file changed, 179 insertions(+), 110 deletions(-) diff --git a/apps/create-expert/perstack.toml b/apps/create-expert/perstack.toml index 00d50895..ae3e3db7 100644 --- a/apps/create-expert/perstack.toml +++ b/apps/create-expert/perstack.toml @@ -7,138 +7,207 @@ providerName = "anthropic" version = "1.0.0" description = "Creates and modifies Perstack expert definitions in perstack.toml" instruction = """ -You are an expert builder for Perstack. Your job is to create and modify expert definitions in perstack.toml files. +You are the coordinator for creating Perstack expert definitions. Your role is to orchestrate the planning and writing of expert definitions. -## perstack.toml Schema +## Workflow -A perstack.toml file defines experts and their configuration. Here is the complete schema: +1. Understand the user's request for an expert or set of experts +2. If a perstack.toml already exists in the workspace, read it and include its contents as context for your delegates +3. Delegate to @expert/planner with the user's request and any existing configuration context. The planner will return a structured design document +4. Delegate to @expert/definition-writer with the planner's design output. The definition-writer will write and test the perstack.toml +5. After the definition-writer completes, read the final perstack.toml to verify it was written correctly and matches the planner's design +6. Use attemptCompletion to report the result to the user + +## Guidelines + +- Always pass the full planner output to the definition-writer so it has complete design context +- If a delegate fails, read the error and decide whether to retry with adjusted input or report the issue +- You do not write files yourself — that is the definition-writer's responsibility +- If the user's request is ambiguous, ask for clarification before delegating +""" +delegates = ["@expert/planner", "@expert/definition-writer"] + +[experts."expert".skills."@perstack/base"] +type = "mcpStdioSkill" +command = "npx" +packageName = "@perstack/base" +pick = ["readTextFile", "listDirectory", "think", "attemptCompletion"] + +[experts."@expert/planner"] +version = "1.0.0" +description = """ +Designs expert architectures for Perstack. Provide: (1) what the expert should do, (2) any existing perstack.toml content for context. \ +Returns a structured design document with expert names, descriptions, instruction summaries, skill requirements, delegation structure, and test scenarios. +""" +instruction = """ +You are an expert architect for Perstack. Your role is to design expert systems that follow Perstack best practices. You produce a structured design document — you do not write TOML or files. + +## Perstack Best Practices + +### 1. Do One Thing Well +Focused experts with clear boundaries, not monoliths. When something goes wrong in a monolith, you cannot tell which part failed. Focused experts are easier to debug, test, and improve independently. + +### 2. Trust the LLM, Define Domain Knowledge +Provide domain knowledge (policies, rules, constraints), not step-by-step procedures. The LLM knows how to reason and converse. What it does not know is your specific domain — that is what instructions should contain. + +### 3. Let Them Collaborate +Modular experts that delegate, not monoliths. The same focused expert works across different contexts. One person improves one expert while another builds a different one. Test each independently. Replace one without touching others. + +### 4. Keep It Verifiable +Instructions that anyone can read and predict behavior. If someone else cannot read your expert and predict its behavior, it is not verifiable. Include concrete rules, thresholds, and criteria rather than vague guidance. + +### 5. Ship Early +Start minimal, iterate based on real usage. Real users reveal actual edge cases. A complex initial design often solves the wrong problems. Ship, observe, iterate. + +## Perstack Expert Model + +- **description** = public interface. Seen by delegating experts as a tool description. Write it to help callers decide when to use this expert and what to include in the query. +- **instruction** = private domain knowledge. Define what the expert achieves, domain-specific rules/constraints, and completion criteria. NOT step-by-step procedures. +- **skills** = MCP tools (file ops, exec, custom MCP servers). Always include attemptCompletion. +- **delegates** = other experts this one can call. Naming convention: coordinator = plain-name, delegate = @coordinator/delegate-name. +- **Context isolation**: delegates receive only the query, no parent context. Data exchange happens via workspace files. +- **Parallel delegation**: multiple delegate calls in one response execute concurrently. + +## Available Skill Types + +- **mcpStdioSkill** — stdio MCP server (most common). Fields: command, args/packageName, pick/omit, requiredEnv, rule +- **mcpSseSkill** — SSE MCP server. Fields: endpoint +- **interactiveSkill** — pauses for user input. Fields: tools with inputJsonSchema + +## Available @perstack/base Tools + +- readTextFile, writeTextFile, editTextFile — file operations +- listDirectory, getFileInfo — directory and file metadata +- exec — run system commands +- think — complex reasoning scratchpad +- attemptCompletion — signal task completion (always include) +- createExpert, addDelegate, removeDelegate — expert lifecycle management + +## Design Process + +1. Read existing files in the workspace if relevant to understand context +2. Analyze whether the task needs one expert or a coordinator with delegates +3. For simple, focused tasks: design a single expert +4. For complex, multi-faceted tasks: design a coordinator with focused delegates +5. Consider what tools each expert needs (minimal set — only what the expert actually needs) +6. Think about testing: what query would exercise each expert's core function? + +## Design Deliverables + +Your completion result must include: + +1. **Expert names/keys** — kebab-case, following coordinator/delegate naming convention if multi-expert +2. **Description for each expert** — optimized for callers (or users, for top-level experts) +3. **Instruction summary for each expert** — what domain knowledge to include, what rules/constraints/policies to embed, completion criteria +4. **Skills required per expert** — which @perstack/base tools, any custom MCP servers +5. **Delegation structure** — who delegates to whom, with rationale +6. **Test scenario for each expert** — a concrete, realistic query that exercises the expert's core function +""" + +[experts."@expert/planner".skills."@perstack/base"] +type = "mcpStdioSkill" +command = "npx" +packageName = "@perstack/base" +pick = ["readTextFile", "listDirectory", "think", "attemptCompletion"] + +[experts."@expert/definition-writer"] +version = "1.0.0" +description = """ +Writes and tests Perstack expert definitions in perstack.toml. Provide: a design document specifying expert names, descriptions, instruction content, skills, and delegation structure. \ +Returns confirmation after the perstack.toml has been written and verified through testing. +""" +instruction = """ +You are a Perstack definition writer. Your role is to write, test, and verify expert definitions in perstack.toml based on a design document you receive. + +## perstack.toml Schema Reference ```toml # Optional: default model for all experts model = "claude-sonnet-4-5" -# Optional: default provider configuration +# Optional: provider configuration [provider] providerName = "anthropic" # or "openai", "google", etc. +envPath = [".env"] -# Optional: paths to environment files -envPath = [".env", ".env.local"] - -# Optional: global settings -# maxSteps = 100 -# maxRetries = 5 -# timeout = 300000 - -# Expert definitions - each expert is a key under [experts] +# Expert definition [experts."expert-name"] version = "1.0.0" -description = "A brief description of what this expert does" +description = "Brief description of what this expert does" instruction = \"\"\" -Detailed instructions for the expert. This is the system prompt that guides the expert's behavior. +Domain knowledge and guidelines for the expert. \"\"\" -# Optional: delegate to other experts -# delegates = ["other-expert-name"] -# Optional: tags for categorization -# tags = ["tag1", "tag2"] +delegates = ["@expert-name/delegate"] # optional +tags = ["tag"] # optional -# Skills give experts access to tools via MCP servers -[experts."expert-name".skills."@perstack/base"] +# Skills — MCP tool access +[experts."expert-name".skills."skill-name"] type = "mcpStdioSkill" command = "npx" packageName = "@perstack/base" -# Optional: only include specific tools -pick = ["readTextFile", "writeTextFile", "listDirectory", "think", "attemptCompletion"] -# Optional: exclude specific tools (mutually exclusive with pick) -# omit = ["exec"] - -# Custom MCP skill example -# [experts."expert-name".skills."custom-mcp"] -# type = "mcpStdioSkill" -# description = "Description of this skill" -# command = "npx" -# args = ["-y", "some-mcp-server"] -# requiredEnv = ["API_KEY"] -# rule = "Instructions for using this skill" +pick = ["tool1", "tool2"] # optional, include specific tools +# omit = ["tool3"] # optional, mutually exclusive with pick +# requiredEnv = ["ENV_VAR"] # optional, required environment variables +# rule = "Usage instructions" # optional, guidance for using this skill ``` -## Your Workflow - -1. First, check if a `perstack.toml` already exists in the current directory using `readTextFile` -2. If it exists, read and understand the current configuration -3. Based on the user's request, draft the expert definition -4. Create the expert in memory using `createExpert` to validate the definition -5. Add it as a delegate using `addDelegate` so you can test it -6. **Practical test**: Call the delegate with a realistic query that matches what the user would actually ask (see Testing Guide below) -7. **Verify outputs**: After the delegate returns, verify the actual artifacts and process (see Testing Guide below) -8. If the test shows errors, missing artifacts, or quality issues: - - Use `removeDelegate` to remove the current delegate - - Modify the definition and call `createExpert` again with the same key - - Add it as a delegate again with `addDelegate` and re-test -9. Once the expert produces correct, complete outputs, write the final `perstack.toml` using `writeTextFile` -10. Use `attemptCompletion` when the expert is created and verified - -## Testing Guide - -You MUST perform practical, end-to-end testing before writing perstack.toml. The test must simulate the user's actual use case, not just check that the expert "runs without errors". - -### Step 1: Design a realistic test query - -Before calling the delegate, think about what the user will actually ask this expert to do. The test query should be a concrete, representative task — not a trivial or abstract one. - -- If the expert generates code: ask it to generate a small but complete, realistic piece (e.g., "Create a responsive landing page with a hero section, feature cards, and a contact form") -- If the expert writes documentation: ask it to document a specific scenario (e.g., "Write API documentation for a user authentication endpoint with examples") -- If the expert performs analysis: give it real-looking data to analyze -- If the expert manages a workflow with sub-experts: give it a task that exercises delegation to at least one sub-expert - -### Step 2: Verify the artifacts after delegation - -After the delegate returns its text result, you must verify what was actually produced. Do NOT just read the delegate's response text and assume success. - -**For experts that create files:** -1. Use `listDirectory` to confirm all expected files were created -2. Use `readTextFile` to read each generated file -3. Check that file contents are correct, complete, and well-structured -4. Verify no placeholder content (e.g., "TODO", "Lorem ipsum" where real content is expected) - -**For experts that modify existing files:** -1. Use `readTextFile` to read the modified files -2. Verify the changes are correct and the file is still valid -3. Check that unrelated parts of the file were not damaged - -**For experts that perform tasks (build, test, deploy, etc.):** -1. Use `exec` to run `perstack logs --last` to inspect the execution process -2. Verify the task steps were performed in the correct order -3. Check that the final state matches expectations (files created, commands run, etc.) - -**For experts with delegates (coordinator/lead experts):** -1. Use `exec` to run `perstack logs --last` to verify delegation occurred -2. Confirm that each sub-expert was called with appropriate queries -3. Verify the coordinator properly synthesized results from sub-experts - -### Step 3: Evaluate quality, not just correctness - -Ask yourself: "If I were the user, would I be satisfied with this output?" -- Is the output complete, or are parts missing? -- Is the quality appropriate for the task? -- Does the expert follow its instruction faithfully? -- Would the user need to manually fix or redo anything? - -If the answer to any of these is unsatisfactory, iterate: fix the instruction, recreate, and re-test. - -## Important Rules - -- Always produce valid TOML syntax -- Use triple-quoted strings (\"\"\" \"\"\") for multi-line instructions -- Expert keys should be kebab-case (e.g., "my-expert-name") -- Always include `version`, `description`, and `instruction` for each expert -- Always include at least `attemptCompletion` in the skills pick list -- Choose appropriate tools based on what the expert needs to do -- If the expert needs to read/write files, include file operation tools -- If the expert needs to run commands, include `exec` -- Include `think` for experts that need complex reasoning +## Instruction Writing Guidelines + +- Define domain knowledge, not step-by-step procedures +- Include: role identity, domain-specific rules/constraints/policies, completion criteria, priority tradeoffs +- Avoid: numbered step sequences, over-specified procedures, vague descriptions +- Write descriptions that tell callers what this expert does, when to use it, and what to include in the query + +## Skill Selection Guide + +- Always include attemptCompletion in pick list +- Include think for experts that need complex reasoning +- Include readTextFile, writeTextFile, listDirectory for file operations +- Include exec for system commands +- Include createExpert, addDelegate, removeDelegate only for experts that manage other experts +- Include editTextFile when targeted text replacement is needed +- Include getFileInfo when file metadata matters +- Prefer minimal tool sets — only include what the expert actually needs + +## TOML Syntax Rules + +- Use triple-quoted strings for multi-line instructions +- Expert keys: kebab-case (my-expert-name) +- Delegate keys: @coordinator/delegate-name +- Always include version, description, instruction for each expert +- Produce valid TOML — no syntax errors + +## Testing Workflow + +After writing the perstack.toml, you must test the experts: + +1. Create the expert in memory using createExpert +2. Add it as a delegate using addDelegate +3. Design a realistic test query that exercises the expert's core function +4. Call the delegate with the test query +5. Verify artifacts: read generated files, check for completeness and quality +6. If issues found: removeDelegate, modify the definition, createExpert again, re-test +7. For coordinator experts: verify delegation occurred and results were synthesized + +### Verification Checklist + +- All expected files were created/modified correctly +- No placeholder content (TODO, Lorem ipsum) where real content is expected +- Expert follows its instruction faithfully +- Output is complete and high quality +- TOML syntax is valid + +## Process + +1. Read the design document provided in the query +2. If a perstack.toml already exists, read it to understand current state +3. Write the perstack.toml with all expert definitions based on the design +4. Test each expert with a realistic query +5. Iterate on definitions if tests reveal issues +6. Use attemptCompletion when all experts are written and verified """ -[experts."expert".skills."@perstack/base"] +[experts."@expert/definition-writer".skills."@perstack/base"] type = "mcpStdioSkill" command = "npx" packageName = "@perstack/base" From 47ec10b1656cbdb51d49b60259ed90b4ddfe1486 Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Thu, 19 Feb 2026 14:06:25 +0000 Subject: [PATCH 3/9] fix: rename expert to create-expert in create-expert app The coordinator expert key was "expert" instead of "create-expert", inconsistent with the app name. Rename all expert keys and delegate references accordingly. Co-Authored-By: Claude Opus 4.6 --- apps/create-expert/perstack.toml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/apps/create-expert/perstack.toml b/apps/create-expert/perstack.toml index ae3e3db7..9486ad93 100644 --- a/apps/create-expert/perstack.toml +++ b/apps/create-expert/perstack.toml @@ -3,7 +3,7 @@ model = "claude-sonnet-4-5" [provider] providerName = "anthropic" -[experts."expert"] +[experts."create-expert"] version = "1.0.0" description = "Creates and modifies Perstack expert definitions in perstack.toml" instruction = """ @@ -13,8 +13,8 @@ You are the coordinator for creating Perstack expert definitions. Your role is t 1. Understand the user's request for an expert or set of experts 2. If a perstack.toml already exists in the workspace, read it and include its contents as context for your delegates -3. Delegate to @expert/planner with the user's request and any existing configuration context. The planner will return a structured design document -4. Delegate to @expert/definition-writer with the planner's design output. The definition-writer will write and test the perstack.toml +3. Delegate to @create-expert/planner with the user's request and any existing configuration context. The planner will return a structured design document +4. Delegate to @create-expert/definition-writer with the planner's design output. The definition-writer will write and test the perstack.toml 5. After the definition-writer completes, read the final perstack.toml to verify it was written correctly and matches the planner's design 6. Use attemptCompletion to report the result to the user @@ -25,15 +25,15 @@ You are the coordinator for creating Perstack expert definitions. Your role is t - You do not write files yourself — that is the definition-writer's responsibility - If the user's request is ambiguous, ask for clarification before delegating """ -delegates = ["@expert/planner", "@expert/definition-writer"] +delegates = ["@create-expert/planner", "@create-expert/definition-writer"] -[experts."expert".skills."@perstack/base"] +[experts."create-expert".skills."@perstack/base"] type = "mcpStdioSkill" command = "npx" packageName = "@perstack/base" pick = ["readTextFile", "listDirectory", "think", "attemptCompletion"] -[experts."@expert/planner"] +[experts."@create-expert/planner"] version = "1.0.0" description = """ Designs expert architectures for Perstack. Provide: (1) what the expert should do, (2) any existing perstack.toml content for context. \ @@ -104,13 +104,13 @@ Your completion result must include: 6. **Test scenario for each expert** — a concrete, realistic query that exercises the expert's core function """ -[experts."@expert/planner".skills."@perstack/base"] +[experts."@create-expert/planner".skills."@perstack/base"] type = "mcpStdioSkill" command = "npx" packageName = "@perstack/base" pick = ["readTextFile", "listDirectory", "think", "attemptCompletion"] -[experts."@expert/definition-writer"] +[experts."@create-expert/definition-writer"] version = "1.0.0" description = """ Writes and tests Perstack expert definitions in perstack.toml. Provide: a design document specifying expert names, descriptions, instruction content, skills, and delegation structure. \ @@ -207,7 +207,7 @@ After writing the perstack.toml, you must test the experts: 6. Use attemptCompletion when all experts are written and verified """ -[experts."@expert/definition-writer".skills."@perstack/base"] +[experts."@create-expert/definition-writer".skills."@perstack/base"] type = "mcpStdioSkill" command = "npx" packageName = "@perstack/base" From 6586802bb5c05b029bf41245a3dbdca1d1bfca54 Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Thu, 19 Feb 2026 14:07:03 +0000 Subject: [PATCH 4/9] fix: update expert key in cli.ts to match renamed create-expert Co-Authored-By: Claude Opus 4.6 --- apps/create-expert/bin/cli.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/create-expert/bin/cli.ts b/apps/create-expert/bin/cli.ts index a37c26b7..1e8e61fe 100644 --- a/apps/create-expert/bin/cli.ts +++ b/apps/create-expert/bin/cli.ts @@ -66,13 +66,13 @@ new Command() console.error("Error: query argument is required in headless mode") process.exit(1) } - await runHandler("expert", query, options, { + await runHandler("create-expert", query, options, { perstackConfig: config, lockfile, additionalEnv, }) } else { - await startHandler("expert", query, options, { + await startHandler("create-expert", query, options, { perstackConfig: config, lockfile, additionalEnv, From f7a0c1174c9670c187de63d06191b76d86e89447 Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Thu, 19 Feb 2026 14:15:35 +0000 Subject: [PATCH 5/9] refactor: thin coordinator with file-based delegation and dedicated tester - Coordinator now only routes file paths between delegates, no domain logic - Planner writes design to perstack/plan.md instead of returning as text - Definition-writer reads plan file and writes perstack.toml (no testing) - New @create-expert/expert-tester delegate handles all testing concerns - Coordinator orchestrates: planner -> definition-writer -> expert-tester Co-Authored-By: Claude Opus 4.6 --- apps/create-expert/perstack.toml | 142 ++++++++++++++++++++----------- 1 file changed, 90 insertions(+), 52 deletions(-) diff --git a/apps/create-expert/perstack.toml b/apps/create-expert/perstack.toml index 9486ad93..3baf9c56 100644 --- a/apps/create-expert/perstack.toml +++ b/apps/create-expert/perstack.toml @@ -7,40 +7,41 @@ providerName = "anthropic" version = "1.0.0" description = "Creates and modifies Perstack expert definitions in perstack.toml" instruction = """ -You are the coordinator for creating Perstack expert definitions. Your role is to orchestrate the planning and writing of expert definitions. +You are the coordinator for creating Perstack expert definitions. -## Workflow +Delegate to your specialists and pass file paths between them. Do not read or interpret the contents of intermediate files yourself. -1. Understand the user's request for an expert or set of experts -2. If a perstack.toml already exists in the workspace, read it and include its contents as context for your delegates -3. Delegate to @create-expert/planner with the user's request and any existing configuration context. The planner will return a structured design document -4. Delegate to @create-expert/definition-writer with the planner's design output. The definition-writer will write and test the perstack.toml -5. After the definition-writer completes, read the final perstack.toml to verify it was written correctly and matches the planner's design -6. Use attemptCompletion to report the result to the user +## Delegates -## Guidelines +- @create-expert/planner — designs expert architectures, writes plan to perstack/plan.md +- @create-expert/definition-writer — reads a plan file and writes perstack.toml +- @create-expert/expert-tester — tests a perstack.toml by exercising each expert -- Always pass the full planner output to the definition-writer so it has complete design context -- If a delegate fails, read the error and decide whether to retry with adjusted input or report the issue -- You do not write files yourself — that is the definition-writer's responsibility -- If the user's request is ambiguous, ask for clarification before delegating +## Coordination + +1. If a perstack.toml already exists in the workspace, note its path +2. Delegate to planner: pass the user's request and the perstack.toml path if one exists +3. Delegate to definition-writer: tell it to read perstack/plan.md and write perstack.toml +4. Delegate to expert-tester: tell it to test perstack.toml +5. If the tester reports issues, delegate back to definition-writer with the tester's feedback and the plan file path, then re-test +6. attemptCompletion with a summary of what was created """ -delegates = ["@create-expert/planner", "@create-expert/definition-writer"] +delegates = ["@create-expert/planner", "@create-expert/definition-writer", "@create-expert/expert-tester"] [experts."create-expert".skills."@perstack/base"] type = "mcpStdioSkill" command = "npx" packageName = "@perstack/base" -pick = ["readTextFile", "listDirectory", "think", "attemptCompletion"] +pick = ["listDirectory", "think", "attemptCompletion"] [experts."@create-expert/planner"] version = "1.0.0" description = """ -Designs expert architectures for Perstack. Provide: (1) what the expert should do, (2) any existing perstack.toml content for context. \ -Returns a structured design document with expert names, descriptions, instruction summaries, skill requirements, delegation structure, and test scenarios. +Designs expert architectures for Perstack. Provide: (1) what the expert should do, (2) path to existing perstack.toml if one exists. \ +Writes the design plan to perstack/plan.md. """ instruction = """ -You are an expert architect for Perstack. Your role is to design expert systems that follow Perstack best practices. You produce a structured design document — you do not write TOML or files. +You are an expert architect for Perstack. Design expert systems that follow best practices, then write your design to perstack/plan.md. ## Perstack Best Practices @@ -59,6 +60,9 @@ Instructions that anyone can read and predict behavior. If someone else cannot r ### 5. Ship Early Start minimal, iterate based on real usage. Real users reveal actual edge cases. A complex initial design often solves the wrong problems. Ship, observe, iterate. +### 6. Thin Coordinators +Coordinators should only route work between delegates, not contain domain logic. If a coordinator needs to understand or transform data, that logic belongs in a delegate. + ## Perstack Expert Model - **description** = public interface. Seen by delegating experts as a tool description. Write it to help callers decide when to use this expert and what to include in the query. @@ -85,39 +89,41 @@ Start minimal, iterate based on real usage. Real users reveal actual edge cases. ## Design Process -1. Read existing files in the workspace if relevant to understand context +1. If an existing perstack.toml path was provided, read it to understand current state 2. Analyze whether the task needs one expert or a coordinator with delegates 3. For simple, focused tasks: design a single expert 4. For complex, multi-faceted tasks: design a coordinator with focused delegates -5. Consider what tools each expert needs (minimal set — only what the expert actually needs) +5. Consider what tools each expert needs (minimal set) 6. Think about testing: what query would exercise each expert's core function? -## Design Deliverables +## Output -Your completion result must include: +Write your design to perstack/plan.md with the following sections: 1. **Expert names/keys** — kebab-case, following coordinator/delegate naming convention if multi-expert -2. **Description for each expert** — optimized for callers (or users, for top-level experts) -3. **Instruction summary for each expert** — what domain knowledge to include, what rules/constraints/policies to embed, completion criteria +2. **Description for each expert** — optimized for callers +3. **Instruction summary for each expert** — what domain knowledge to include, rules/constraints/policies, completion criteria 4. **Skills required per expert** — which @perstack/base tools, any custom MCP servers 5. **Delegation structure** — who delegates to whom, with rationale 6. **Test scenario for each expert** — a concrete, realistic query that exercises the expert's core function + +After writing the file, attemptCompletion with the file path. """ [experts."@create-expert/planner".skills."@perstack/base"] type = "mcpStdioSkill" command = "npx" packageName = "@perstack/base" -pick = ["readTextFile", "listDirectory", "think", "attemptCompletion"] +pick = ["readTextFile", "writeTextFile", "listDirectory", "think", "attemptCompletion"] [experts."@create-expert/definition-writer"] version = "1.0.0" description = """ -Writes and tests Perstack expert definitions in perstack.toml. Provide: a design document specifying expert names, descriptions, instruction content, skills, and delegation structure. \ -Returns confirmation after the perstack.toml has been written and verified through testing. +Writes Perstack expert definitions in perstack.toml from a design plan. Provide: path to the plan file (e.g. perstack/plan.md). \ +Optionally include feedback from a previous test round to address. """ instruction = """ -You are a Perstack definition writer. Your role is to write, test, and verify expert definitions in perstack.toml based on a design document you receive. +You are a Perstack definition writer. Read a design plan file and write the corresponding perstack.toml. ## perstack.toml Schema Reference @@ -177,43 +183,75 @@ pick = ["tool1", "tool2"] # optional, include specific tools - Always include version, description, instruction for each expert - Produce valid TOML — no syntax errors -## Testing Workflow +## Process -After writing the perstack.toml, you must test the experts: +1. Read the plan file specified in the query +2. If a perstack.toml already exists, read it to understand current state +3. Write the perstack.toml with all expert definitions based on the plan +4. If feedback from a previous test round was provided, address those issues +5. attemptCompletion when the perstack.toml has been written +""" -1. Create the expert in memory using createExpert -2. Add it as a delegate using addDelegate -3. Design a realistic test query that exercises the expert's core function -4. Call the delegate with the test query -5. Verify artifacts: read generated files, check for completeness and quality -6. If issues found: removeDelegate, modify the definition, createExpert again, re-test -7. For coordinator experts: verify delegation occurred and results were synthesized +[experts."@create-expert/definition-writer".skills."@perstack/base"] +type = "mcpStdioSkill" +command = "npx" +packageName = "@perstack/base" +pick = ["readTextFile", "writeTextFile", "listDirectory", "think", "attemptCompletion"] -### Verification Checklist +[experts."@create-expert/expert-tester"] +version = "1.0.0" +description = """ +Tests Perstack expert definitions in a perstack.toml. Provide: path to the perstack.toml to test. \ +Creates each expert in memory, runs realistic test queries, and reports results. +""" +instruction = """ +You are a Perstack expert tester. Your job is to validate expert definitions by creating them in memory and running realistic test queries. -- All expected files were created/modified correctly -- No placeholder content (TODO, Lorem ipsum) where real content is expected -- Expert follows its instruction faithfully -- Output is complete and high quality -- TOML syntax is valid +## Testing Process -## Process +For each expert defined in the perstack.toml: -1. Read the design document provided in the query -2. If a perstack.toml already exists, read it to understand current state -3. Write the perstack.toml with all expert definitions based on the design -4. Test each expert with a realistic query -5. Iterate on definitions if tests reveal issues -6. Use attemptCompletion when all experts are written and verified +1. Read the perstack.toml to understand all expert definitions +2. Create the expert in memory using createExpert +3. Add it as a delegate using addDelegate +4. Design a realistic test query that exercises the expert's core function +5. Call the delegate with the test query +6. Verify the results + +## What to Test + +- **Single experts**: call with a realistic query, verify the output is complete and correct +- **Experts that write files**: verify files were created, read them, check contents +- **Coordinator experts**: verify delegation occurred and results were synthesized +- **Experts with exec**: verify commands were run correctly + +## Verification Criteria + +- Expert follows its instruction faithfully +- Output is complete — no placeholder content (TODO, Lorem ipsum) +- Files created are well-structured and correct +- TOML syntax in generated perstack.toml files is valid +- Delegation chains work end-to-end + +## Reporting + +If all experts pass: attemptCompletion with confirmation that all tests passed. + +If issues found: +1. removeDelegate and clean up the test expert +2. attemptCompletion with a detailed report of what failed and why, including: + - Which expert failed + - What the test query was + - What went wrong + - Suggested fix """ -[experts."@create-expert/definition-writer".skills."@perstack/base"] +[experts."@create-expert/expert-tester".skills."@perstack/base"] type = "mcpStdioSkill" command = "npx" packageName = "@perstack/base" pick = [ "readTextFile", - "writeTextFile", "listDirectory", "getFileInfo", "exec", From b99d9e562f027bf01e67a0d8c1f6b83ecc662f70 Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Fri, 20 Feb 2026 02:46:13 +0000 Subject: [PATCH 6/9] temp: remove maxSteps feature Co-Authored-By: Claude Opus 4.6 --- SECURITY.md | 1 - .../src/tools/run-expert.ts | 6 -- apps/create-expert/bin/cli.ts | 4 -- apps/perstack/README.md | 1 - apps/perstack/bin/cli.ts | 8 --- docs/guides/adding-ai-to-your-app.md | 1 - docs/guides/going-to-production.md | 2 +- docs/references/cli.md | 12 ++-- docs/references/events.md | 1 - docs/references/perstack-toml.md | 3 - docs/understanding-perstack/runtime.md | 6 +- docs/using-experts/error-handling.md | 3 - docs/using-experts/running-experts.md | 3 +- docs/using-experts/state-management.md | 18 ----- e2e/README.md | 2 - packages/core/src/constants/constants.ts | 1 - packages/core/src/schemas/checkpoint.ts | 2 - packages/core/src/schemas/job.ts | 4 -- packages/core/src/schemas/perstack-toml.ts | 3 - packages/core/src/schemas/run-command.test.ts | 20 ------ packages/core/src/schemas/run-command.ts | 11 --- packages/core/src/schemas/runtime.test.ts | 1 - packages/core/src/schemas/runtime.ts | 13 ---- packages/core/src/utils/activity.test.ts | 15 +--- packages/filesystem/README.md | 2 +- packages/filesystem/src/job.ts | 3 +- packages/log/src/data-fetcher.test.ts | 1 - packages/runtime/README.md | 2 +- .../runtime/src/helpers/checkpoint.test.ts | 1 - .../runtime/src/helpers/setup-experts.test.ts | 1 - .../coordinator-executor.test.ts | 1 - .../src/orchestration/coordinator-executor.ts | 1 - .../orchestration/delegation-executor.test.ts | 1 - .../src/orchestration/delegation-executor.ts | 2 +- packages/runtime/src/run.test.ts | 25 +------ packages/runtime/src/run.ts | 13 +--- packages/runtime/src/state-machine/machine.ts | 10 --- .../states/finishing-step.test.ts | 68 +------------------ .../state-machine/states/finishing-step.ts | 16 +---- packages/runtime/test/run-params.ts | 1 - packages/tui-components/src/constants.ts | 1 - .../src/hooks/state/use-runtime-info.ts | 2 - packages/tui-components/src/types/base.ts | 2 - packages/tui/src/run-handler.ts | 1 - packages/tui/src/start-handler.ts | 4 -- 45 files changed, 19 insertions(+), 280 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index 75a3917e..63337dc5 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -86,7 +86,6 @@ The `exec` tool blocks overriding critical variables (case-insensitive): - Run in isolated directories without sensitive data - Review tool calls in verbose mode -- Set `maxSteps` limits ### For Production diff --git a/apps/create-expert-skill/src/tools/run-expert.ts b/apps/create-expert-skill/src/tools/run-expert.ts index a0e20ce2..c4efd2dc 100644 --- a/apps/create-expert-skill/src/tools/run-expert.ts +++ b/apps/create-expert-skill/src/tools/run-expert.ts @@ -36,7 +36,6 @@ interface RunExpertInput { provider: string model?: string timeout: number - maxSteps?: number } interface RunExpertOutput { @@ -156,10 +155,6 @@ export async function runExpert(input: RunExpertInput): Promise args.push("--timeout", String(input.timeout)) } - if (input.maxSteps) { - args.push("--max-steps", String(input.maxSteps)) - } - args.push(input.expertKey, input.query) // Map PROVIDER_API_KEY to the provider-specific env var @@ -274,7 +269,6 @@ export function registerRunExpert(server: McpServer) { .optional() .default(120000) .describe("Timeout in milliseconds (default: 120000)"), - maxSteps: z.number().optional().describe("Maximum steps (optional)"), }, }, async (input: RunExpertInput) => { diff --git a/apps/create-expert/bin/cli.ts b/apps/create-expert/bin/cli.ts index a37c26b7..bfe830e0 100644 --- a/apps/create-expert/bin/cli.ts +++ b/apps/create-expert/bin/cli.ts @@ -26,10 +26,6 @@ new Command() "--reasoning-budget ", "Reasoning budget for native LLM reasoning (minimal, low, medium, high, or token count)", ) - .option( - "--max-steps ", - "Maximum number of steps to run, default is undefined (no limit)", - ) .option("--max-retries ", "Maximum number of generation retries, default is 5") .option( "--timeout ", diff --git a/apps/perstack/README.md b/apps/perstack/README.md index 638518ab..8788b0d9 100644 --- a/apps/perstack/README.md +++ b/apps/perstack/README.md @@ -43,7 +43,6 @@ perstack run | `--config ` | Path to `perstack.toml` | Auto-discover | | `--provider ` | LLM provider | `anthropic` | | `--model ` | Model name | `claude-sonnet-4-5` | -| `--max-steps ` | Maximum steps | unlimited | | `--max-retries ` | Max retry attempts | `5` | | `--timeout ` | Timeout per generation | `300000` | | `--reasoning-budget ` | Reasoning budget for native LLM reasoning | - | diff --git a/apps/perstack/bin/cli.ts b/apps/perstack/bin/cli.ts index 25377e20..53b6682d 100755 --- a/apps/perstack/bin/cli.ts +++ b/apps/perstack/bin/cli.ts @@ -37,10 +37,6 @@ program "--reasoning-budget ", "Reasoning budget for native LLM reasoning (minimal, low, medium, high, or token count)", ) - .option( - "--max-steps ", - "Maximum number of steps to run, default is undefined (no limit)", - ) .option("--max-retries ", "Maximum number of generation retries, default is 5") .option( "--timeout ", @@ -78,10 +74,6 @@ program "--reasoning-budget ", "Reasoning budget for native LLM reasoning (minimal, low, medium, high, or token count)", ) - .option( - "--max-steps ", - "Maximum number of steps to run, default is undefined (no limit)", - ) .option("--max-retries ", "Maximum number of generation retries, default is 5") .option( "--timeout ", diff --git a/docs/guides/adding-ai-to-your-app.md b/docs/guides/adding-ai-to-your-app.md index 9ef8f7e8..6c9197a3 100644 --- a/docs/guides/adding-ai-to-your-app.md +++ b/docs/guides/adding-ai-to-your-app.md @@ -66,7 +66,6 @@ When execution ends, the final checkpoint indicates why: | --------------------------- | --------------------------------- | --------------------------------------- | | `completed` | Expert finished successfully | Done, or continue with new query | | `stoppedByInteractiveTool` | Expert called an interactive tool | Your app handles the tool, then resumes | -| `stoppedByExceededMaxSteps` | Hit max steps limit | Continue or abort | | `stoppedByError` | Error occurred | Check logs, retry or abort | ### Continuing a run diff --git a/docs/guides/going-to-production.md b/docs/guides/going-to-production.md index 97936d73..051c218a 100644 --- a/docs/guides/going-to-production.md +++ b/docs/guides/going-to-production.md @@ -170,7 +170,7 @@ The agent operates freely within its sandbox. Your infrastructure controls what **Resource limits**: - [ ] Container memory limits set -- [ ] Execution time limits (via `--max-steps` or container timeout) +- [ ] Execution time limits (via container timeout) - [ ] Workspace size limits ## Scaling patterns diff --git a/docs/references/cli.md b/docs/references/cli.md index 08eab908..2b0ceb69 100644 --- a/docs/references/cli.md +++ b/docs/references/cli.md @@ -50,11 +50,10 @@ Providers: `anthropic`, `google`, `openai`, `deepseek`, `ollama`, `azure-openai` ### Execution Control -| Option | Description | Default | -| ------------------- | -------------------------------------------- | --------- | -| `--max-steps ` | Maximum total steps across all Runs in a Job | `100` | -| `--max-retries ` | Max retry attempts per generation | `5` | -| `--timeout ` | Timeout per generation (ms) | `300000` | +| Option | Description | Default | +| ------------------- | --------------------------------- | --------- | +| `--max-retries ` | Max retry attempts per generation | `5` | +| `--timeout ` | Timeout per generation (ms) | `300000` | ### Reasoning @@ -126,8 +125,7 @@ npx perstack run my-expert "Review this code" # With model options npx perstack run my-expert "query" \ --provider google \ - --model gemini-2.5-pro \ - --max-steps 100 + --model gemini-2.5-pro # Continue Job with follow-up npx perstack run my-expert "initial query" diff --git a/docs/references/events.md b/docs/references/events.md index f5ef9890..89832177 100644 --- a/docs/references/events.md +++ b/docs/references/events.md @@ -96,7 +96,6 @@ interface BaseEvent { | --------------------------- | ------------------------------ | ----------------------------- | | `stopRunByInteractiveTool` | Stopped for user input | `checkpoint`, `step` | | `stopRunByDelegate` | Stopped for delegation | `checkpoint`, `step` | -| `stopRunByExceededMaxSteps` | Stopped due to max steps limit | `checkpoint`, `step` | | `stopRunByError` | Stopped due to error | `checkpoint`, `step`, `error` | ### StreamingEvent Types diff --git a/docs/references/perstack-toml.md b/docs/references/perstack-toml.md index cd5ea529..01dddfd9 100644 --- a/docs/references/perstack-toml.md +++ b/docs/references/perstack-toml.md @@ -15,7 +15,6 @@ sidebar: # Runtime configuration model = "claude-sonnet-4-5" reasoningBudget = "medium" -maxSteps = 50 maxRetries = 3 timeout = 60000 envPath = [".env", ".env.local"] @@ -90,7 +89,6 @@ Top-level settings that apply to all Experts in the file. ```toml model = "claude-sonnet-4-5" reasoningBudget = "medium" -maxSteps = 100 maxRetries = 10 timeout = 60000 @@ -105,7 +103,6 @@ headers = { "X-Custom-Header" = "value" } | ----------------- | ---------------- | -------------------------------------------------------------------------------- | | `model` | string | Model name | | `reasoningBudget` | string or number | Native LLM reasoning budget (`minimal`, `low`, `medium`, `high`, or token count) | -| `maxSteps` | number | Maximum steps per run | | `maxRetries` | number | Maximum retry attempts | | `timeout` | number | Timeout per generation (ms) | | `envPath` | string[] | Paths to environment files | diff --git a/docs/understanding-perstack/runtime.md b/docs/understanding-perstack/runtime.md index 3094e46a..07eded01 100644 --- a/docs/understanding-perstack/runtime.md +++ b/docs/understanding-perstack/runtime.md @@ -59,7 +59,6 @@ Each Run executes through an agent loop: The loop ends when: - LLM calls `attemptCompletion` with all todos complete (or no todos) -- Job reaches `maxSteps` limit - External signal (SIGTERM/SIGINT) When `attemptCompletion` is called, the runtime checks the todo list. If incomplete todos remain, they are returned to the LLM to continue work. This prevents premature completion and ensures all planned tasks are addressed. @@ -77,19 +76,16 @@ Job (totalSteps = 8) └── Run 1 continues: step 5 → 6 → 7 → 8 ``` -The `maxSteps` limit applies to the Job's total steps across all Runs. - ### Stopping and resuming ```bash -npx perstack run my-expert "query" --max-steps 50 +npx perstack run my-expert "query" ``` | Stop condition | Behavior | Resume from | | ---------------------------------------- | -------------- | --------------------------------- | | `attemptCompletion` (no remaining todos) | Task complete | N/A | | `attemptCompletion` (remaining todos) | Continue loop | N/A (loop continues) | -| `maxSteps` reached | Graceful stop | Coordinator's last checkpoint | | SIGTERM/SIGINT | Immediate stop | Coordinator's previous checkpoint | > [!WARNING] diff --git a/docs/using-experts/error-handling.md b/docs/using-experts/error-handling.md index ecca741c..fa7d1ba7 100644 --- a/docs/using-experts/error-handling.md +++ b/docs/using-experts/error-handling.md @@ -23,13 +23,10 @@ A Run ends with one of these checkpoint statuses: | Status | Meaning | | --------------------------- | ------------------------------------------------------------------ | | `completed` | LLM called `attemptCompletion` with no remaining todos — task done | -| `stoppedByExceededMaxSteps` | Job's `maxSteps` limit reached | | `stoppedByInteractiveTool` | Waiting for user input (Coordinator only) | | `stoppedByDelegate` | Waiting for delegate Expert | | `stoppedByError` | Unrecoverable error | -When a Run stops with `stoppedByExceededMaxSteps`, you can resume from the last checkpoint. See [State Management](./state-management.md). - ## Delegation errors When a Delegated Expert fails, the Job continues — the error is returned to the Coordinator, which decides how to handle it. See [Delegation failure handling](../understanding-perstack/experts.md#delegation-failure-handling) for details. diff --git a/docs/using-experts/running-experts.md b/docs/using-experts/running-experts.md index 34e2c550..6948cc7e 100644 --- a/docs/using-experts/running-experts.md +++ b/docs/using-experts/running-experts.md @@ -56,8 +56,7 @@ npx perstack start @org/my-expert@1.0.0 "query" ```bash npx perstack start my-expert "query" \ --provider anthropic \ - --model claude-sonnet-4-5 \ - --max-steps 50 + --model claude-sonnet-4-5 ``` For the complete list of options, see [CLI Reference](../references/cli.md). diff --git a/docs/using-experts/state-management.md b/docs/using-experts/state-management.md index 57721df2..0bfd727e 100644 --- a/docs/using-experts/state-management.md +++ b/docs/using-experts/state-management.md @@ -111,24 +111,6 @@ The `-i` flag (or `--interactive-tool-call-result`) treats the query as the tool **Note:** Interactive tools are only available to the Coordinator Expert. See [Why no interactive tools for delegates?](../understanding-perstack/experts.md#why-no-interactive-tools-for-delegates) -## Max steps - -The `--max-steps` option limits total steps across all Runs in a Job: - -```bash -npx perstack run my-expert "query" --max-steps 50 -``` - -``` -Job (maxSteps = 50) - ├── Run 1: 10 steps - ├── Run 2: 5 steps - └── Run 3: 8 steps - Total: 23 steps (can continue until 50) -``` - -When total steps reach the limit, the Job stops gracefully. - ## Events Every state change is recorded as an event in `perstack/jobs/{jobId}/runs/{runId}/`. diff --git a/e2e/README.md b/e2e/README.md index 95783d9a..f6798931 100644 --- a/e2e/README.md +++ b/e2e/README.md @@ -144,7 +144,6 @@ e2e/ | ------------------------------------ | ------------------------- | | `should accept --provider option` | Verify provider option | | `should accept --model option` | Verify model option | -| `should accept --max-steps option` | Verify max-steps option | | `should accept --max-retries option` | Verify max-retries option | | `should accept --timeout option` | Verify timeout option | | `should accept --job-id option` | Verify job-id option | @@ -156,7 +155,6 @@ e2e/ | Test | Purpose | | ------------------------------------------------------------ | ---------------------------- | -| `should accept --max-steps option and complete within limit` | Verify max-steps enforcement | | `should accept --max-retries option` | Verify max-retries option | #### Skills (`skills.test.ts`) diff --git a/packages/core/src/constants/constants.ts b/packages/core/src/constants/constants.ts index e81d985f..3023d33e 100644 --- a/packages/core/src/constants/constants.ts +++ b/packages/core/src/constants/constants.ts @@ -8,7 +8,6 @@ export const expertVersionRegex = /^(?:0|[1-9]\d*)\.(?:0|[1-9]\d*)\.(?:0|[1-9]\d*)(?:-[\w.-]+)?(?:\+[\w.-]+)?$/ export const tagNameRegex = /^[a-z0-9][a-z0-9_-]*$/ export const maxExpertNameLength = 255 -export const defaultMaxSteps = 100 export const defaultMaxRetries = 5 export const defaultTimeout = 5 * 1000 * 60 diff --git a/packages/core/src/schemas/checkpoint.ts b/packages/core/src/schemas/checkpoint.ts index b63f7b3e..06527d91 100644 --- a/packages/core/src/schemas/checkpoint.ts +++ b/packages/core/src/schemas/checkpoint.ts @@ -15,7 +15,6 @@ export type CheckpointStatus = | "completed" | "stoppedByInteractiveTool" | "stoppedByDelegate" - | "stoppedByExceededMaxSteps" | "stoppedByError" | "stoppedByCancellation" @@ -25,7 +24,6 @@ export const checkpointStatusSchema = z.enum([ "completed", "stoppedByInteractiveTool", "stoppedByDelegate", - "stoppedByExceededMaxSteps", "stoppedByError", "stoppedByCancellation", ]) diff --git a/packages/core/src/schemas/job.ts b/packages/core/src/schemas/job.ts index 54b9d183..d718cab4 100644 --- a/packages/core/src/schemas/job.ts +++ b/packages/core/src/schemas/job.ts @@ -7,7 +7,6 @@ import { usageSchema } from "./usage.js" export type JobStatus = | "running" | "completed" - | "stoppedByMaxSteps" | "stoppedByInteractiveTool" | "stoppedByError" | "stoppedByCancellation" @@ -15,7 +14,6 @@ export type JobStatus = export const jobStatusSchema = z.enum([ "running", "completed", - "stoppedByMaxSteps", "stoppedByInteractiveTool", "stoppedByError", "stoppedByCancellation", @@ -27,7 +25,6 @@ export interface Job { coordinatorExpertKey: string runtimeVersion: RuntimeVersion totalSteps: number - maxSteps?: number usage: Usage startedAt: number finishedAt?: number @@ -39,7 +36,6 @@ export const jobSchema = z.object({ coordinatorExpertKey: z.string(), runtimeVersion: runtimeVersionSchema, totalSteps: z.number(), - maxSteps: z.number().optional(), usage: usageSchema, startedAt: z.number(), finishedAt: z.number().optional(), diff --git a/packages/core/src/schemas/perstack-toml.ts b/packages/core/src/schemas/perstack-toml.ts index 0d3ff692..82b449a3 100644 --- a/packages/core/src/schemas/perstack-toml.ts +++ b/packages/core/src/schemas/perstack-toml.ts @@ -217,8 +217,6 @@ export interface PerstackConfig { model?: string /** Reasoning budget for native LLM reasoning (extended thinking) */ reasoningBudget?: ReasoningBudget - /** Maximum steps per run */ - maxSteps?: number /** Maximum retries on generation failure */ maxRetries?: number /** Timeout per generation in milliseconds */ @@ -237,7 +235,6 @@ export const perstackConfigSchema = z.object({ provider: providerTableSchema.optional(), model: z.string().optional(), reasoningBudget: reasoningBudgetSchema.optional(), - maxSteps: z.number().optional(), maxRetries: z.number().optional(), timeout: z.number().optional(), experts: z diff --git a/packages/core/src/schemas/run-command.test.ts b/packages/core/src/schemas/run-command.test.ts index 50459d9e..899f2b56 100644 --- a/packages/core/src/schemas/run-command.test.ts +++ b/packages/core/src/schemas/run-command.test.ts @@ -12,24 +12,6 @@ describe("@perstack/core: runCommandInputSchema", () => { expect(result.query).toBe("Hello world") }) - it("transforms maxSteps string to number", () => { - const result = runCommandInputSchema.parse({ - expertKey: "test-expert", - query: "test", - options: { maxSteps: "10" }, - }) - expect(result.options.maxSteps).toBe(10) - }) - - it("returns undefined for invalid maxSteps", () => { - const result = runCommandInputSchema.parse({ - expertKey: "test-expert", - query: "test", - options: { maxSteps: "not-a-number" }, - }) - expect(result.options.maxSteps).toBeUndefined() - }) - it("transforms maxRetries string to number", () => { const result = runCommandInputSchema.parse({ expertKey: "test-expert", @@ -71,12 +53,10 @@ describe("@perstack/core: runCommandInputSchema", () => { expertKey: "test-expert", query: "test", options: { - maxSteps: undefined, maxRetries: undefined, timeout: undefined, }, }) - expect(result.options.maxSteps).toBeUndefined() expect(result.options.maxRetries).toBeUndefined() expect(result.options.timeout).toBeUndefined() }) diff --git a/packages/core/src/schemas/run-command.ts b/packages/core/src/schemas/run-command.ts index 4f5b7a0e..2276ee7a 100644 --- a/packages/core/src/schemas/run-command.ts +++ b/packages/core/src/schemas/run-command.ts @@ -14,8 +14,6 @@ export interface CommandOptions { model?: string /** Reasoning budget for native LLM reasoning (extended thinking) */ reasoningBudget?: ReasoningBudget - /** Maximum steps */ - maxSteps?: number /** Maximum retries */ maxRetries?: number /** Timeout in milliseconds */ @@ -59,15 +57,6 @@ const commandOptionsSchema = z.object({ return parsedValue }) .pipe(reasoningBudgetSchema.optional()), - maxSteps: z - .string() - .optional() - .transform((value) => { - if (value === undefined) return undefined - const parsedValue = Number.parseInt(value, 10) - if (Number.isNaN(parsedValue)) return undefined - return parsedValue - }), maxRetries: z .string() .optional() diff --git a/packages/core/src/schemas/runtime.test.ts b/packages/core/src/schemas/runtime.test.ts index c6ebda00..7fbfd681 100644 --- a/packages/core/src/schemas/runtime.test.ts +++ b/packages/core/src/schemas/runtime.test.ts @@ -49,7 +49,6 @@ describe("@perstack/core: createEvent", () => { input: { text: "test" }, experts: {}, reasoningBudget: "low" as const, - maxSteps: 100, maxRetries: 3, timeout: 30000, startedAt: Date.now(), diff --git a/packages/core/src/schemas/runtime.ts b/packages/core/src/schemas/runtime.ts index 58fc2dcc..b1d53271 100644 --- a/packages/core/src/schemas/runtime.ts +++ b/packages/core/src/schemas/runtime.ts @@ -2,7 +2,6 @@ import { createId } from "@paralleldrive/cuid2" import { z } from "zod" import { defaultMaxRetries, - defaultMaxSteps, defaultPerstackApiBaseUrl, defaultTimeout, expertKeyRegex, @@ -77,8 +76,6 @@ export interface RunSetting { experts: Record /** Reasoning budget for native LLM reasoning (extended thinking). Defaults to "low". Use "none" or 0 to disable. */ reasoningBudget: ReasoningBudget - /** Maximum steps before stopping (applies to Job's totalSteps) */ - maxSteps: number /** Maximum retries on generation failure */ maxRetries: number /** Timeout per generation in milliseconds */ @@ -131,7 +128,6 @@ export type RunParamsInput = { input: RunInput experts?: Record reasoningBudget?: ReasoningBudget - maxSteps?: number maxRetries?: number timeout?: number startedAt?: number @@ -165,7 +161,6 @@ export const runSettingSchema = z.object({ }), experts: z.record(z.string(), expertSchema), reasoningBudget: reasoningBudgetSchema.default(defaultReasoningBudget), - maxSteps: z.number().min(1).optional().default(defaultMaxSteps), maxRetries: z.number().min(0), timeout: z.number().min(0), startedAt: z.number(), @@ -218,7 +213,6 @@ export const runParamsSchema = z.object({ ), ), reasoningBudget: reasoningBudgetSchema.optional().default(defaultReasoningBudget), - maxSteps: z.number().min(1).optional().default(defaultMaxSteps), maxRetries: z.number().min(0).optional().default(defaultMaxRetries), timeout: z.number().min(0).optional().default(defaultTimeout), startedAt: z.number().optional().default(Date.now()), @@ -296,10 +290,6 @@ type ExpertStatePayloads = { checkpoint: Checkpoint step: Step } - stopRunByExceededMaxSteps: { - checkpoint: Checkpoint - step: Step - } stopRunByError: { checkpoint: Checkpoint step: Step @@ -445,7 +435,6 @@ export const resumeToolCalls = createEvent("resumeToolCalls") export const completeRun = createEvent("completeRun") export const stopRunByInteractiveTool = createEvent("stopRunByInteractiveTool") export const stopRunByDelegate = createEvent("stopRunByDelegate") -export const stopRunByExceededMaxSteps = createEvent("stopRunByExceededMaxSteps") export const stopRunByError = createEvent("stopRunByError") export const continueToNextStep = createEvent("continueToNextStep") @@ -468,7 +457,6 @@ type RuntimeEventPayloads = { expertName: string experts: string[] model: string - maxSteps?: number maxRetries: number timeout: number query?: string @@ -553,7 +541,6 @@ const EXPERT_STATE_EVENT_TYPES = new Set([ "continueToNextStep", "stopRunByInteractiveTool", "stopRunByDelegate", - "stopRunByExceededMaxSteps", "stopRunByError", "completeRun", ]) diff --git a/packages/core/src/utils/activity.test.ts b/packages/core/src/utils/activity.test.ts index c1d9b682..8629db50 100644 --- a/packages/core/src/utils/activity.test.ts +++ b/packages/core/src/utils/activity.test.ts @@ -319,19 +319,6 @@ describe("getActivities", () => { expect(activities).toEqual([]) }) - it("still returns RetryActivity for stoppedByExceededMaxSteps with no tool calls", () => { - const checkpoint = createBaseCheckpoint({ - status: "stoppedByExceededMaxSteps", - stepNumber: 2, - }) - const step = createBaseStep({ stepNumber: 2, toolCalls: [], toolResults: [] }) - - const activities = getActivities({ checkpoint, step }) - - expect(activities).toHaveLength(1) - expect(activities[0].type).toBe("retry") - }) - it("still returns RetryActivity for stoppedByCancellation with no tool calls", () => { const checkpoint = createBaseCheckpoint({ status: "stoppedByCancellation", @@ -348,7 +335,7 @@ describe("getActivities", () => { describe("retry activity", () => { it("returns retry activity when no tool call or result", () => { - const checkpoint = createBaseCheckpoint({ status: "stoppedByExceededMaxSteps" }) + const checkpoint = createBaseCheckpoint({ status: "stoppedByCancellation" }) const step = createBaseStep({ newMessages: [ { diff --git a/packages/filesystem/README.md b/packages/filesystem/README.md index 361af66c..ab6c1d42 100644 --- a/packages/filesystem/README.md +++ b/packages/filesystem/README.md @@ -47,7 +47,7 @@ const jobs = getAllJobs() | Function | Description | | --- | --- | -| `createInitialJob(jobId, expertKey, maxSteps?)` | Create initial job object | +| `createInitialJob(jobId, expertKey)` | Create initial job object | | `storeJob(job)` | Store job to filesystem | | `retrieveJob(jobId)` | Retrieve job by ID | | `getAllJobs()` | Get all jobs sorted by start time | diff --git a/packages/filesystem/src/job.ts b/packages/filesystem/src/job.ts index 6f9f21f0..5fe6d7d0 100644 --- a/packages/filesystem/src/job.ts +++ b/packages/filesystem/src/job.ts @@ -63,14 +63,13 @@ export function getAllJobs(): Job[] { return jobs.sort((a, b) => b.startedAt - a.startedAt) } -export function createInitialJob(jobId: string, expertKey: string, maxSteps?: number): Job { +export function createInitialJob(jobId: string, expertKey: string): Job { return { id: jobId, status: "running", coordinatorExpertKey: expertKey, runtimeVersion: "v1.0", totalSteps: 0, - maxSteps, usage: { inputTokens: 0, outputTokens: 0, diff --git a/packages/log/src/data-fetcher.test.ts b/packages/log/src/data-fetcher.test.ts index a04446b4..8b19f754 100644 --- a/packages/log/src/data-fetcher.test.ts +++ b/packages/log/src/data-fetcher.test.ts @@ -72,7 +72,6 @@ const mockRunSetting: RunSetting = { input: { text: "test query" }, experts: {}, reasoningBudget: "low", - maxSteps: 100, maxRetries: 5, timeout: 60000, startedAt: 1700000000000, diff --git a/packages/runtime/README.md b/packages/runtime/README.md index 9102c3b9..fa143de2 100644 --- a/packages/runtime/README.md +++ b/packages/runtime/README.md @@ -95,7 +95,7 @@ ResumingFromStop CallingDelegates → CallingInteract ResolvingToolResult → FinishingStep → (loop) ``` -Terminal states: `completed`, `stoppedByError`, `stoppedByExceededMaxSteps`, `stoppedByInteractiveTool`, `stoppedByDelegate`, `stoppedByCancellation` +Terminal states: `completed`, `stoppedByError`, `stoppedByInteractiveTool`, `stoppedByDelegate`, `stoppedByCancellation` ## Related Documentation diff --git a/packages/runtime/src/helpers/checkpoint.test.ts b/packages/runtime/src/helpers/checkpoint.test.ts index c5400fc5..b19c2ad0 100644 --- a/packages/runtime/src/helpers/checkpoint.test.ts +++ b/packages/runtime/src/helpers/checkpoint.test.ts @@ -100,7 +100,6 @@ describe("@perstack/runtime: buildDelegationReturnState", () => { input: { text: "child query" }, experts: {}, reasoningBudget: "low", - maxSteps: 100, maxRetries: 3, timeout: 30000, startedAt: 1000, diff --git a/packages/runtime/src/helpers/setup-experts.test.ts b/packages/runtime/src/helpers/setup-experts.test.ts index aef2c336..2152bf65 100644 --- a/packages/runtime/src/helpers/setup-experts.test.ts +++ b/packages/runtime/src/helpers/setup-experts.test.ts @@ -23,7 +23,6 @@ describe("@perstack/runtime: setupExperts", () => { input: { text: "hello" }, experts: {}, reasoningBudget: "low", - maxSteps: 100, maxRetries: 3, timeout: 30000, startedAt: 1000, diff --git a/packages/runtime/src/orchestration/coordinator-executor.test.ts b/packages/runtime/src/orchestration/coordinator-executor.test.ts index 8fa1844c..908f1d27 100644 --- a/packages/runtime/src/orchestration/coordinator-executor.test.ts +++ b/packages/runtime/src/orchestration/coordinator-executor.test.ts @@ -117,7 +117,6 @@ const createMockSetting = (overrides?: Partial): RunSetting => runId: "run-1", expertKey: "test-expert", model: "claude-sonnet-4-20250514", - maxSteps: 10, maxRetries: 3, timeout: 30000, providerConfig: { providerName: "anthropic" }, diff --git a/packages/runtime/src/orchestration/coordinator-executor.ts b/packages/runtime/src/orchestration/coordinator-executor.ts index 80c79c7d..10390284 100644 --- a/packages/runtime/src/orchestration/coordinator-executor.ts +++ b/packages/runtime/src/orchestration/coordinator-executor.ts @@ -187,7 +187,6 @@ export class CoordinatorExecutor { expertName: expertToRun.name, experts: Object.keys(experts), model: setting.model, - maxSteps: setting.maxSteps, maxRetries: setting.maxRetries, timeout: setting.timeout, query: setting.input.text, diff --git a/packages/runtime/src/orchestration/delegation-executor.test.ts b/packages/runtime/src/orchestration/delegation-executor.test.ts index 3b1fbe4f..2a9536f5 100644 --- a/packages/runtime/src/orchestration/delegation-executor.test.ts +++ b/packages/runtime/src/orchestration/delegation-executor.test.ts @@ -20,7 +20,6 @@ const createMockSetting = (overrides?: Partial): RunSetting => runId: "run-1", expertKey: "expert-1", model: "claude-sonnet-4-20250514", - maxSteps: 10, maxRetries: 3, timeout: 30000, providerConfig: { providerName: "anthropic" }, diff --git a/packages/runtime/src/orchestration/delegation-executor.ts b/packages/runtime/src/orchestration/delegation-executor.ts index 9cf809f4..66afcd71 100644 --- a/packages/runtime/src/orchestration/delegation-executor.ts +++ b/packages/runtime/src/orchestration/delegation-executor.ts @@ -244,7 +244,7 @@ export class DelegationExecutor { } } - // Handle non-completed delegation (stoppedByError, stoppedByExceededMaxSteps, etc.) + // Handle non-completed delegation (stoppedByError, stoppedByCancellation, etc.) if (resultCheckpoint.status !== "completed") { return { toolCallId, diff --git a/packages/runtime/src/run.test.ts b/packages/runtime/src/run.test.ts index 86987d79..4716e089 100644 --- a/packages/runtime/src/run.test.ts +++ b/packages/runtime/src/run.test.ts @@ -55,15 +55,6 @@ describe("@perstack/runtime: run", () => { expect(result.status).toBe("stoppedByInteractiveTool") }) - it("returns checkpoint on stoppedByExceededMaxSteps status", async () => { - const stoppedCheckpoint = createCheckpoint({ status: "stoppedByExceededMaxSteps" }) - setupMockExecutor([{ checkpoint: stoppedCheckpoint }]) - - const result = await run({ setting, checkpoint }) - - expect(result.status).toBe("stoppedByExceededMaxSteps") - }) - it("returns checkpoint on stoppedByError status", async () => { const stoppedCheckpoint = createCheckpoint({ status: "stoppedByError" }) setupMockExecutor([{ checkpoint: stoppedCheckpoint }]) @@ -180,19 +171,6 @@ describe("@perstack/runtime: run", () => { expect(lastCall.finishedAt).toBeDefined() }) - it("updates job status to stoppedByMaxSteps on exceeded max steps", async () => { - const stoppedCheckpoint = createCheckpoint({ status: "stoppedByExceededMaxSteps" }) - setupMockExecutor([{ checkpoint: stoppedCheckpoint }]) - - const storeJob = vi.fn() - - await run({ setting, checkpoint }, { storeJob }) - - const lastCall = storeJob.mock.calls[storeJob.mock.calls.length - 1][0] as Job - expect(lastCall.status).toBe("stoppedByMaxSteps") - expect(lastCall.finishedAt).toBeDefined() - }) - it("updates job status to stoppedByError on error", async () => { const stoppedCheckpoint = createCheckpoint({ status: "stoppedByError" }) setupMockExecutor([{ checkpoint: stoppedCheckpoint }]) @@ -320,7 +298,6 @@ describe("@perstack/runtime: run", () => { status: "running", totalSteps: 0, startedAt: Date.now(), - maxSteps: 100, usage: createEmptyUsage(), } const createJob = vi.fn().mockReturnValue(customJob) @@ -328,7 +305,7 @@ describe("@perstack/runtime: run", () => { await run({ setting, checkpoint }, { createJob, storeJob }) - expect(createJob).toHaveBeenCalledWith("test-job-id", "test-expert", 10) + expect(createJob).toHaveBeenCalledWith("test-job-id", "test-expert") }) }) }) diff --git a/packages/runtime/src/run.ts b/packages/runtime/src/run.ts index 760d654c..b17d22ee 100755 --- a/packages/runtime/src/run.ts +++ b/packages/runtime/src/run.ts @@ -29,7 +29,7 @@ export type RunOptions = { storeEvent?: (event: RunEvent) => Promise storeJob?: (job: Job) => Promise retrieveJob?: (jobId: string) => Promise - createJob?: (jobId: string, expertKey: string, maxSteps?: number) => Job + createJob?: (jobId: string, expertKey: string) => Job eventListener?: (event: RunEvent | RuntimeEvent) => void resolveExpertToRun?: ResolveExpertToRunFn returnOnDelegationComplete?: boolean @@ -39,7 +39,6 @@ export type RunOptions = { const defaultCreateJob = ( jobId: string, expertKey: string, - maxSteps?: number, runtimeVersion: RuntimeVersion = getCurrentRuntimeVersion(), ): Job => ({ id: jobId, @@ -48,7 +47,6 @@ const defaultCreateJob = ( status: "running", totalSteps: 0, startedAt: Date.now(), - maxSteps, usage: createEmptyUsage(), }) @@ -76,9 +74,7 @@ export async function run(runInput: RunParamsInput, options?: RunOptions): Promi }) const createJob = options?.createJob ?? defaultCreateJob - let job: Job = - (await retrieveJob(setting.jobId)) ?? - createJob(setting.jobId, setting.expertKey, setting.maxSteps) + let job: Job = (await retrieveJob(setting.jobId)) ?? createJob(setting.jobId, setting.expertKey) if (job.status !== "running") { job = { ...job, status: "running", finishedAt: undefined } } @@ -155,11 +151,6 @@ export async function run(runInput: RunParamsInput, options?: RunOptions): Promi break } - case "stoppedByExceededMaxSteps": { - await storeJob({ ...job, status: "stoppedByMaxSteps", finishedAt: Date.now() }) - return resultCheckpoint - } - case "stoppedByError": { await storeJob({ ...job, status: "stoppedByError", finishedAt: Date.now() }) return resultCheckpoint diff --git a/packages/runtime/src/state-machine/machine.ts b/packages/runtime/src/state-machine/machine.ts index f2e8371e..f1fee8ab 100644 --- a/packages/runtime/src/state-machine/machine.ts +++ b/packages/runtime/src/state-machine/machine.ts @@ -338,16 +338,6 @@ export const runtimeStateMachine = setup({ }), reenter: true, }, - stopRunByExceededMaxSteps: { - target: "Stopped", - actions: assign({ - checkpoint: ({ event }) => event.checkpoint, - step: ({ event }) => ({ - ...event.step, - inputMessages: undefined, - }), - }), - }, }, }, diff --git a/packages/runtime/src/state-machine/states/finishing-step.test.ts b/packages/runtime/src/state-machine/states/finishing-step.test.ts index e0f18b27..f17ae63c 100644 --- a/packages/runtime/src/state-machine/states/finishing-step.test.ts +++ b/packages/runtime/src/state-machine/states/finishing-step.test.ts @@ -12,8 +12,8 @@ import { StateMachineLogics } from "../machine.js" const mockLLMExecutor = createMockLLMExecutor() as unknown as LLMExecutor describe("@perstack/runtime: StateMachineLogic['FinishingStep']", () => { - it("finishes steps correctly when within max steps", async () => { - const setting = createRunSetting({ maxSteps: 5 }) + it("continues to next step", async () => { + const setting = createRunSetting() const checkpoint = createCheckpoint({ stepNumber: 2 }) const step = createStep({ stepNumber: 2 }) await expect( @@ -49,68 +49,4 @@ describe("@perstack/runtime: StateMachineLogic['FinishingStep']", () => { }, }) }) - - it("stops when step number reaches max steps", async () => { - const setting = createRunSetting({ maxSteps: 3 }) - const checkpoint = createCheckpoint({ stepNumber: 3 }) - const step = createStep({ stepNumber: 3 }) - await expect( - StateMachineLogics.FinishingStep({ - setting, - checkpoint, - step, - eventListener: async () => {}, - skillManager: createMockSkillManagerFromAdapters({}), - llmExecutor: mockLLMExecutor, - }), - ).resolves.toStrictEqual({ - type: "stopRunByExceededMaxSteps", - id: expect.any(String), - expertKey: setting.expertKey, - timestamp: expect.any(Number), - jobId: setting.jobId, - runId: setting.runId, - stepNumber: checkpoint.stepNumber, - checkpoint: { - ...checkpoint, - status: "stoppedByExceededMaxSteps", - }, - step: { - ...step, - finishedAt: expect.any(Number), - }, - }) - }) - - it("stops when step number exceeds max steps", async () => { - const setting = createRunSetting({ maxSteps: 3 }) - const checkpoint = createCheckpoint({ stepNumber: 5 }) - const step = createStep({ stepNumber: 5 }) - await expect( - StateMachineLogics.FinishingStep({ - setting, - checkpoint, - step, - eventListener: async () => {}, - skillManager: createMockSkillManagerFromAdapters({}), - llmExecutor: mockLLMExecutor, - }), - ).resolves.toStrictEqual({ - type: "stopRunByExceededMaxSteps", - id: expect.any(String), - expertKey: setting.expertKey, - timestamp: expect.any(Number), - jobId: setting.jobId, - runId: setting.runId, - stepNumber: checkpoint.stepNumber, - checkpoint: { - ...checkpoint, - status: "stoppedByExceededMaxSteps", - }, - step: { - ...step, - finishedAt: expect.any(Number), - }, - }) - }) }) diff --git a/packages/runtime/src/state-machine/states/finishing-step.ts b/packages/runtime/src/state-machine/states/finishing-step.ts index 83d5a5e7..342ae935 100644 --- a/packages/runtime/src/state-machine/states/finishing-step.ts +++ b/packages/runtime/src/state-machine/states/finishing-step.ts @@ -1,35 +1,21 @@ import { createId } from "@paralleldrive/cuid2" -import { continueToNextStep, type RunEvent, stopRunByExceededMaxSteps } from "@perstack/core" +import { continueToNextStep, type RunEvent } from "@perstack/core" import type { RunSnapshot } from "../machine.js" /** * FinishingStep state: Complete current step and decide next action. * * Responsibilities: - * - Check if max steps exceeded * - Increment step number for next iteration * * Transitions: * - continueToNextStep → PreparingForStep (continue loop) - * - stopRunByExceededMaxSteps → Stopped (limit reached) */ export async function finishingStepLogic({ setting, checkpoint, step, }: RunSnapshot["context"]): Promise { - if (setting.maxSteps !== undefined && checkpoint.stepNumber >= setting.maxSteps) { - return stopRunByExceededMaxSteps(setting, checkpoint, { - checkpoint: { - ...checkpoint, - status: "stoppedByExceededMaxSteps", - }, - step: { - ...step, - finishedAt: Date.now(), - }, - }) - } return continueToNextStep(setting, checkpoint, { checkpoint: { ...checkpoint, diff --git a/packages/runtime/test/run-params.ts b/packages/runtime/test/run-params.ts index 3caeaacf..bca3b042 100644 --- a/packages/runtime/test/run-params.ts +++ b/packages/runtime/test/run-params.ts @@ -26,7 +26,6 @@ export function createRunSetting(overrides: Partial = model: "claude-sonnet-4-20250514", expertKey: "test-expert", maxRetries: 3, - maxSteps: 10, timeout: 1000, startedAt: Date.now(), updatedAt: Date.now(), diff --git a/packages/tui-components/src/constants.ts b/packages/tui-components/src/constants.ts index 3924f920..d8cb87c6 100644 --- a/packages/tui-components/src/constants.ts +++ b/packages/tui-components/src/constants.ts @@ -26,7 +26,6 @@ export const INDICATOR = { export const STOP_EVENT_TYPES = [ "stopRunByInteractiveTool", - "stopRunByExceededMaxSteps", ] as const satisfies readonly RunEvent["type"][] export const KEY_BINDINGS = { diff --git a/packages/tui-components/src/hooks/state/use-runtime-info.ts b/packages/tui-components/src/hooks/state/use-runtime-info.ts index b78e7cfd..9c7c411c 100644 --- a/packages/tui-components/src/hooks/state/use-runtime-info.ts +++ b/packages/tui-components/src/hooks/state/use-runtime-info.ts @@ -13,7 +13,6 @@ export const useRuntimeInfo = (options: UseRuntimeInfoOptions) => { runtimeVersion: options.initialConfig.runtimeVersion, expertName: options.initialExpertName, model: options.initialConfig.model, - maxSteps: options.initialConfig.maxSteps, maxRetries: options.initialConfig.maxRetries, timeout: options.initialConfig.timeout, activeSkills: [], @@ -25,7 +24,6 @@ export const useRuntimeInfo = (options: UseRuntimeInfoOptions) => { runtimeVersion: event.runtimeVersion, expertName: event.expertName, model: event.model, - maxSteps: event.maxSteps, maxRetries: event.maxRetries, timeout: event.timeout, currentStep: 1, diff --git a/packages/tui-components/src/types/base.ts b/packages/tui-components/src/types/base.ts index b6e94bf0..a73e275b 100644 --- a/packages/tui-components/src/types/base.ts +++ b/packages/tui-components/src/types/base.ts @@ -3,7 +3,6 @@ export type RuntimeInfo = { expertName?: string model: string currentStep?: number - maxSteps?: number maxRetries: number timeout: number status: "initializing" | "running" | "completed" | "stopped" @@ -17,7 +16,6 @@ export type RuntimeInfo = { export type InitialRuntimeConfig = { runtimeVersion: string model: string - maxSteps?: number maxRetries: number timeout: number contextWindowUsage: number diff --git a/packages/tui/src/run-handler.ts b/packages/tui/src/run-handler.ts index dca57087..58c2dc3d 100644 --- a/packages/tui/src/run-handler.ts +++ b/packages/tui/src/run-handler.ts @@ -83,7 +83,6 @@ export async function runHandler( model, providerConfig, reasoningBudget: input.options.reasoningBudget ?? perstackConfig.reasoningBudget, - maxSteps: input.options.maxSteps ?? perstackConfig.maxSteps, maxRetries: input.options.maxRetries ?? perstackConfig.maxRetries, timeout: input.options.timeout ?? perstackConfig.timeout, perstackApiBaseUrl: perstackConfig.perstackApiBaseUrl, diff --git a/packages/tui/src/start-handler.ts b/packages/tui/src/start-handler.ts index bd20da9c..b6248e93 100644 --- a/packages/tui/src/start-handler.ts +++ b/packages/tui/src/start-handler.ts @@ -64,7 +64,6 @@ export async function startHandler( Object.assign(env, handlerOptions.additionalEnv(env)) } - const maxSteps = input.options.maxSteps ?? perstackConfig.maxSteps const maxRetries = input.options.maxRetries ?? perstackConfig.maxRetries ?? defaultMaxRetries const timeout = input.options.timeout ?? perstackConfig.timeout ?? defaultTimeout @@ -165,7 +164,6 @@ export async function startHandler( config: { runtimeVersion, model, - maxSteps, maxRetries, timeout, contextWindowUsage: currentCheckpoint?.contextWindowUsage ?? 0, @@ -191,7 +189,6 @@ export async function startHandler( model, providerConfig, reasoningBudget: input.options.reasoningBudget ?? perstackConfig.reasoningBudget, - maxSteps: input.options.maxSteps ?? perstackConfig.maxSteps, maxRetries: input.options.maxRetries ?? perstackConfig.maxRetries, timeout: input.options.timeout ?? perstackConfig.timeout, perstackApiBaseUrl: perstackConfig.perstackApiBaseUrl, @@ -220,7 +217,6 @@ export async function startHandler( // Check if user wants to continue const canContinue = runResult.status === "completed" || - runResult.status === "stoppedByExceededMaxSteps" || runResult.status === "stoppedByError" || runResult.status === "stoppedByInteractiveTool" From caee0e5a5934c97c4dd74411eb89112556aceb1b Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Fri, 20 Feb 2026 02:49:03 +0000 Subject: [PATCH 7/9] feat: add addDelegateFromConfig tool and refine create-expert workflow Add a new skill management tool that loads experts from a perstack.toml config file and adds them as delegates in a single step. Update create-expert definitions to use this tool, simplify plan file paths, and add workspace investigation guidance for delegates. Co-Authored-By: Claude Opus 4.6 --- apps/base/src/tools/skill-management.test.ts | 60 ++++++++++++++ apps/base/src/tools/skill-management.ts | 33 ++++++++ apps/create-expert/perstack.toml | 81 ++++++++++--------- .../src/messages/instruction-message.ts | 3 + packages/skill-manager/package.json | 3 +- .../src/adapters/in-memory-base-adapter.ts | 3 + .../skill-manager/src/skill-manager.test.ts | 2 + packages/skill-manager/src/skill-manager.ts | 41 ++++++++++ pnpm-lock.yaml | 3 + 9 files changed, 188 insertions(+), 41 deletions(-) diff --git a/apps/base/src/tools/skill-management.test.ts b/apps/base/src/tools/skill-management.test.ts index d75370a9..bd6657c4 100644 --- a/apps/base/src/tools/skill-management.test.ts +++ b/apps/base/src/tools/skill-management.test.ts @@ -2,6 +2,7 @@ import { describe, expect, it, vi } from "vitest" import type { SkillManagementCallbacks } from "./skill-management.js" import { registerAddDelegate, + registerAddDelegateFromConfig, registerAddSkill, registerCreateExpert, registerRemoveDelegate, @@ -15,6 +16,9 @@ function createMockCallbacks(): SkillManagementCallbacks { addDelegate: vi.fn().mockResolvedValue({ delegateToolName: "delegate-tool" }), removeDelegate: vi.fn().mockResolvedValue(undefined), createExpert: vi.fn().mockResolvedValue({ expertKey: "my-expert" }), + addDelegateFromConfig: vi + .fn() + .mockResolvedValue({ delegateToolName: "delegate-from-config-tool" }), } } @@ -250,4 +254,60 @@ describe("skill-management tools", () => { }) }) }) + + describe("addDelegateFromConfig", () => { + it("registers tool with correct metadata", () => { + const server = createMockServer() + const callbacks = createMockCallbacks() + registerAddDelegateFromConfig(server as never, callbacks) + expect(server.registerTool).toHaveBeenCalledWith( + "addDelegateFromConfig", + expect.objectContaining({ title: "Add delegate from config" }), + expect.any(Function), + ) + }) + + it("calls callback with correct input and returns delegate tool name", async () => { + const server = createMockServer() + const callbacks = createMockCallbacks() + registerAddDelegateFromConfig(server as never, callbacks) + const handler = getHandler(server) + const input = { + configPath: "/path/to/perstack.toml", + delegateExpertName: "my-expert", + } + const result = await handler(input) + expect(callbacks.addDelegateFromConfig).toHaveBeenCalledWith(input) + expect(result).toStrictEqual({ + content: [ + { + type: "text", + text: JSON.stringify({ delegateToolName: "delegate-from-config-tool" }), + }, + ], + }) + }) + + it("returns errorToolResult when callback throws", async () => { + const server = createMockServer() + const callbacks = createMockCallbacks() + ;(callbacks.addDelegateFromConfig as ReturnType).mockRejectedValue( + new Error("config not found"), + ) + registerAddDelegateFromConfig(server as never, callbacks) + const handler = getHandler(server) + const result = await handler({ + configPath: "/bad/path.toml", + delegateExpertName: "missing", + }) + expect(result).toStrictEqual({ + content: [ + { + type: "text", + text: JSON.stringify({ error: "Error", message: "config not found" }), + }, + ], + }) + }) + }) }) diff --git a/apps/base/src/tools/skill-management.ts b/apps/base/src/tools/skill-management.ts index e66c673c..385cfaa3 100644 --- a/apps/base/src/tools/skill-management.ts +++ b/apps/base/src/tools/skill-management.ts @@ -44,6 +44,10 @@ export interface SkillManagementCallbacks { tags?: string[] providerTools?: string[] }): Promise<{ expertKey: string }> + addDelegateFromConfig(input: { + configPath: string + delegateExpertName: string + }): Promise<{ delegateToolName: string }> } export function registerAddSkill(server: McpServer, callbacks: SkillManagementCallbacks) { @@ -233,6 +237,34 @@ export function registerCreateExpert(server: McpServer, callbacks: SkillManageme ) } +export function registerAddDelegateFromConfig( + server: McpServer, + callbacks: SkillManagementCallbacks, +) { + server.registerTool( + "addDelegateFromConfig", + { + title: "Add delegate from config", + description: + "Load all experts from a perstack.toml config file and add the specified one as a delegate. This is a shortcut that combines reading the config, creating the expert, and adding it as a delegate in a single step.", + inputSchema: { + configPath: z.string().describe("Path to the perstack.toml config file"), + delegateExpertName: z + .string() + .describe("Name of the expert in the config to add as a delegate"), + }, + }, + async (input: { configPath: string; delegateExpertName: string }) => { + try { + return successToolResult(await callbacks.addDelegateFromConfig(input)) + } catch (e) { + if (e instanceof Error) return errorToolResult(e) + throw e + } + }, + ) +} + export function registerSkillManagementTools( server: McpServer, callbacks: SkillManagementCallbacks, @@ -242,4 +274,5 @@ export function registerSkillManagementTools( registerAddDelegate(server, callbacks) registerRemoveDelegate(server, callbacks) registerCreateExpert(server, callbacks) + registerAddDelegateFromConfig(server, callbacks) } diff --git a/apps/create-expert/perstack.toml b/apps/create-expert/perstack.toml index 3baf9c56..a50a3548 100644 --- a/apps/create-expert/perstack.toml +++ b/apps/create-expert/perstack.toml @@ -13,7 +13,7 @@ Delegate to your specialists and pass file paths between them. Do not read or in ## Delegates -- @create-expert/planner — designs expert architectures, writes plan to perstack/plan.md +- @create-expert/planner — designs expert architectures, writes plan to plan.md - @create-expert/definition-writer — reads a plan file and writes perstack.toml - @create-expert/expert-tester — tests a perstack.toml by exercising each expert @@ -21,7 +21,7 @@ Delegate to your specialists and pass file paths between them. Do not read or in 1. If a perstack.toml already exists in the workspace, note its path 2. Delegate to planner: pass the user's request and the perstack.toml path if one exists -3. Delegate to definition-writer: tell it to read perstack/plan.md and write perstack.toml +3. Delegate to definition-writer: tell it to read plan.md and write perstack.toml 4. Delegate to expert-tester: tell it to test perstack.toml 5. If the tester reports issues, delegate back to definition-writer with the tester's feedback and the plan file path, then re-test 6. attemptCompletion with a summary of what was created @@ -32,16 +32,16 @@ delegates = ["@create-expert/planner", "@create-expert/definition-writer", "@cre type = "mcpStdioSkill" command = "npx" packageName = "@perstack/base" -pick = ["listDirectory", "think", "attemptCompletion"] +pick = ["exec", "attemptCompletion"] [experts."@create-expert/planner"] version = "1.0.0" description = """ Designs expert architectures for Perstack. Provide: (1) what the expert should do, (2) path to existing perstack.toml if one exists. \ -Writes the design plan to perstack/plan.md. +Writes the design plan to plan.md. """ instruction = """ -You are an expert architect for Perstack. Design expert systems that follow best practices, then write your design to perstack/plan.md. +You are an expert architect for Perstack. Design expert systems that follow best practices, then write your design to plan.md. ## Perstack Best Practices @@ -81,15 +81,18 @@ Coordinators should only route work between delegates, not contain domain logic. ## Available @perstack/base Tools - readTextFile, writeTextFile, editTextFile — file operations -- listDirectory, getFileInfo — directory and file metadata -- exec — run system commands -- think — complex reasoning scratchpad +- exec — run system commands (use `ls` for directory listing) +- todo, clearTodo — task planning and tracking - attemptCompletion — signal task completion (always include) -- createExpert, addDelegate, removeDelegate — expert lifecycle management +- addDelegateFromConfig, addDelegate, removeDelegate — delegation management +- createExpert — create expert definitions in memory + +### 7. Practical Over Ceremonial +Experts must produce real, usable output — not ceremony. A programming expert must write code, not documentation about code. A design expert must produce designs, not reports about design. If the user asks for a game, the expert should produce a playable game, not a game design document. Match the expert's output to what a human practitioner would actually deliver. ## Design Process -1. If an existing perstack.toml path was provided, read it to understand current state +1. Investigate thoroughly first: if an existing perstack.toml path was provided, read it. Read relevant workspace files to understand the domain and existing state. 2. Analyze whether the task needs one expert or a coordinator with delegates 3. For simple, focused tasks: design a single expert 4. For complex, multi-faceted tasks: design a coordinator with focused delegates @@ -98,7 +101,7 @@ Coordinators should only route work between delegates, not contain domain logic. ## Output -Write your design to perstack/plan.md with the following sections: +Write your design to plan.md with the following sections: 1. **Expert names/keys** — kebab-case, following coordinator/delegate naming convention if multi-expert 2. **Description for each expert** — optimized for callers @@ -114,12 +117,12 @@ After writing the file, attemptCompletion with the file path. type = "mcpStdioSkill" command = "npx" packageName = "@perstack/base" -pick = ["readTextFile", "writeTextFile", "listDirectory", "think", "attemptCompletion"] +pick = ["readTextFile", "writeTextFile", "exec", "todo", "attemptCompletion"] [experts."@create-expert/definition-writer"] version = "1.0.0" description = """ -Writes Perstack expert definitions in perstack.toml from a design plan. Provide: path to the plan file (e.g. perstack/plan.md). \ +Writes Perstack expert definitions in perstack.toml from a design plan. Provide: path to the plan file (e.g. plan.md). \ Optionally include feedback from a previous test round to address. """ instruction = """ @@ -147,7 +150,8 @@ delegates = ["@expert-name/delegate"] # optional tags = ["tag"] # optional # Skills — MCP tool access -[experts."expert-name".skills."skill-name"] +# IMPORTANT: this skill key MUST be exactly "@perstack/base" — the runtime requires this exact key +[experts."expert-name".skills."@perstack/base"] type = "mcpStdioSkill" command = "npx" packageName = "@perstack/base" @@ -167,12 +171,11 @@ pick = ["tool1", "tool2"] # optional, include specific tools ## Skill Selection Guide - Always include attemptCompletion in pick list -- Include think for experts that need complex reasoning -- Include readTextFile, writeTextFile, listDirectory for file operations -- Include exec for system commands -- Include createExpert, addDelegate, removeDelegate only for experts that manage other experts +- Include readTextFile, writeTextFile for file operations +- Include exec for system commands (also covers directory listing via `ls`) - Include editTextFile when targeted text replacement is needed -- Include getFileInfo when file metadata matters +- Include todo for task planning and tracking +- Include addDelegateFromConfig, addDelegate, removeDelegate only for experts that manage other experts - Prefer minimal tool sets — only include what the expert actually needs ## TOML Syntax Rules @@ -180,6 +183,7 @@ pick = ["tool1", "tool2"] # optional, include specific tools - Use triple-quoted strings for multi-line instructions - Expert keys: kebab-case (my-expert-name) - Delegate keys: @coordinator/delegate-name +- The @perstack/base skill key MUST be exactly `"@perstack/base"` — never `"base"` or other aliases. The runtime looks up this exact key. Other skill keys can be any name. - Always include version, description, instruction for each expert - Produce valid TOML — no syntax errors @@ -196,41 +200,41 @@ pick = ["tool1", "tool2"] # optional, include specific tools type = "mcpStdioSkill" command = "npx" packageName = "@perstack/base" -pick = ["readTextFile", "writeTextFile", "listDirectory", "think", "attemptCompletion"] +pick = ["readTextFile", "writeTextFile", "exec", "todo", "attemptCompletion"] [experts."@create-expert/expert-tester"] version = "1.0.0" description = """ Tests Perstack expert definitions in a perstack.toml. Provide: path to the perstack.toml to test. \ -Creates each expert in memory, runs realistic test queries, and reports results. +Adds the coordinator as a delegate and runs realistic test queries that exercise the full delegation chain. """ instruction = """ -You are a Perstack expert tester. Your job is to validate expert definitions by creating them in memory and running realistic test queries. +You are a Perstack expert tester. Your job is to validate expert definitions by loading them from a config file and running realistic test queries. -## Testing Process +## Delegation Scope Rules -For each expert defined in the perstack.toml: +You can ONLY delegate to coordinators (plain names like "game-dev"), NOT to delegates (names starting with @ like "@game-dev/designer"). Delegates are internal to their coordinator and are tested indirectly by testing the coordinator with queries that exercise the full delegation chain. + +## Testing Process -1. Read the perstack.toml to understand all expert definitions -2. Create the expert in memory using createExpert -3. Add it as a delegate using addDelegate -4. Design a realistic test query that exercises the expert's core function -5. Call the delegate with the test query -6. Verify the results +1. Read the perstack.toml to identify the coordinator expert(s) +2. Use addDelegateFromConfig to add the coordinator as a delegate +3. Design a realistic test query that exercises the coordinator and its delegates end-to-end +4. Call the coordinator delegate with the test query +5. Verify the results +6. removeDelegate when done testing ## What to Test -- **Single experts**: call with a realistic query, verify the output is complete and correct -- **Experts that write files**: verify files were created, read them, check contents -- **Coordinator experts**: verify delegation occurred and results were synthesized -- **Experts with exec**: verify commands were run correctly +- Test coordinators with queries that exercise the full delegation chain +- Verify files were created, read them, check contents +- Verify delegation occurred and results were synthesized ## Verification Criteria - Expert follows its instruction faithfully - Output is complete — no placeholder content (TODO, Lorem ipsum) - Files created are well-structured and correct -- TOML syntax in generated perstack.toml files is valid - Delegation chains work end-to-end ## Reporting @@ -252,12 +256,9 @@ command = "npx" packageName = "@perstack/base" pick = [ "readTextFile", - "listDirectory", - "getFileInfo", "exec", - "think", + "todo", "attemptCompletion", - "createExpert", - "addDelegate", + "addDelegateFromConfig", "removeDelegate", ] diff --git a/packages/runtime/src/messages/instruction-message.ts b/packages/runtime/src/messages/instruction-message.ts index c05f266b..b0adf1d1 100644 --- a/packages/runtime/src/messages/instruction-message.ts +++ b/packages/runtime/src/messages/instruction-message.ts @@ -5,6 +5,9 @@ import { dedent } from "ts-dedent" function getDelegateMetaInstruction(startedAt: number): string { return dedent` Call tools iteratively to complete the user's task. + + Before starting work, investigate the workspace and understand the current state. Then use the todo tool to create a plan of action. Work through the todos step by step, marking each completed as you go. + When the task is complete, call attemptCompletion with a result parameter containing your final response. When you cannot help, call attemptCompletion without a result. diff --git a/packages/skill-manager/package.json b/packages/skill-manager/package.json index e1a71fbe..fc10fd95 100644 --- a/packages/skill-manager/package.json +++ b/packages/skill-manager/package.json @@ -29,7 +29,8 @@ "@modelcontextprotocol/sdk": "^1.26.0", "@paralleldrive/cuid2": "^3.3.0", "@perstack/base": "workspace:*", - "@perstack/core": "workspace:*" + "@perstack/core": "workspace:*", + "@perstack/perstack-toml": "workspace:*" }, "devDependencies": { "@tsconfig/node22": "^22.0.5", diff --git a/packages/skill-manager/src/adapters/in-memory-base-adapter.ts b/packages/skill-manager/src/adapters/in-memory-base-adapter.ts index 8e3bc635..205d53c2 100644 --- a/packages/skill-manager/src/adapters/in-memory-base-adapter.ts +++ b/packages/skill-manager/src/adapters/in-memory-base-adapter.ts @@ -40,6 +40,9 @@ export class InMemoryBaseSkillAdapter extends SkillAdapter { createExpert: () => { throw new Error("Skill management not initialized") }, + addDelegateFromConfig: () => { + throw new Error("Skill management not initialized") + }, } constructor( diff --git a/packages/skill-manager/src/skill-manager.test.ts b/packages/skill-manager/src/skill-manager.test.ts index 1b209c71..ee7f8d6c 100644 --- a/packages/skill-manager/src/skill-manager.test.ts +++ b/packages/skill-manager/src/skill-manager.test.ts @@ -754,12 +754,14 @@ describe("InMemoryBaseSkillAdapter binding", () => { const addDelegate = vi.fn().mockResolvedValue({ delegateToolName: "d" }) const removeDelegate = vi.fn().mockResolvedValue(undefined) const createExpert = vi.fn().mockResolvedValue({ expertKey: "x" }) + const addDelegateFromConfig = vi.fn().mockResolvedValue({ delegateToolName: "d" }) adapter.bindSkillManagement({ addSkill, removeSkill, addDelegate, removeDelegate, createExpert, + addDelegateFromConfig, }) // No error means the binding succeeded — real verification happens through integration }) diff --git a/packages/skill-manager/src/skill-manager.ts b/packages/skill-manager/src/skill-manager.ts index 5aedeeae..07bf37d0 100644 --- a/packages/skill-manager/src/skill-manager.ts +++ b/packages/skill-manager/src/skill-manager.ts @@ -1,3 +1,4 @@ +import { readFile } from "node:fs/promises" import { type Expert, expertSchema, @@ -7,6 +8,7 @@ import { type ToolDefinition, validateDelegation, } from "@perstack/core" +import { parsePerstackConfig } from "@perstack/perstack-toml" import { InMemoryBaseSkillAdapter } from "./adapters/in-memory-base-adapter.js" import { LockfileSkillAdapter } from "./adapters/lockfile-adapter.js" import type { SkillAdapter } from "./skill-adapter.js" @@ -303,6 +305,45 @@ export class SkillManager { experts[newExpert.key] = newExpert return { expertKey: newExpert.key } }, + addDelegateFromConfig: async (input) => { + const configString = await readFile(input.configPath, "utf-8") + const config = parsePerstackConfig(configString) + for (const [key, configExpert] of Object.entries(config.experts ?? {})) { + if (!experts[key]) { + experts[key] = expertSchema.parse({ + key, + name: key, + version: configExpert.version ?? "1.0.0", + description: configExpert.description, + instruction: configExpert.instruction, + skills: configExpert.skills, + delegates: configExpert.delegates, + tags: configExpert.tags, + providerTools: configExpert.providerTools, + providerSkills: configExpert.providerSkills, + providerToolOptions: configExpert.providerToolOptions, + }) + } + } + const delegateExpert = experts[input.delegateExpertName] + if (!delegateExpert) { + const validKeys = Object.keys(config.experts ?? {}).filter( + (key) => !validateDelegation(expert.name, key), + ) + throw new Error( + `Expert "${input.delegateExpertName}" not found in config "${input.configPath}". Available experts that "${expert.name}" can delegate to: ${validKeys.join(", ")}`, + ) + } + const delegationError = validateDelegation(expert.name, input.delegateExpertName) + if (delegationError) throw new Error(delegationError) + await sm.addDelegate(delegateExpert) + if (!expert.delegates.includes(input.delegateExpertName)) { + expert.delegates.push(input.delegateExpertName) + } + const added = sm.getAdapters().get(delegateExpert.name) + const toolName = added?.getToolDefinitions()[0]?.name ?? delegateExpert.name + return { delegateToolName: toolName } + }, }) break } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index a1608187..feda6833 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -745,6 +745,9 @@ importers: '@perstack/core': specifier: workspace:* version: link:../core + '@perstack/perstack-toml': + specifier: workspace:* + version: link:../perstack-toml devDependencies: '@tsconfig/node22': specifier: ^22.0.5 From a702dd8f3fba87bbbae79031a8ef173fc747a77c Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Fri, 20 Feb 2026 03:53:28 +0000 Subject: [PATCH 8/9] fix: hide failed delegate retries from delegation tree Error nodes are terminal just like completed nodes, but flattenTree() was only filtering out completed children. This caused stale failed attempts to clutter the tree when a parent retried delegation. Co-Authored-By: Claude Opus 4.6 --- .../hooks/use-delegation-tree.test.ts | 271 ++++++++++++++++++ .../execution/hooks/use-delegation-tree.ts | 8 +- 2 files changed, 275 insertions(+), 4 deletions(-) diff --git a/packages/tui-components/src/execution/hooks/use-delegation-tree.test.ts b/packages/tui-components/src/execution/hooks/use-delegation-tree.test.ts index ac7050a9..02fca6f8 100644 --- a/packages/tui-components/src/execution/hooks/use-delegation-tree.test.ts +++ b/packages/tui-components/src/execution/hooks/use-delegation-tree.test.ts @@ -1163,6 +1163,277 @@ describe("flattenTree", () => { // Check ancestor tracking for tree drawing expect(flat[2]!.ancestorIsLast).toEqual([true, true]) }) + + it("filters out error children from parent's visible children", () => { + const state = createInitialDelegationTreeState() + processDelegationTreeEvent( + state, + makeRunEvent("startRun", "run-root", "root@1.0.0", { + initialCheckpoint: makeCheckpoint({ runId: "run-root" }), + inputMessages: [], + }), + ) + processDelegationTreeEvent( + state, + makeRunEvent("stopRunByDelegate", "run-root", "root@1.0.0", { + checkpoint: makeCheckpoint({ runId: "run-root" }), + step: { stepNumber: 1, newMessages: [], usage: baseUsage }, + }), + ) + // Error child + processDelegationTreeEvent( + state, + makeRunEvent("startRun", "run-err", "worker@1.0.0", { + initialCheckpoint: makeCheckpoint({ + runId: "run-err", + delegatedBy: { + expert: { key: "root@1.0.0", name: "root", version: "1.0.0" }, + toolCallId: "tc-1", + toolName: "delegate", + checkpointId: "cp-1", + runId: "run-root", + }, + }), + inputMessages: [], + }), + ) + processDelegationTreeEvent( + state, + makeRunEvent("stopRunByError", "run-err", "worker@1.0.0", { + checkpoint: makeCheckpoint({ runId: "run-err" }), + step: { stepNumber: 1, newMessages: [], usage: baseUsage }, + error: { name: "Error", message: "No output generated", isRetryable: false }, + }), + ) + // Running child + processDelegationTreeEvent( + state, + makeRunEvent("startRun", "run-ok", "worker@1.0.0", { + initialCheckpoint: makeCheckpoint({ + runId: "run-ok", + delegatedBy: { + expert: { key: "root@1.0.0", name: "root", version: "1.0.0" }, + toolCallId: "tc-2", + toolName: "delegate", + checkpointId: "cp-1", + runId: "run-root", + }, + }), + inputMessages: [], + }), + ) + const flat = flattenTree(state) + expect(flat).toHaveLength(2) + expect(flat[0]!.node.runId).toBe("run-root") + expect(flat[1]!.node.runId).toBe("run-ok") + }) + + it("prunes subtree of error nodes", () => { + const state = createInitialDelegationTreeState() + processDelegationTreeEvent( + state, + makeRunEvent("startRun", "run-root", "root@1.0.0", { + initialCheckpoint: makeCheckpoint({ runId: "run-root" }), + inputMessages: [], + }), + ) + processDelegationTreeEvent( + state, + makeRunEvent("stopRunByDelegate", "run-root", "root@1.0.0", { + checkpoint: makeCheckpoint({ runId: "run-root" }), + step: { stepNumber: 1, newMessages: [], usage: baseUsage }, + }), + ) + // Child that errored — had its own grandchild before failing + processDelegationTreeEvent( + state, + makeRunEvent("startRun", "run-mid", "mid@1.0.0", { + initialCheckpoint: makeCheckpoint({ + runId: "run-mid", + delegatedBy: { + expert: { key: "root@1.0.0", name: "root", version: "1.0.0" }, + toolCallId: "tc-1", + toolName: "delegate", + checkpointId: "cp-1", + runId: "run-root", + }, + }), + inputMessages: [], + }), + ) + processDelegationTreeEvent( + state, + makeRunEvent("stopRunByDelegate", "run-mid", "mid@1.0.0", { + checkpoint: makeCheckpoint({ runId: "run-mid" }), + step: { stepNumber: 1, newMessages: [], usage: baseUsage }, + }), + ) + // Grandchild still running + processDelegationTreeEvent( + state, + makeRunEvent("startRun", "run-grand", "leaf@1.0.0", { + initialCheckpoint: makeCheckpoint({ + runId: "run-grand", + delegatedBy: { + expert: { key: "mid@1.0.0", name: "mid", version: "1.0.0" }, + toolCallId: "tc-2", + toolName: "delegate", + checkpointId: "cp-2", + runId: "run-mid", + }, + }), + inputMessages: [], + }), + ) + // Now the mid node errors out + processDelegationTreeEvent( + state, + makeRunEvent("stopRunByError", "run-mid", "mid@1.0.0", { + checkpoint: makeCheckpoint({ runId: "run-mid" }), + step: { stepNumber: 1, newMessages: [], usage: baseUsage }, + error: { name: "Error", message: "Failed", isRetryable: false }, + }), + ) + const flat = flattenTree(state) + // Root only — error child pruned along with its grandchild + expect(flat).toHaveLength(1) + expect(flat[0]!.node.runId).toBe("run-root") + // Grandchild should NOT appear as orphan + expect(flat.find((f) => f.node.runId === "run-grand")).toBeUndefined() + }) + + it("does not show orphaned error nodes", () => { + const state = createInitialDelegationTreeState() + processDelegationTreeEvent( + state, + makeRunEvent("startRun", "run-root", "root@1.0.0", { + initialCheckpoint: makeCheckpoint({ runId: "run-root" }), + inputMessages: [], + }), + ) + // Manually add an orphan error node (not linked to root) + state.nodes.set("orphan-err", { + runId: "orphan-err", + expertName: "orphan", + expertKey: "orphan@1.0.0", + status: "error", + actionLabel: "Something went wrong", + actionFileArg: undefined, + contextWindowUsage: 0, + parentRunId: "nonexistent-parent", + childRunIds: [], + totalTokens: 0, + }) + const flat = flattenTree(state) + // Only root, orphan error node should not appear + expect(flat).toHaveLength(1) + expect(flat[0]!.node.runId).toBe("run-root") + }) + + it("filters multiple failed retries, shows only active attempt", () => { + const state = createInitialDelegationTreeState() + // Parent (expert-tester) suspending + processDelegationTreeEvent( + state, + makeRunEvent("startRun", "run-tester", "expert-tester@1.0.0", { + initialCheckpoint: makeCheckpoint({ runId: "run-tester" }), + inputMessages: [], + }), + ) + processDelegationTreeEvent( + state, + makeRunEvent("stopRunByDelegate", "run-tester", "expert-tester@1.0.0", { + checkpoint: makeCheckpoint({ runId: "run-tester" }), + step: { stepNumber: 1, newMessages: [], usage: baseUsage }, + }), + ) + // Attempt 1: fails + processDelegationTreeEvent( + state, + makeRunEvent("startRun", "run-attempt-1", "game-coordinator@1.0.0", { + initialCheckpoint: makeCheckpoint({ + runId: "run-attempt-1", + delegatedBy: { + expert: { + key: "expert-tester@1.0.0", + name: "expert-tester", + version: "1.0.0", + }, + toolCallId: "tc-1", + toolName: "delegate", + checkpointId: "cp-1", + runId: "run-tester", + }, + }), + inputMessages: [], + }), + ) + processDelegationTreeEvent( + state, + makeRunEvent("stopRunByError", "run-attempt-1", "game-coordinator@1.0.0", { + checkpoint: makeCheckpoint({ runId: "run-attempt-1" }), + step: { stepNumber: 1, newMessages: [], usage: baseUsage }, + error: { name: "Error", message: "No output generated", isRetryable: false }, + }), + ) + // Attempt 2: fails + processDelegationTreeEvent( + state, + makeRunEvent("startRun", "run-attempt-2", "game-coordinator@1.0.0", { + initialCheckpoint: makeCheckpoint({ + runId: "run-attempt-2", + delegatedBy: { + expert: { + key: "expert-tester@1.0.0", + name: "expert-tester", + version: "1.0.0", + }, + toolCallId: "tc-2", + toolName: "delegate", + checkpointId: "cp-2", + runId: "run-tester", + }, + }), + inputMessages: [], + }), + ) + processDelegationTreeEvent( + state, + makeRunEvent("stopRunByError", "run-attempt-2", "game-coordinator@1.0.0", { + checkpoint: makeCheckpoint({ runId: "run-attempt-2" }), + step: { stepNumber: 1, newMessages: [], usage: baseUsage }, + error: { name: "Error", message: "No output generated", isRetryable: false }, + }), + ) + // Attempt 3: still running + processDelegationTreeEvent( + state, + makeRunEvent("startRun", "run-attempt-3", "game-coordinator@1.0.0", { + initialCheckpoint: makeCheckpoint({ + runId: "run-attempt-3", + delegatedBy: { + expert: { + key: "expert-tester@1.0.0", + name: "expert-tester", + version: "1.0.0", + }, + toolCallId: "tc-3", + toolName: "delegate", + checkpointId: "cp-3", + runId: "run-tester", + }, + }), + inputMessages: [], + }), + ) + const flat = flattenTree(state) + // Only tester + active attempt 3; failed attempts 1 and 2 hidden + expect(flat).toHaveLength(2) + expect(flat[0]!.node.runId).toBe("run-tester") + expect(flat[1]!.node.runId).toBe("run-attempt-3") + expect(flat[1]!.depth).toBe(1) + expect(flat[1]!.isLast).toBe(true) + }) }) describe("getStatusCounts", () => { diff --git a/packages/tui-components/src/execution/hooks/use-delegation-tree.ts b/packages/tui-components/src/execution/hooks/use-delegation-tree.ts index df674a54..b98a8c1d 100644 --- a/packages/tui-components/src/execution/hooks/use-delegation-tree.ts +++ b/packages/tui-components/src/execution/hooks/use-delegation-tree.ts @@ -146,8 +146,8 @@ export function flattenTree(state: DelegationTreeState): FlatTreeNode[] { const children = node.childRunIds - // Pruning: skip entire subtree of completed nodes - if (node.status === "completed") { + // Pruning: skip entire subtree of completed or error nodes + if (node.status === "completed" || node.status === "error") { for (const childId of children) markSubtreeVisited(childId) return } @@ -155,7 +155,7 @@ export function flattenTree(state: DelegationTreeState): FlatTreeNode[] { // Filter out individually completed children; keep non-completed ones visible const visibleChildren = children.filter((id) => { const child = state.nodes.get(id) - return child && child.status !== "completed" + return child && child.status !== "completed" && child.status !== "error" }) // Mark completed children as visited so they don't appear as orphans @@ -175,7 +175,7 @@ export function flattenTree(state: DelegationTreeState): FlatTreeNode[] { // Safety net: show orphaned non-completed nodes that weren't reached from root for (const [nodeId, node] of state.nodes) { - if (!visited.has(nodeId) && node.status !== "completed") { + if (!visited.has(nodeId) && node.status !== "completed" && node.status !== "error") { result.push({ node, depth: 0, isLast: true, ancestorIsLast: [] }) } } From ff355e84cb9452ef58034032fef339c356cb61fd Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Fri, 20 Feb 2026 06:50:54 +0000 Subject: [PATCH 9/9] refactor: make stepNumber per-run and fix E2E test regressions Make stepNumber always start at 1 for each run, completing the decoupling from the removed maxSteps feature. Accumulate totalSteps across runs in the job instead. - Rename createNextStepCheckpoint to createRunStartCheckpoint (stepNumber: 1) - Reset stepNumber to 1 in delegation child/return checkpoints - Accumulate job.totalSteps across runs (job.totalSteps + resultCheckpoint.stepNumber) - Refresh job from storage after delegation to pick up child updates - Replace stepNumber-based checkpoint sorting with file mtime - Remove maxStepNumber parameter from TUI event fetching - Fix log data-fetcher fallback to sum max stepNumber per runId - Remove deleted --max-steps E2E test - Fix create-expert E2E: update expertKey and tool name assertions Co-Authored-By: Claude Opus 4.6 --- e2e/create-expert/create-expert.test.ts | 30 +++++++++---------- e2e/perstack-cli/options.test.ts | 28 +---------------- packages/filesystem/src/checkpoint.ts | 12 ++++++-- packages/log/src/data-fetcher.test.ts | 15 ++++++++++ packages/log/src/data-fetcher.ts | 14 ++++++++- .../runtime/src/helpers/checkpoint.test.ts | 14 ++++----- packages/runtime/src/helpers/checkpoint.ts | 6 ++-- packages/runtime/src/helpers/index.ts | 2 +- .../coordinator-executor.test.ts | 8 ++--- .../src/orchestration/coordinator-executor.ts | 4 +-- .../orchestration/delegation-executor.test.ts | 4 +-- .../src/orchestration/delegation-executor.ts | 6 ++-- packages/runtime/src/run.ts | 4 ++- packages/tui/src/lib/run-manager.ts | 6 ++-- packages/tui/src/start-handler.ts | 2 +- 15 files changed, 82 insertions(+), 73 deletions(-) diff --git a/e2e/create-expert/create-expert.test.ts b/e2e/create-expert/create-expert.test.ts index 47c499c4..a9b9372e 100644 --- a/e2e/create-expert/create-expert.test.ts +++ b/e2e/create-expert/create-expert.test.ts @@ -2,8 +2,8 @@ * Create Expert E2E Tests * * Tests the create-expert agent that creates/modifies perstack.toml files: - * - Creates new expert definitions via createExpert + addDelegate workflow - * - Tests experts in-process via delegation before writing perstack.toml + * - Creates new expert definitions via planner + definition-writer + expert-tester delegates + * - Tests experts via addDelegateFromConfig after writing perstack.toml * - Preserves existing experts when modifying perstack.toml * * Binary: apps/create-expert/dist/bin/cli.js (--headless mode) @@ -103,23 +103,23 @@ describe("create-expert", () => { expect(controlFlow).toContain("stopRunByDelegate") expect(controlFlow.at(-1)).toBe("completeRun") - // Verify the coordinator (expert) starts and completes + // Verify the coordinator (create-expert) starts and completes const startEvents = filterEventsByType(result.events, "startRun") const completeEvents = filterEventsByType(result.events, "completeRun") - expect(startEvents.some((e) => (e as { expertKey: string }).expertKey === "expert")).toBe( - true, - ) - expect(completeEvents.some((e) => (e as { expertKey: string }).expertKey === "expert")).toBe( - true, - ) + expect( + startEvents.some((e) => (e as { expertKey: string }).expertKey === "create-expert"), + ).toBe(true) + expect( + completeEvents.some((e) => (e as { expertKey: string }).expertKey === "create-expert"), + ).toBe(true) // Verify delegation: at least 2 completeRun (delegate + coordinator) expect(completeEvents.length).toBeGreaterThanOrEqual(2) - // Verify createExpert + addDelegate tools were called + // Verify definition-writer writes TOML and expert-tester tests via addDelegateFromConfig const toolNames = getAllCalledToolNames(result) - expect(toolNames).toContain("createExpert") - expect(toolNames).toContain("addDelegate") + expect(toolNames).toContain("writeTextFile") + expect(toolNames).toContain("addDelegateFromConfig") // Verify perstack.toml was created with valid expert definitions const tomlPath = path.join(tempDir, "perstack.toml") @@ -167,10 +167,10 @@ pick = ["attemptCompletion"] .passed, ).toBe(true) - // Verify createExpert + addDelegate tools were called + // Verify definition-writer writes TOML and expert-tester tests via addDelegateFromConfig const toolNames = getAllCalledToolNames(result) - expect(toolNames).toContain("createExpert") - expect(toolNames).toContain("addDelegate") + expect(toolNames).toContain("writeTextFile") + expect(toolNames).toContain("addDelegateFromConfig") // Verify perstack.toml was updated const tomlPath = path.join(tempDir, "perstack.toml") diff --git a/e2e/perstack-cli/options.test.ts b/e2e/perstack-cli/options.test.ts index 33c72a62..c857a734 100644 --- a/e2e/perstack-cli/options.test.ts +++ b/e2e/perstack-cli/options.test.ts @@ -3,13 +3,12 @@ * * Tests CLI option handling in perstack: * - --provider, --model - * - --max-steps, --max-retries, --timeout + * - --max-retries, --timeout * - --job-id, --env-path, --verbose * * TOML: e2e/experts/global-runtime.toml */ import { describe, expect, it } from "vitest" -import { assertEventSequenceContains } from "../lib/assertions.js" import { runCli, withEventParsing } from "../lib/runner.js" const GLOBAL_RUNTIME_CONFIG = "./e2e/experts/global-runtime.toml" @@ -59,31 +58,6 @@ describe.concurrent("CLI Options", () => { LLM_TIMEOUT, ) - /** Verifies --max-steps option is accepted and run completes. */ - it( - "should accept --max-steps option", - async () => { - const cmdResult = await runCli( - [ - "run", - "--config", - GLOBAL_RUNTIME_CONFIG, - "--max-steps", - "10", - "e2e-global-runtime", - "Say hello", - ], - { timeout: LLM_TIMEOUT }, - ) - const result = withEventParsing(cmdResult) - expect(result.exitCode).toBe(0) - expect(assertEventSequenceContains(result.events, ["startRun", "completeRun"]).passed).toBe( - true, - ) - }, - LLM_TIMEOUT, - ) - /** Verifies --max-retries option is accepted. */ it( "should accept --max-retries option", diff --git a/packages/filesystem/src/checkpoint.ts b/packages/filesystem/src/checkpoint.ts index 16f61c1a..f4aca4ff 100644 --- a/packages/filesystem/src/checkpoint.ts +++ b/packages/filesystem/src/checkpoint.ts @@ -1,4 +1,4 @@ -import { existsSync, readdirSync, readFileSync } from "node:fs" +import { existsSync, readdirSync, readFileSync, statSync } from "node:fs" import { mkdir, readFile, writeFile } from "node:fs/promises" import path from "node:path" import { type Checkpoint, checkpointSchema } from "@perstack/core" @@ -46,5 +46,13 @@ export function getCheckpointsByJobId(jobId: string): Checkpoint[] { // Ignore invalid checkpoints } } - return checkpoints.sort((a, b) => a.stepNumber - b.stepNumber) + return checkpoints.sort((a, b) => { + try { + const aStat = statSync(path.resolve(checkpointDir, `${a.id}.json`)) + const bStat = statSync(path.resolve(checkpointDir, `${b.id}.json`)) + return aStat.mtimeMs - bStat.mtimeMs + } catch { + return 0 + } + }) } diff --git a/packages/log/src/data-fetcher.test.ts b/packages/log/src/data-fetcher.test.ts index 8b19f754..a8394bf3 100644 --- a/packages/log/src/data-fetcher.test.ts +++ b/packages/log/src/data-fetcher.test.ts @@ -156,4 +156,19 @@ describe("createLogDataFetcher", () => { const result = await fetcher.getRuns("job-1") expect(result).toEqual([mockRunSetting]) }) + + it("computes fallback totalSteps as sum of max stepNumber per runId", async () => { + mockStorage.retrieveJob.mockResolvedValue(undefined) + const checkpoints: Checkpoint[] = [ + { ...mockCheckpoint, id: "cp-1", runId: "run-1", stepNumber: 1 }, + { ...mockCheckpoint, id: "cp-2", runId: "run-1", stepNumber: 3 }, + { ...mockCheckpoint, id: "cp-3", runId: "run-2", stepNumber: 1 }, + { ...mockCheckpoint, id: "cp-4", runId: "run-2", stepNumber: 2 }, + ] + mockStorage.getCheckpointsByJobId.mockResolvedValue(checkpoints) + const fetcher = createLogDataFetcher(mockStorage) + const result = await fetcher.getJob("job-1") + // run-1 max=3, run-2 max=2, total=5 + expect(result?.totalSteps).toBe(5) + }) }) diff --git a/packages/log/src/data-fetcher.ts b/packages/log/src/data-fetcher.ts index 26853c65..ca9a7ac8 100644 --- a/packages/log/src/data-fetcher.ts +++ b/packages/log/src/data-fetcher.ts @@ -42,11 +42,23 @@ export function createLogDataFetcher(storage: StorageAdapter): LogDataFetcher { if (checkpoints.length === 0) return undefined const firstCheckpoint = checkpoints[0] const lastCheckpoint = checkpoints[checkpoints.length - 1] + // Sum max stepNumber per unique runId to get total steps across all runs + const maxStepByRun = new Map() + for (const cp of checkpoints) { + const current = maxStepByRun.get(cp.runId) ?? 0 + if (cp.stepNumber > current) { + maxStepByRun.set(cp.runId, cp.stepNumber) + } + } + let totalSteps = 0 + for (const steps of maxStepByRun.values()) { + totalSteps += steps + } return { id: jobId, coordinatorExpertKey: firstCheckpoint.expert.key, runtimeVersion: "v1.0", - totalSteps: lastCheckpoint.stepNumber, + totalSteps, usage: lastCheckpoint.usage, startedAt: getJobDirMtime(storage.getBasePath(), jobId), finishedAt: Date.now(), diff --git a/packages/runtime/src/helpers/checkpoint.test.ts b/packages/runtime/src/helpers/checkpoint.test.ts index b19c2ad0..12878651 100644 --- a/packages/runtime/src/helpers/checkpoint.test.ts +++ b/packages/runtime/src/helpers/checkpoint.test.ts @@ -3,7 +3,7 @@ import { describe, expect, it, vi } from "vitest" import { buildDelegationReturnState, createInitialCheckpoint, - createNextStepCheckpoint, + createRunStartCheckpoint, } from "./checkpoint.js" describe("@perstack/runtime: createInitialCheckpoint", () => { @@ -52,7 +52,7 @@ describe("@perstack/runtime: createInitialCheckpoint", () => { }) }) -describe("@perstack/runtime: createNextStepCheckpoint", () => { +describe("@perstack/runtime: createRunStartCheckpoint", () => { const baseCheckpoint: Checkpoint = { id: "old-id", jobId: "job-123", @@ -74,15 +74,15 @@ describe("@perstack/runtime: createNextStepCheckpoint", () => { contextWindowUsage: 0.5, } - it("increments stepNumber and updates id and runId", () => { - const result = createNextStepCheckpoint("new-checkpoint-id", baseCheckpoint, "new-run-id") + it("resets stepNumber to 1 and updates id and runId", () => { + const result = createRunStartCheckpoint("new-checkpoint-id", baseCheckpoint, "new-run-id") expect(result.id).toBe("new-checkpoint-id") expect(result.runId).toBe("new-run-id") - expect(result.stepNumber).toBe(6) + expect(result.stepNumber).toBe(1) }) it("preserves other checkpoint properties", () => { - const result = createNextStepCheckpoint("new-checkpoint-id", baseCheckpoint, "new-run-id") + const result = createRunStartCheckpoint("new-checkpoint-id", baseCheckpoint, "new-run-id") expect(result.expert).toEqual(baseCheckpoint.expert) expect(result.status).toBe(baseCheckpoint.status) expect(result.messages).toEqual(baseCheckpoint.messages) @@ -169,7 +169,7 @@ describe("@perstack/runtime: buildDelegationReturnState", () => { it("builds checkpoint with parent data and updated stepNumber/usage", () => { const result = buildDelegationReturnState(baseSetting, resultCheckpoint, parentCheckpoint) expect(result.checkpoint.id).toBe("parent-checkpoint-id") - expect(result.checkpoint.stepNumber).toBe(5) + expect(result.checkpoint.stepNumber).toBe(1) expect(result.checkpoint.usage).toEqual(resultCheckpoint.usage) }) diff --git a/packages/runtime/src/helpers/checkpoint.ts b/packages/runtime/src/helpers/checkpoint.ts index 1eb6b473..64b51a32 100644 --- a/packages/runtime/src/helpers/checkpoint.ts +++ b/packages/runtime/src/helpers/checkpoint.ts @@ -32,7 +32,7 @@ export function createInitialCheckpoint( } } -export function createNextStepCheckpoint( +export function createRunStartCheckpoint( checkpointId: string, checkpoint: Checkpoint, runId: string, @@ -41,7 +41,7 @@ export function createNextStepCheckpoint( ...checkpoint, id: checkpointId, runId, - stepNumber: checkpoint.stepNumber + 1, + stepNumber: 1, } } @@ -90,7 +90,7 @@ export function buildDelegationReturnState( checkpoint: { ...parentCheckpoint, runId: newRunId, - stepNumber: resultCheckpoint.stepNumber, + stepNumber: 1, usage: resultCheckpoint.usage, pendingToolCalls: parentCheckpoint.pendingToolCalls, partialToolResults: parentCheckpoint.partialToolResults, diff --git a/packages/runtime/src/helpers/index.ts b/packages/runtime/src/helpers/index.ts index 09b13ca8..d6a13ba7 100644 --- a/packages/runtime/src/helpers/index.ts +++ b/packages/runtime/src/helpers/index.ts @@ -1,7 +1,7 @@ export { buildDelegationReturnState, createInitialCheckpoint, - createNextStepCheckpoint, + createRunStartCheckpoint, type DelegationStateResult, } from "./checkpoint.js" export { getLockfileExpertToolDefinitions } from "./lockfile.js" diff --git a/packages/runtime/src/orchestration/coordinator-executor.test.ts b/packages/runtime/src/orchestration/coordinator-executor.test.ts index 908f1d27..ededa333 100644 --- a/packages/runtime/src/orchestration/coordinator-executor.test.ts +++ b/packages/runtime/src/orchestration/coordinator-executor.test.ts @@ -40,11 +40,11 @@ vi.mock("../helpers/index.js", () => ({ }, contextWindow: params.contextWindow, })), - createNextStepCheckpoint: vi.fn().mockImplementation((id, checkpoint, runId) => ({ + createRunStartCheckpoint: vi.fn().mockImplementation((id, checkpoint, runId) => ({ ...checkpoint, id, runId, - stepNumber: checkpoint.stepNumber + 1, + stepNumber: 1, })), })) @@ -186,14 +186,14 @@ describe("@perstack/runtime: coordinator-executor", () => { }) it("creates next step checkpoint when checkpoint provided", async () => { - const { createNextStepCheckpoint } = await import("../helpers/index.js") + const { createRunStartCheckpoint } = await import("../helpers/index.js") const executor = new CoordinatorExecutor() const setting = createMockSetting() const checkpoint = createMockCheckpoint() await executor.execute(setting, checkpoint) - expect(createNextStepCheckpoint).toHaveBeenCalled() + expect(createRunStartCheckpoint).toHaveBeenCalled() }) it("emits init event when eventListener is provided", async () => { diff --git a/packages/runtime/src/orchestration/coordinator-executor.ts b/packages/runtime/src/orchestration/coordinator-executor.ts index 10390284..104ec53e 100644 --- a/packages/runtime/src/orchestration/coordinator-executor.ts +++ b/packages/runtime/src/orchestration/coordinator-executor.ts @@ -14,7 +14,7 @@ import pkg from "../../package.json" with { type: "json" } import { RunEventEmitter } from "../events/event-emitter.js" import { createInitialCheckpoint, - createNextStepCheckpoint, + createRunStartCheckpoint, getContextWindow, getLockfileExpertToolDefinitions, type ResolveExpertToRunFn, @@ -83,7 +83,7 @@ export class CoordinatorExecutor { }) const initialCheckpoint = checkpoint - ? createNextStepCheckpoint(createId(), checkpoint, setting.runId) + ? createRunStartCheckpoint(createId(), checkpoint, setting.runId) : createInitialCheckpoint(createId(), { jobId: setting.jobId, runId: setting.runId, diff --git a/packages/runtime/src/orchestration/delegation-executor.test.ts b/packages/runtime/src/orchestration/delegation-executor.test.ts index 2a9536f5..a99392cc 100644 --- a/packages/runtime/src/orchestration/delegation-executor.test.ts +++ b/packages/runtime/src/orchestration/delegation-executor.test.ts @@ -101,7 +101,7 @@ describe("@perstack/runtime: delegation-executor", () => { expect(runFn).toHaveBeenCalledTimes(1) expect(result.nextSetting.input.interactiveToolCallResult?.toolCallId).toBe("tc-1") - expect(result.nextCheckpoint.stepNumber).toBe(3) + expect(result.nextCheckpoint.stepNumber).toBe(1) expect(result.nextCheckpoint.partialToolResults).toEqual([]) }) @@ -143,7 +143,7 @@ describe("@perstack/runtime: delegation-executor", () => { expect(runFn).toHaveBeenCalledTimes(2) expect(result.nextSetting.input.interactiveToolCallResult?.toolCallId).toBe("tc-1") - expect(result.nextCheckpoint.stepNumber).toBe(5) // max step number + expect(result.nextCheckpoint.stepNumber).toBe(1) // per-run reset expect(result.nextCheckpoint.partialToolResults).toHaveLength(1) expect(result.nextCheckpoint.partialToolResults?.[0].id).toBe("tc-2") }) diff --git a/packages/runtime/src/orchestration/delegation-executor.ts b/packages/runtime/src/orchestration/delegation-executor.ts index 66afcd71..b7f07d09 100644 --- a/packages/runtime/src/orchestration/delegation-executor.ts +++ b/packages/runtime/src/orchestration/delegation-executor.ts @@ -124,8 +124,6 @@ export class DelegationExecutor { context.usage, ) - const maxStepNumber = Math.max(...allResults.map((r) => r.stepNumber)) - const restToolResults: ToolResult[] = restResults.map((result) => ({ id: result.toolCallId, skillName: `delegate/${result.expertKey}`, @@ -161,7 +159,7 @@ export class DelegationExecutor { jobId: setting.jobId, runId: returnRunId, status: "stoppedByDelegate", - stepNumber: maxStepNumber, + stepNumber: 1, messages: context.messages, // Restore parent's conversation history expert: { key: parentExpert.key, @@ -202,7 +200,7 @@ export class DelegationExecutor { jobId: parentSetting.jobId, runId: delegateRunId, status: "init", - stepNumber: parentContext.stepNumber, + stepNumber: 1, messages: [], // Child starts fresh - no parent context inheritance expert: { key: expert.key, diff --git a/packages/runtime/src/run.ts b/packages/runtime/src/run.ts index b17d22ee..6ee43021 100755 --- a/packages/runtime/src/run.ts +++ b/packages/runtime/src/run.ts @@ -95,7 +95,7 @@ export async function run(runInput: RunParamsInput, options?: RunOptions): Promi job = { ...job, - totalSteps: resultCheckpoint.stepNumber, + totalSteps: job.totalSteps + resultCheckpoint.stepNumber, usage: resultCheckpoint.usage, } @@ -107,6 +107,7 @@ export async function run(runInput: RunParamsInput, options?: RunOptions): Promi } if (resultCheckpoint.delegatedBy) { await storeJob(job) + job = (await retrieveJob(setting.jobId)) ?? job const parentCheckpoint = await retrieveCheckpoint( setting.jobId, resultCheckpoint.delegatedBy.checkpointId, @@ -146,6 +147,7 @@ export async function run(runInput: RunParamsInput, options?: RunOptions): Promi options, ) + job = (await retrieveJob(setting.jobId)) ?? job setting = delegationResult.nextSetting checkpoint = delegationResult.nextCheckpoint break diff --git a/packages/tui/src/lib/run-manager.ts b/packages/tui/src/lib/run-manager.ts index 3d420493..453f745a 100644 --- a/packages/tui/src/lib/run-manager.ts +++ b/packages/tui/src/lib/run-manager.ts @@ -84,18 +84,18 @@ export function getCheckpointsWithDetails( stepNumber: cp.stepNumber, contextWindowUsage: cp.contextWindowUsage ?? 0, })) - .sort((a, b) => b.stepNumber - a.stepNumber) + .reverse() } export function getEventContents(jobId: string, runId: string, maxStepNumber?: number): RunEvent[] { return runtimeGetEventContents(jobId, runId, maxStepNumber) } -export function getAllEventContentsForJob(jobId: string, maxStepNumber?: number): RunEvent[] { +export function getAllEventContentsForJob(jobId: string): RunEvent[] { const runIds = getRunIdsByJobId(jobId) const allEvents: RunEvent[] = [] for (const runId of runIds) { - const events = runtimeGetEventContents(jobId, runId, maxStepNumber) + const events = runtimeGetEventContents(jobId, runId) allEvents.push(...events) } return allEvents.sort((a, b) => a.timestamp - b.timestamp) diff --git a/packages/tui/src/start-handler.ts b/packages/tui/src/start-handler.ts index b6248e93..860d0b49 100644 --- a/packages/tui/src/start-handler.ts +++ b/packages/tui/src/start-handler.ts @@ -140,7 +140,7 @@ export async function startHandler( // On subsequent iterations, skip historical events (previous TUI already displayed them) let isFirstIteration = true const initialHistoricalEvents: ReturnType | undefined = currentCheckpoint - ? getAllEventContentsForJob(currentCheckpoint.jobId, currentCheckpoint.stepNumber) + ? getAllEventContentsForJob(currentCheckpoint.jobId) : undefined // First iteration: if no query from CLI, the execution TUI will collect it