From 734e224c6a9b3be815050e3baac0c381a92e7980 Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Mon, 9 Feb 2026 13:56:48 +0000 Subject: [PATCH 1/2] docs: fix documentation-code mismatches after multi-runtime removal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Audit all 50+ markdown files against current source code and fix remaining inaccuracies left after the multi-runtime refactoring. Fixes: - Fix run() API examples to match RunParamsInput signature (deployment.md, sandbox-integration.md) - Fix event type names: errorRun→stopRunByError, abbreviated streaming events→full names - Remove ghost event types from events.md (callInteractiveTool, callDelegate, finishAllToolCalls, proxyAccess) - Add missing event types (finishMcpTools, skipDelegates, proceedToInteractiveTools) - Fix CLI reference: timeout default 60000→300000, remove deleted commands (publish/unpublish/tag/status) - Add missing CLI options (--reasoning-budget, --filter) and deepseek provider - Add missing perstack.toml fields (envPath, providerTools, providerSkills, allowedDomains, lazyInit) - Update packages/react README with useJobStream/useJobStreams hooks - Fix going-to-production.md event JSON examples to use actual event type names - Remove stale "(not Docker)" from benchmarks README Co-Authored-By: Claude Opus 4.6 --- benchmarks/README.md | 2 +- docs/guides/going-to-production.md | 8 +- docs/operating-experts/deployment.md | 9 +- docs/references/cli.md | 122 +++--------------- docs/references/events.md | 30 ++--- docs/references/perstack-toml.md | 25 ++-- docs/understanding-perstack/runtime.md | 18 +-- .../sandbox-integration.md | 22 +++- docs/using-experts/error-handling.md | 6 +- e2e/README.md | 3 +- packages/react/README.md | 80 ++++++++++++ 11 files changed, 164 insertions(+), 161 deletions(-) diff --git a/benchmarks/README.md b/benchmarks/README.md index da65876a..38720011 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -157,7 
+157,7 @@ Skill Spawn Handshake ToolDisc Total ## Environment Benchmarks run on: -- **Runtime**: local (not Docker) +- **Runtime**: local - **Model**: claude-sonnet-4-5 (Anthropic) - **Node.js**: v22+ - **Platform**: macOS (darwin 23.4.0) diff --git a/docs/guides/going-to-production.md b/docs/guides/going-to-production.md index f52fca1e..f643d0dc 100644 --- a/docs/guides/going-to-production.md +++ b/docs/guides/going-to-production.md @@ -118,10 +118,10 @@ Each user gets isolated state. The Expert reads and writes to their workspace on `perstack run` outputs JSON events to stdout. Each line is a structured event: ```json -{"type":"generation:start","timestamp":"2024-01-15T10:30:00Z"} -{"type":"tool:called","toolName":"search","input":{"query":"..."},"timestamp":"..."} -{"type":"generation:complete","content":"Based on my search...","timestamp":"..."} -{"type":"complete","timestamp":"..."} +{"type":"startRun","timestamp":1705312200000,"runId":"abc123",...} +{"type":"startGeneration","timestamp":1705312201000,...} +{"type":"callTools","timestamp":1705312202000,...} +{"type":"completeRun","timestamp":1705312203000,...} ``` Pipe these to your logging system: diff --git a/docs/operating-experts/deployment.md b/docs/operating-experts/deployment.md index 583914f8..35fd5363 100644 --- a/docs/operating-experts/deployment.md +++ b/docs/operating-experts/deployment.md @@ -87,7 +87,14 @@ export default { const { query, expertKey } = await request.json() const events: unknown[] = [] await run( - { expertKey, query, providerConfig: { apiKey: env.ANTHROPIC_API_KEY } }, + { + setting: { + model: "claude-sonnet-4-5", + providerConfig: { providerName: "anthropic", apiKey: env.ANTHROPIC_API_KEY }, + expertKey, + input: { text: query }, + }, + }, { eventListener: (event) => events.push(event) } ) return Response.json(events) diff --git a/docs/references/cli.md b/docs/references/cli.md index d88397d6..6e2e6969 100644 --- a/docs/references/cli.md +++ b/docs/references/cli.md @@ -46,15 
+46,21 @@ Both `start` and `run` accept the same options: | `--provider ` | LLM provider | `anthropic` | | `--model ` | Model name | `claude-sonnet-4-5` | -Providers: `anthropic`, `google`, `openai`, `ollama`, `azure-openai`, `amazon-bedrock`, `google-vertex` +Providers: `anthropic`, `google`, `openai`, `deepseek`, `ollama`, `azure-openai`, `amazon-bedrock`, `google-vertex` ### Execution Control | Option | Description | Default | | ------------------- | -------------------------------------------- | --------- | -| `--max-steps ` | Maximum total steps across all Runs in a Job | unlimited | +| `--max-steps ` | Maximum total steps across all Runs in a Job | `100` | | `--max-retries ` | Max retry attempts per generation | `5` | -| `--timeout ` | Timeout per generation (ms) | `60000` | +| `--timeout ` | Timeout per generation (ms) | `300000` | + +### Reasoning + +| Option | Description | Default | +| ------------------------------- | ------------------------------------------------------------------------ | ------- | +| `--reasoning-budget ` | Reasoning budget for native LLM reasoning (`minimal`, `low`, `medium`, `high`, or token count) | - | ### Configuration @@ -95,6 +101,12 @@ Providers: `anthropic`, `google`, `openai`, `ollama`, `azure-openai`, `amazon-be Use with `--continue` to respond to interactive tool calls from the Coordinator Expert. +### Output Filtering (`run` only) + +| Option | Description | +| ------------------ | ---------------------------------------------------------------------- | +| `--filter ` | Filter events by type (comma-separated, e.g., `completeRun,stopRunByError`) | + ### Other | Option | Description | @@ -145,110 +157,6 @@ npx perstack run tic-tac-toe "Let's play!" npx perstack run @org/expert@1.0.0 "query" ``` -## Registry Management - -### `perstack publish` - -Publish an Expert to the registry. 
- -```bash -perstack publish [expertName] [options] -``` - -**Arguments:** -- `[expertName]`: Expert name from `perstack.toml` (prompts if not provided) - -**Options:** -| Option | Description | -| ----------------- | --------------------------- | -| `--config ` | Path to `perstack.toml` | -| `--dry-run` | Validate without publishing | - -**Example:** -```bash -perstack publish my-expert -perstack publish my-expert --dry-run -``` - -Requires `PERSTACK_API_KEY` environment variable. - -**Note:** Published Experts must use `npx` or `uvx` as skill commands. Arbitrary commands are not allowed for security reasons. See [Publishing](../making-experts/publishing.md#skill-requirements). - -### `perstack unpublish` - -Remove an Expert version from the registry. - -```bash -perstack unpublish [expertKey] [options] -``` - -**Arguments:** -- `[expertKey]`: Expert key with version (e.g., `my-expert@1.0.0`) - -**Options:** -| Option | Description | -| ----------------- | ------------------------------------------------ | -| `--config ` | Path to `perstack.toml` | -| `--force` | Skip confirmation (required for non-interactive) | - -**Example:** -```bash -perstack unpublish # Interactive mode -perstack unpublish my-expert@1.0.0 --force # Non-interactive -``` - -### `perstack tag` - -Add or update tags on an Expert version. - -```bash -perstack tag [expertKey] [tags...] [options] -``` - -**Arguments:** -- `[expertKey]`: Expert key with version (e.g., `my-expert@1.0.0`) -- `[tags...]`: Tags to set (e.g., `stable`, `beta`) - -**Options:** -| Option | Description | -| ----------------- | ----------------------- | -| `--config ` | Path to `perstack.toml` | - -**Example:** -```bash -perstack tag # Interactive mode -perstack tag my-expert@1.0.0 stable beta # Set tags directly -``` - -### `perstack status` - -Change the status of an Expert version. 
- -```bash -perstack status [expertKey] [status] [options] -``` - -**Arguments:** -- `[expertKey]`: Expert key with version (e.g., `my-expert@1.0.0`) -- `[status]`: New status (`available`, `deprecated`, `disabled`) - -**Options:** -| Option | Description | -| ----------------- | ----------------------- | -| `--config ` | Path to `perstack.toml` | - -**Example:** -```bash -perstack status # Interactive mode -perstack status my-expert@1.0.0 deprecated -``` - -| Status | Meaning | -| ------------ | ---------------------------- | -| `available` | Normal, visible in registry | -| `deprecated` | Still usable but discouraged | -| `disabled` | Cannot be executed | - ## Debugging and Inspection ### `perstack log` diff --git a/docs/references/events.md b/docs/references/events.md index ec94e330..6b62b41d 100644 --- a/docs/references/events.md +++ b/docs/references/events.md @@ -75,14 +75,14 @@ interface BaseEvent { | Event Type | Description | Key Payload | | --------------------- | ---------------------------------------- | ---------------------------------------- | -| `callTools` | Regular tool calls | `newMessage`, `toolCalls`, `usage` | -| `callInteractiveTool` | Interactive tool call (needs user input) | `newMessage`, `toolCall`, `usage` | -| `callDelegate` | Delegation to another Expert | `newMessage`, `toolCalls`, `usage` | -| `resolveToolResults` | Tool results received | `toolResults` | -| `attemptCompletion` | Completion tool called | `toolResult` | -| `finishToolCall` | Single tool call finished | `newMessages` | -| `resumeToolCalls` | Resume pending tool calls | `pendingToolCalls`, `partialToolResults` | -| `finishAllToolCalls` | All tool calls finished | `newMessages` | +| `callTools` | Regular tool calls | `newMessage`, `toolCalls`, `usage` | +| `resolveToolResults` | Tool results received | `toolResults` | +| `attemptCompletion` | Completion tool called | `toolResult` | +| `finishToolCall` | Single tool call finished | `newMessages` | +| `resumeToolCalls` | 
Resume pending tool calls | `pendingToolCalls`, `partialToolResults` | +| `finishMcpTools` | All MCP tool calls finished | `newMessages` | +| `skipDelegates` | Delegates skipped | (empty) | +| `proceedToInteractiveTools` | Proceeding to interactive tool calls | `pendingToolCalls`, `partialToolResults` | #### Step Transition Events @@ -158,7 +158,7 @@ RuntimeEvent represents **infrastructure-level side effects** — the runtime en ### Characteristics - Only the **latest state matters** — past RuntimeEvents are not meaningful -- Includes infrastructure-level information (skills, proxy) +- Includes infrastructure-level information (skills) - Not tied to the agent loop state machine ### Base Properties @@ -191,12 +191,6 @@ interface BaseRuntimeEvent { | `skillStderr` | Skill stderr output | `skillName`, `message` | | `skillDisconnected` | MCP skill disconnected | `skillName` | -#### Network Events - -| Event Type | Description | Key Payload | -| ------------- | -------------------------- | ------------------------------------ | -| `proxyAccess` | Network access allow/block | `action`, `domain`, `port`, `reason` | - ### Processing RuntimeEvents RuntimeEvents should be processed as **current state** — only the latest value matters. @@ -410,7 +404,7 @@ function ActivityLog({ activities }: { activities: Activity[] }) { │ └──────────────────────┬───────────────────────────────┘ │ │ │ │ │ ┌──────────────────────┼───────────────────────────────┐ │ -│ │ Skills, Proxy │ │ +│ │ Skills │ │ │ │ │ │ │ │ │ RuntimeEvents │ │ │ │ (environment state) │ │ @@ -448,10 +442,6 @@ function formatEvent(event: Record): string | null { // RuntimeEvents switch (type) { case "skillConnected": return `Skill connected: ${event.skillName}` - case "proxyAccess": { - const action = event.action === "allowed" ? 
"✓" : "✗" - return `Proxy ${action} ${event.domain}:${event.port}` - } } return null diff --git a/docs/references/perstack-toml.md b/docs/references/perstack-toml.md index e86a6a19..fed72def 100644 --- a/docs/references/perstack-toml.md +++ b/docs/references/perstack-toml.md @@ -108,6 +108,7 @@ headers = { "X-Custom-Header" = "value" } | `maxSteps` | number | Maximum steps per run | | `maxRetries` | number | Maximum retry attempts | | `timeout` | number | Timeout per generation (ms) | +| `envPath` | string[] | Paths to environment files | ### Provider Configuration @@ -115,7 +116,7 @@ Configure LLM provider under `[provider]` table. ```toml [provider] -providerName = "anthropic" # Required: anthropic, google, openai, ollama, azure-openai, amazon-bedrock, google-vertex +providerName = "anthropic" # Required: anthropic, google, openai, deepseek, ollama, azure-openai, amazon-bedrock, google-vertex [provider.setting] # Provider-specific options (all optional) ``` @@ -214,8 +215,11 @@ delegates = ["other-expert", "@org/another-expert"] | `minRuntimeVersion` | string | No | Minimum runtime version | | `description` | string | No | Brief description (max 2048 chars) | | `instruction` | string | **Yes** | Behavior instructions (max 20KB) | -| `delegates` | string[] | No | Experts this Expert can delegate to | -| `tags` | string[] | No | Tags for categorization | +| `delegates` | string[] | No | Experts this Expert can delegate to | +| `tags` | string[] | No | Tags for categorization | +| `providerTools` | string[] | No | Provider-specific tool names (e.g., `["webSearch", "codeExecution"]`) | +| `providerSkills` | array | No | Anthropic Agent Skills (builtin or custom) | +| `providerToolOptions` | object | No | Provider tool options (e.g., webSearch maxUses, allowedDomains) | ## Skill Definition @@ -244,9 +248,11 @@ requiredEnv = ["API_KEY"] | `command` | string | **Yes** | Command to execute (`npx`, `python`, `uvx`) | | `packageName` | string | No | Package name (for 
`npx`) | | `args` | string[] | No | Command-line arguments | -| `pick` | string[] | No | Tools to include (whitelist) | -| `omit` | string[] | No | Tools to exclude (blacklist) | -| `requiredEnv` | string[] | No | Required environment variables | +| `pick` | string[] | No | Tools to include (whitelist) | +| `omit` | string[] | No | Tools to exclude (blacklist) | +| `requiredEnv` | string[] | No | Required environment variables | +| `allowedDomains` | string[] | No | Allowed domain patterns for network access | +| `lazyInit` | boolean | No | Delay initialization until first use (default: `false`) | ### MCP SSE Skill @@ -264,9 +270,10 @@ omit = ["tool2"] | `type` | literal | **Yes** | `"mcpSseSkill"` | | `description` | string | No | Skill description | | `rule` | string | No | Additional usage guidelines | -| `endpoint` | string | **Yes** | MCP server URL | -| `pick` | string[] | No | Tools to include | -| `omit` | string[] | No | Tools to exclude | +| `endpoint` | string | **Yes** | MCP server URL | +| `pick` | string[] | No | Tools to include | +| `omit` | string[] | No | Tools to exclude | +| `allowedDomains` | string[] | No | Allowed domain patterns for network access | ### Interactive Skill diff --git a/docs/understanding-perstack/runtime.md b/docs/understanding-perstack/runtime.md index 39c29a66..28e42a84 100644 --- a/docs/understanding-perstack/runtime.md +++ b/docs/understanding-perstack/runtime.md @@ -237,18 +237,18 @@ The runtime supports real-time streaming of LLM output through fire-and-forget e ### Event sequence -| Phase | Events | Description | -| --------- | ------------------------------------------------------------- | ------------------------ | -| Reasoning | `startReasoning` → `streamReasoning...` → `completeReasoning` | Extended thinking output | -| Result | `startRunResult` → `streamRunResult...` → `completeRun` | Final completion text | +| Phase | Events | Description | +| --------- | 
--------------------------------------------------------------------------------- | ------------------------ | +| Reasoning | `startStreamingReasoning` → `streamReasoning...` → `completeStreamingReasoning` | Extended thinking output | +| Result | `startStreamingRunResult` → `streamRunResult...` → `completeStreamingRunResult` | Final completion text | ### Streaming vs state machine events -| Event type | State transition? | Purpose | -| ----------- | ------------------ | ------------------------------------- | -| `start*` | No | Marks stream beginning (display hint) | -| `stream*` | No | Incremental delta (fire-and-forget) | -| `complete*` | `completeRun` only | Final result with full text | +| Event type | State transition? | Purpose | +| ----------- | ----------------- | ------------------------------------- | +| `start*` | No | Marks stream beginning (display hint) | +| `stream*` | No | Incremental delta (fire-and-forget) | +| `complete*` | Streaming: No; `completeRun` (ExpertStateEvent): Yes | Full text / final result | ### When streaming is used diff --git a/docs/understanding-perstack/sandbox-integration.md b/docs/understanding-perstack/sandbox-integration.md index 04b34007..62742abb 100644 --- a/docs/understanding-perstack/sandbox-integration.md +++ b/docs/understanding-perstack/sandbox-integration.md @@ -118,14 +118,24 @@ Platform-native controls apply automatically: resource limits, network isolation ```javascript // Cloudflare Workers +import { run } from "@perstack/runtime" + export default { async fetch(request, env) { - const { run } = await import("@perstack/runtime"); - const result = await run({ - expertKey: "@org/expert", - query: await request.text() - }); - return new Response(result.lastMessage.content); + const events = [] + await run( + { + setting: { + model: "claude-sonnet-4-5", + providerConfig: { providerName: "anthropic", apiKey: env.ANTHROPIC_API_KEY }, + expertKey: "@org/expert", + input: { text: await request.text() }, + experts: { /* 
expert definitions */ }, + }, + }, + { eventListener: (event) => events.push(event) } + ) + return Response.json(events) } } ``` diff --git a/docs/using-experts/error-handling.md b/docs/using-experts/error-handling.md index 40964a77..9753ae41 100644 --- a/docs/using-experts/error-handling.md +++ b/docs/using-experts/error-handling.md @@ -36,10 +36,10 @@ When a Delegated Expert fails, the Job continues — the error is returned to th ## Events for monitoring -Use `errorRun` events to monitor failures: +Use `stopRunByError` events to monitor failures: ```bash -npx perstack run my-expert "query" | jq 'select(.type == "errorRun")' +npx perstack run my-expert "query" | jq 'select(.type == "stopRunByError")' ``` For programmatic access: @@ -49,7 +49,7 @@ import { run } from "@perstack/runtime" await run(params, { eventListener: (event) => { - if (event.type === "errorRun") { + if (event.type === "stopRunByError") { // Log, alert, or handle the error } } diff --git a/e2e/README.md b/e2e/README.md index 45ce6503..9154099f 100644 --- a/e2e/README.md +++ b/e2e/README.md @@ -46,7 +46,8 @@ e2e/ │ ├── runtime-version.test.ts # Runtime version │ ├── skills.test.ts # Skill configuration │ ├── streaming.test.ts # Streaming events -│ └── validation.test.ts # CLI validation +│ ├── validation.test.ts # CLI validation +│ └── versioned-base.test.ts # Versioned base skill ├── lib/ # Test utilities │ ├── assertions.ts # Custom assertions │ ├── event-parser.ts # Runtime event parsing diff --git a/packages/react/README.md b/packages/react/README.md index e2edeff0..3815c0bc 100644 --- a/packages/react/README.md +++ b/packages/react/README.md @@ -52,6 +52,58 @@ function ExpertRunner() { } ``` +### useJobStream + +Stream events for a single job. Wraps `useRun` with automatic stream connection management. 
+ +```tsx +import { useJobStream } from "@perstack/react" + +function JobViewer({ jobId }: { jobId: string }) { + const { activities, streaming, latestActivity, isConnected, error } = useJobStream({ + jobId, + connect: (jobId, signal) => fetchStream(`/api/jobs/${jobId}/events`, signal), + }) + + return ( +
<div> + {isConnected && <span>Connected</span>} + {error && <span>Error: {error.message}</span>} + </div>
+ ) +} +``` + +### useJobStreams + +Track multiple jobs simultaneously with lightweight summaries (latest activity only). + +```tsx +import { useJobStreams } from "@perstack/react" + +function JobList({ jobIds }: { jobIds: string[] }) { + const states = useJobStreams({ + jobs: jobIds.map((id) => ({ id, enabled: true })), + connect: (jobId, signal) => fetchStream(`/api/jobs/${jobId}/events`, signal), + }) + + return ( +
<ul> + {jobIds.map((id) => { + const state = states.get(id) + return ( +
<li key={id}> + {id}: {state?.isConnected ? "Connected" : "Disconnected"} + {state?.latestActivity && ` - ${state.latestActivity.type}`} +
</li> + ) + })} + </ul>
+ ) +} +``` + ### Utility Functions For advanced use cases, you can use the utility functions directly: @@ -91,6 +143,34 @@ Returns an object with: **Note:** Activities are append-only and never cleared. This is required for compatibility with Ink's `` component. +### useJobStream(options) + +Streams events for a single job. Parameters: + +- `jobId`: Job ID to stream (or `null` to disable) +- `connect`: `StreamConnector` function `(jobId, signal) => AsyncIterable` +- `enabled`: Whether to connect (default: `true`) + +Returns: + +- `activities`: Array of `ActivityOrGroup` +- `streaming`: Current `StreamingState` +- `latestActivity`: Most recent activity (or `null`) +- `isConnected`: Whether the stream is active +- `error`: Connection error (or `null`) + +### useJobStreams(options) + +Tracks multiple jobs with lightweight summaries. Parameters: + +- `jobs`: Array of `{ id: string; enabled: boolean }` +- `connect`: `StreamConnector` function + +Returns a `Map` where each summary contains: + +- `latestActivity`: Most recent activity for the job +- `isConnected`: Whether the stream is active + ## Types ### StreamingState From 529454e3daf785f5d30e4f7e54cd33545735f492 Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Mon, 9 Feb 2026 14:00:10 +0000 Subject: [PATCH 2/2] ci: skip changeset check for docs-only changes When a PR only modifies markdown files (*.md), the changeset check is skipped since documentation changes don't require package version bumps. 
Co-Authored-By: Claude Opus 4.6 --- .github/workflows/ci.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 77334708..3c71e02e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -244,19 +244,34 @@ jobs: with: fetch-depth: 0 + - name: Check if package source code changed + id: check_files + run: | + PACKAGE_CHANGES=$(git diff --name-only origin/main...HEAD -- apps/ packages/ | grep -v '\.md$' || true) + if [ -z "$PACKAGE_CHANGES" ]; then + echo "needs_changeset=false" >> "$GITHUB_OUTPUT" + echo "No package source changes — skipping changeset check" + else + echo "needs_changeset=true" >> "$GITHUB_OUTPUT" + fi + - name: Setup pnpm + if: steps.check_files.outputs.needs_changeset == 'true' uses: pnpm/action-setup@v4 with: version: ${{ env.PNPM_VERSION }} - name: Setup Node.js + if: steps.check_files.outputs.needs_changeset == 'true' uses: actions/setup-node@v4 with: node-version: ${{ env.NODE_VERSION }} cache: "pnpm" - name: Install dependencies + if: steps.check_files.outputs.needs_changeset == 'true' run: pnpm install --frozen-lockfile - name: Check changeset + if: steps.check_files.outputs.needs_changeset == 'true' run: pnpm changeset status --since=origin/main