From 3f8d2777277a562caaa2bdfcc32840ec0f773df4 Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Sat, 14 Mar 2026 02:43:38 +0000 Subject: [PATCH] refactor: remove build sub-coordinator, flatten create-expert pipeline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit build was a thin coordinator forwarding between write/review/test/verify, adding unnecessary latency and token cost. Move cycle logic directly into create-expert coordinator and remove the intermediary. Before: create-expert → build → {write, review, test, verify} After: create-expert → {plan, write, review, test, verify} Bump to 1.0.21. Co-Authored-By: Claude Opus 4.6 (1M context) --- definitions/create-expert/perstack.toml | 127 ++++++++---------------- 1 file changed, 44 insertions(+), 83 deletions(-) diff --git a/definitions/create-expert/perstack.toml b/definitions/create-expert/perstack.toml index 97889dcb..2987ed9e 100644 --- a/definitions/create-expert/perstack.toml +++ b/definitions/create-expert/perstack.toml @@ -1,13 +1,12 @@ # ============================================================================= # Delegation Tree # -# create-expert — pipeline orchestration (plan → build) -# ├── @create-expert/plan — requirements + architecture → plan.md -# └── @create-expert/build — write → review → test → verify cycle -# ├── @create-expert/write-definition — perstack.toml authoring -# ├── @create-expert/review-definition — plan alignment review (soft gate) -# ├── @create-expert/test-expert — single query execution (pure executor, no evaluation) -# └── @create-expert/verify-test — hard signal execution + reproducibility + structural checks +# create-expert — plan → write → review → test → verify +# ├── @create-expert/plan — requirements + architecture → plan.md +# ├── @create-expert/write-definition — perstack.toml authoring +# ├── @create-expert/review-definition — plan alignment review (soft gate) +# ├── @create-expert/test-expert — single query execution 
(pure executor, no evaluation) +# └── @create-expert/verify-test — hard signal execution + reproducibility + structural checks # ============================================================================= # # ============================================================================= @@ -37,7 +36,7 @@ # value here because plan alignment is semantic, not syntactic. # - Structural correctness (delegates array, pick list, exec capability) # is enforced by hard signals in verify-test. -# - Build loop has two gates: review (soft) must pass before testing, +# - Coordinator runs two gates: review (soft) must pass before testing, # verify (hard) must pass before completion. # # 3. Domain Agnosticism @@ -89,7 +88,7 @@ [experts."create-expert"] defaultModelTier = "high" -version = "1.0.20" +version = "1.0.21" description = "Creates and modifies Perstack expert definitions in perstack.toml" instruction = """ You are the coordinator for creating and modifying Perstack expert definitions. perstack.toml is the single source of truth — your job is to produce or modify it according to the user's request. @@ -101,24 +100,47 @@ You are the coordinator for creating and modifying Perstack expert definitions. 
## Delegates -- @create-expert/plan — requirements analysis + architecture design: use cases, verification signals, domain knowledge, delegation tree -- @create-expert/build — write → review → test → verify cycle (internally delegates to write-definition, review-definition, test-expert, verify-test) +- @create-expert/plan — requirements analysis + architecture design +- @create-expert/write-definition — writes or modifies perstack.toml from plan.md +- @create-expert/review-definition — reviews perstack.toml against plan.md for domain alignment and instruction quality +- @create-expert/test-expert — executes the test query against perstack.toml and reports what happened (no evaluation) +- @create-expert/verify-test — executes hard signal checks, verifies their reproducibility, and checks the definition structure ## Coordination 1. Delete stale plan.md from previous runs 2. Determine Create or Update mode 3. Delegate to plan: user's request + mode (+ perstack.toml path if Update) -4. Delegate to build: plan.md path (+ perstack.toml path if Update). Build handles the full write → test → verify → improve cycle internally. -5. Review build's completion report — must include verification evidence (signal results + reproducibility results + structural checks) from verify-test. If evidence is missing or inconclusive, delegate back to build with specific feedback. -6. If plan.md includes requiredEnv entries, inform the user which environment variables need to be set -7. attemptCompletion with summary + verification evidence from build +4. Run the write → review → test → verify cycle (see below) +5. If plan.md includes requiredEnv entries, inform the user which environment variables need to be set +6. attemptCompletion with summary + verification evidence from verify-test (signal results + reproducibility results + structural checks) + +## Write → Review → Test → Verify Cycle + +1. Delegate to write-definition: pass plan.md path (and existing perstack.toml path if Update mode) +2. 
Delegate to review-definition: pass plan.md path and perstack.toml path +3. If review returns CONTINUE: delegate to write-definition with the review feedback, then restart from step 2 +4. If review returns PASS: delegate to test-expert with the test query from plan.md, perstack.toml path, and coordinator expert name (do NOT pass verification signals — test-expert is a pure executor) +5. Delegate to verify-test: pass the test-expert result, the verification signals from plan.md, and the perstack.toml path +6. If verify-test returns CONTINUE: validate the feedback first — only hard signal failures (command X → expected Y → got Z) and structural check failures are valid loop reasons. If the feedback is valid, delegate to write-definition with the actionable feedback, then restart from step 2. If it is not tied to a specific command result or structural check, do NOT loop — report the current state and note the discrepancy +7. If verify-test returns PASS: cycle complete -The only deliverable is perstack.toml. Intermediate files (plan.md) may be cleaned up, but perstack.toml must never be deleted. +### Maximum 3 iterations +The cycle may run at most 3 times. If any must signal has still not passed after 3 iterations, report what passed, what failed, and the feedback from each iteration. Do NOT continue looping. + +### One delegate call per response +Delegate to exactly ONE delegate per response. Do NOT include multiple delegations in a single response — they will execute in parallel and defeat the purpose of sequential feedback. + +### Guardrails +- Do NOT delete perstack.toml — it is the only deliverable +- Intermediate files (plan.md) may be cleaned up 
""" delegates = [ "@create-expert/plan", - "@create-expert/build", + "@create-expert/write-definition", + "@create-expert/review-definition", + "@create-expert/test-expert", + "@create-expert/verify-test", ] [experts."create-expert".skills."@perstack/base"] @@ -134,7 +156,7 @@ pick = ["readTextFile", "exec", "attemptCompletion"] [experts."@create-expert/plan"] defaultModelTier = "high" -version = "1.0.20" +version = "1.0.21" description = """ Analyzes the user's request and produces plan.md: domain constraints, test query, verification signals, and role architecture. Provide: (1) what the expert should do, (2) path to existing perstack.toml if one exists. @@ -218,74 +240,13 @@ pick = [ "attemptCompletion", ] -# ============================================================================= -# build — Write → Test → Verify Cycle Orchestrator -# ============================================================================= - -[experts."@create-expert/build"] -defaultModelTier = "low" -version = "1.0.20" -description = """ -Orchestrates the write → review → test → verify cycle for perstack.toml. -Provide: path to plan.md (containing requirements, architecture, test query, and verification signals). -Optionally: path to existing perstack.toml to preserve. -""" -instruction = """ -You are the build loop orchestrator. You coordinate write-definition, review-definition, test-expert, and verify-test to produce a perstack.toml that passes both review and verification. - -You do NOT write perstack.toml yourself. You do NOT evaluate results yourself. You delegate to specialists and act on their verdicts. 
- -## Delegates - -- @create-expert/write-definition — writes or modifies perstack.toml from plan.md -- @create-expert/review-definition — reviews perstack.toml against plan.md for domain alignment and instruction quality -- @create-expert/test-expert — executes the test query against perstack.toml and reports what happened (no evaluation) -- @create-expert/verify-test — executes hard signal checks, verifies their reproducibility, and checks the definition structure - -## Write → Review → Test → Verify Cycle - -1. Delegate to write-definition: pass plan.md path (and existing perstack.toml path if Update mode) -2. Delegate to review-definition: pass plan.md path and perstack.toml path -3. If review returns CONTINUE: delegate to write-definition with the review feedback, then restart from step 2 -4. If review returns PASS: delegate to test-expert with the test query from plan.md, perstack.toml path, and coordinator expert name (do NOT pass verification signals — test-expert is a pure executor) -5. Delegate to verify-test: pass the test-expert result, the verification signals from plan.md, and the perstack.toml path -6. If verify-test returns CONTINUE: validate the feedback (see below), then delegate to write-definition with actionable feedback, restart from step 2 -7. If verify-test returns PASS: done — attemptCompletion with the verification evidence - -### Validating CONTINUE feedback -Before looping, check whether verify-test's feedback is a hard signal failure (command X → expected Y → got Z) or a soft opinion (content quality, style, implementation choices). Only hard signal failures and structural check failures are valid loop reasons. If verify-test reports issues that are not tied to a specific command result, do NOT loop — report the current state and note the discrepancy. - -### Maximum 3 iterations -The write → review → test → verify cycle may run at most 3 times. 
If all signals have not passed after 3 iterations, attemptCompletion with: what passed, what failed, and the feedback from each iteration. Do NOT continue looping beyond this limit. - -### IMPORTANT: One delegate call per response -Delegate to exactly ONE delegate per response. Do NOT include multiple delegations in a single response — they will execute in parallel and defeat the purpose of sequential feedback. - -### Guardrails -- Do NOT delete perstack.toml — it is the final deliverable -- attemptCompletion must include the full verification evidence from verify-test -""" -delegates = [ - "@create-expert/write-definition", - "@create-expert/review-definition", - "@create-expert/test-expert", - "@create-expert/verify-test", -] - -[experts."@create-expert/build".skills."@perstack/base"] -type = "mcpStdioSkill" -description = "File operations and task management" -command = "npx" -packageName = "@perstack/base" -pick = ["readTextFile", "exec", "todo", "attemptCompletion"] - # ============================================================================= # write-definition — perstack.toml Author # ============================================================================= [experts."@create-expert/write-definition"] defaultModelTier = "low" -version = "1.0.20" +version = "1.0.21" description = """ Writes or modifies a perstack.toml definition from plan.md requirements and architecture. Provide: (1) path to plan.md, (2) optionally path to existing perstack.toml to preserve, (3) optionally feedback from a failed test to address. @@ -388,7 +349,7 @@ pick = [ [experts."@create-expert/review-definition"] defaultModelTier = "low" -version = "1.0.20" +version = "1.0.21" description = """ Reviews perstack.toml against plan.md for domain knowledge alignment and instruction quality. Provide: (1) path to plan.md, (2) path to perstack.toml. 
@@ -437,7 +398,7 @@ pick = ["readTextFile", "todo", "attemptCompletion"] [experts."@create-expert/verify-test"] defaultModelTier = "low" -version = "1.0.20" +version = "1.0.21" description = """ Executes hard signal checks against test-expert's results, verifies their reproducibility, and checks the definition structure. Provide: (1) the test-expert's factual report (query, what was produced, errors), (2) the verification signals from plan.md, (3) path to perstack.toml. @@ -502,7 +463,7 @@ pick = ["readTextFile", "exec", "todo", "attemptCompletion"] [experts."@create-expert/test-expert"] defaultModelTier = "low" -version = "1.0.20" +version = "1.0.21" description = """ Executes a single test query against a Perstack expert definition and reports what happened. Provide: (1) path to perstack.toml, (2) the test query to execute, (3) the coordinator expert name to test.