From dd205d7a5d6d2e038fa7ee4256f891950b589991 Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Sat, 14 Mar 2026 11:01:18 +0000 Subject: [PATCH 1/3] fix: restrict delegation scope, isolate test workspace, protect perstack.toml Issues observed in production: 1. create-expert could directly delegate to review/test (bypassing write/verify) 2. Generated expert's scaffold deleted perstack.toml (rm -rf) 3. Test artifacts scattered across workspace root 4. verify feedback targeted produced artifacts instead of perstack.toml Fixes: - Coordinator delegates restricted to write + verify only - test: creates dedicated work directory per run, instructs expert to work inside it, explicitly prohibits perstack.toml deletion - verify: runs signals in test's work directory, fix feedback must target perstack.toml (the iterated deliverable), not produced artifacts - Bump to 1.0.24 Co-Authored-By: Claude Opus 4.6 (1M context) --- definitions/create-expert/perstack.toml | 32 ++++++++++++------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/definitions/create-expert/perstack.toml b/definitions/create-expert/perstack.toml index e510aaea..2ccc13c2 100644 --- a/definitions/create-expert/perstack.toml +++ b/definitions/create-expert/perstack.toml @@ -41,7 +41,7 @@ [experts."create-expert"] defaultModelTier = "high" -version = "1.0.23" +version = "1.0.24" description = "Creates and modifies Perstack expert definitions in perstack.toml" instruction = """ You create and modify Perstack expert definitions. perstack.toml is the single deliverable. @@ -73,9 +73,7 @@ If must signal has not passed after 3 iterations, report what passed, what faile """ delegates = [ "@create-expert/write", - "@create-expert/review", "@create-expert/verify", - "@create-expert/test", ] [experts."create-expert".skills."@perstack/base"] @@ -91,7 +89,7 @@ pick = ["readTextFile", "exec", "attemptCompletion"] [experts."@create-expert/write"] defaultModelTier = "high" -version = "1.0.23" +version = "1.0.24" description = """ Produces perstack.toml from the user's request. The file includes an embedded test spec in the header comments. Provide: (1) the user's request, (2) optionally path to existing perstack.toml, (3) optionally verification failure feedback. @@ -207,7 +205,7 @@ pick = [ [experts."@create-expert/review"] defaultModelTier = "low" -version = "1.0.23" +version = "1.0.24" description = """ Reviews perstack.toml for instruction quality and signal design. Provide: (1) path to perstack.toml, (2) the user's original request. @@ -255,7 +253,7 @@ pick = ["readTextFile", "todo", "attemptCompletion"] [experts."@create-expert/verify"] defaultModelTier = "low" -version = "1.0.23" +version = "1.0.24" description = """ Runs the test query via @create-expert/test, then executes hard signal checks from the perstack.toml header. Provide: (1) path to perstack.toml, (2) the coordinator expert name to test. @@ -266,11 +264,11 @@ You run the test and verify the results. Two phases: ## Phase 1: Run Test -Read the test spec from the perstack.toml header comments to extract the test query. Delegate to @create-expert/test with: perstack.toml path, the test query, and the coordinator expert name. +Read the test spec from the perstack.toml header comments to extract the test query. Delegate to @create-expert/test with: perstack.toml path, the test query, and the coordinator expert name. Note the work directory path returned by test. ## Phase 2: Execute Hard Signals -After test completes, execute the verification signals from the perstack.toml header. +After test completes, execute the verification signals from the perstack.toml header. Run signal commands in the work directory reported by test. You do NOT read produced artifacts. You do NOT review content, quality, or style. Your only inputs are command outputs and their expected results. @@ -291,11 +289,11 @@ Re-run the must signal. Compare with first result. ## Verdicts - **PASS** — must signal passes and reproduces. Should signal results reported with counts vs threshold. -- **CONTINUE** — must signal failed or did not reproduce. Include: command, expected, actual, fix needed. +- **CONTINUE** — must signal failed or did not reproduce. Include: command, expected, actual, and a fix recommendation **for perstack.toml** (not for the produced artifacts). The deliverable being iterated is the expert definition, not the test output. Should signal failures beyond threshold are reported as known limitations but do NOT cause CONTINUE — only the must signal blocks. -attemptCompletion with: verdict, must signal result, should signal results, reproducibility result, and (if CONTINUE) fix feedback. +attemptCompletion with: verdict, must signal result, should signal results, reproducibility result, and (if CONTINUE) fix feedback targeting perstack.toml. """ delegates = ["@create-expert/test"] @@ -312,7 +310,7 @@ pick = ["readTextFile", "exec", "todo", "attemptCompletion"] [experts."@create-expert/test"] defaultModelTier = "low" -version = "1.0.23" +version = "1.0.24" description = """ Executes a test query against a Perstack expert and reports what happened. Provide: (1) path to perstack.toml, (2) the test query, (3) the coordinator expert name. @@ -323,15 +321,17 @@ Run a test query against an expert and report exactly what happened. Do NOT eval You can ONLY delegate to coordinators (plain names like "game-dev"), NOT to delegates (names starting with @). -1. Read perstack.toml to understand the expert structure -2. Use addDelegateFromConfig to add the coordinator as a delegate -3. Call the coordinator with the test query -4. removeDelegate to unload the expert +1. Create a dedicated work directory for this test run (e.g., test-run-1). Tell the expert to work exclusively inside this directory. +2. Read perstack.toml to understand the expert structure +3. Use addDelegateFromConfig to add the coordinator as a delegate +4. Call the coordinator with the test query, instructing it to use the work directory +5. removeDelegate to unload the expert -Do NOT delete or modify perstack.toml. Report facts only. +NEVER delete or modify perstack.toml — it is the deliverable being tested, not a workspace file. Instruct the created expert that perstack.toml must not be touched. attemptCompletion with: - **Query**: the test query executed +- **Work directory**: the path where the expert produced its output - **Produced**: files created/modified, outputs returned, actions taken - **Errors**: any failures (if none, state "none") """ From c8c77ac61630056f4aed50f58d668751449a9e75 Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Sat, 14 Mar 2026 11:02:47 +0000 Subject: [PATCH 2/3] fix: isolate test by directory separation instead of explicit instruction Telling the expert not to touch perstack.toml alters the test query. Instead, run the expert in a subdirectory so perstack.toml in the parent is naturally isolated from file operations. Co-Authored-By: Claude Opus 4.6 (1M context) --- definitions/create-expert/perstack.toml | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/definitions/create-expert/perstack.toml b/definitions/create-expert/perstack.toml index 2ccc13c2..15f00788 100644 --- a/definitions/create-expert/perstack.toml +++ b/definitions/create-expert/perstack.toml @@ -321,13 +321,14 @@ Run a test query against an expert and report exactly what happened. Do NOT eval You can ONLY delegate to coordinators (plain names like "game-dev"), NOT to delegates (names starting with @). -1. Create a dedicated work directory for this test run (e.g., test-run-1). Tell the expert to work exclusively inside this directory. -2. Read perstack.toml to understand the expert structure -3. Use addDelegateFromConfig to add the coordinator as a delegate -4. Call the coordinator with the test query, instructing it to use the work directory -5. removeDelegate to unload the expert - -NEVER delete or modify perstack.toml — it is the deliverable being tested, not a workspace file. Instruct the created expert that perstack.toml must not be touched. +1. Create a dedicated work directory for this test run (e.g., test-run-1) +2. cd into the work directory before proceeding +3. Read perstack.toml (in the parent directory) to understand the expert structure +4. Use addDelegateFromConfig to add the coordinator as a delegate +5. Call the coordinator with the test query +6. removeDelegate to unload the expert + +NEVER delete or modify perstack.toml. By running the expert in a separate work directory, perstack.toml in the parent directory is naturally isolated from the expert's file operations. attemptCompletion with: - **Query**: the test query executed From 44336b682e0e933d9aea3eb737c0eec131c5470f Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Sat, 14 Mar 2026 11:03:53 +0000 Subject: [PATCH 3/3] fix: remove unnecessary cd step from test instruction Co-Authored-By: Claude Opus 4.6 (1M context) --- definitions/create-expert/perstack.toml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/definitions/create-expert/perstack.toml b/definitions/create-expert/perstack.toml index 15f00788..d26725aa 100644 --- a/definitions/create-expert/perstack.toml +++ b/definitions/create-expert/perstack.toml @@ -322,11 +322,10 @@ Run a test query against an expert and report exactly what happened. Do NOT eval You can ONLY delegate to coordinators (plain names like "game-dev"), NOT to delegates (names starting with @). 1. Create a dedicated work directory for this test run (e.g., test-run-1) -2. cd into the work directory before proceeding -3. Read perstack.toml (in the parent directory) to understand the expert structure -4. Use addDelegateFromConfig to add the coordinator as a delegate -5. Call the coordinator with the test query -6. removeDelegate to unload the expert +2. Read perstack.toml to understand the expert structure +3. Use addDelegateFromConfig to add the coordinator as a delegate +4. Call the coordinator with the test query +5. removeDelegate to unload the expert NEVER delete or modify perstack.toml. By running the expert in a separate work directory, perstack.toml in the parent directory is naturally isolated from the expert's file operations.