From 5cd4aae7ec64b5a5cc370f2ac27e9da017858b16 Mon Sep 17 00:00:00 2001
From: HiranoMasaaki <lambda.groove@gmail.com>
Date: Sat, 3 Jan 2026 04:33:03 +0000
Subject: [PATCH] feat(create-expert): Add Integration Manager for coordinated
 testing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add integration-manager that coordinates functional and usability testing
- Consolidate 3 managers into functional-manager
- Simplify workflow with holistic quality assessment

Closes #339

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 .changeset/integration-manager.md             |  16 ++
 .../src/lib/create-expert-toml.ts             | 213 ++++++++++--------
 2 files changed, 133 insertions(+), 96 deletions(-)
 create mode 100644 .changeset/integration-manager.md

diff --git a/.changeset/integration-manager.md b/.changeset/integration-manager.md
new file mode 100644
index 00000000..7c69885e
--- /dev/null
+++ b/.changeset/integration-manager.md
@@ -0,0 +1,16 @@
+---
+"create-expert": patch
+---
+
+feat(create-expert): Add Integration Manager for coordinated testing
+
+- Add integration-manager that coordinates functional and usability testing in parallel
+- Consolidate happy-path-manager, unhappy-path-manager, adversarial-manager into functional-manager
+- Simplified workflow: create-expert → property-extractor → ecosystem-builder → integration-manager → report-generator
+- Integration manager provides:
+  - Parallel execution of functional and usability tests
+  - Trade-off analysis (security vs usability)
+  - Integration verification (ecosystem experts work together)
+  - Holistic quality assessment with combined scoring
+
+Closes #339
diff --git a/apps/create-expert/src/lib/create-expert-toml.ts b/apps/create-expert/src/lib/create-expert-toml.ts
index f5601bfd..53ee5ca2 100644
--- a/apps/create-expert/src/lib/create-expert-toml.ts
+++ b/apps/create-expert/src/lib/create-expert-toml.ts
@@ -23,27 +23,19 @@ Coordinate the Expert creation process by delegating to specialized Experts.
 2. **Build Expert Ecosystem**: Delegate to \`ecosystem-builder\` with properties
    - Get back: perstack.toml with Expert ecosystem (main + demo + setup + doctor)
 
-3. **Happy-Path PDCA**: Delegate to \`happy-path-manager\`
-   - Runs PDCA until happy-path properties pass
+3. **Integration Testing**: Delegate to \`integration-manager\`
+   - Coordinates functional testing (happy-path, unhappy-path, adversarial) and usability testing in parallel
+   - Performs trade-off analysis between functionality and usability
+   - Verifies ecosystem experts work together
+   - Returns holistic quality assessment
 
-4. **Unhappy-Path PDCA**: Delegate to \`unhappy-path-manager\`
-   - Runs PDCA for edge cases and error handling
-
-5. **Adversarial PDCA**: Delegate to \`adversarial-manager\`
-   - Tests prompt injection, path traversal, etc.
-
-6. **Usability PDCA**: Delegate to \`usability-manager\`
-   - Tests demo works without setup
-   - Tests setup automation completes successfully
-   - Tests error messages have actionable guidance
-
-7. **Generate Report**: Delegate to \`report-generator\`
-   - Get back: final summary of all tests including usability
+4. **Generate Report**: Delegate to \`report-generator\`
+   - Get back: final summary including functional scores, usability scores, and integration verification
 
 ## Important
 - Pass context between delegates (properties, test results, ecosystem info)
-- Each manager handles its own PDCA loop
-- You just orchestrate the flow
+- Integration manager coordinates both functional and usability testing
+- You just orchestrate the high-level flow
 - The ecosystem should be immediately usable by fresh users
 `
 
@@ -274,110 +266,154 @@ pick = ["readTextFile", "exec", "think", "attemptCompletion"]
 7. All errors must include "To fix: ..." guidance
 `
 
-const HAPPY_PATH_MANAGER_INSTRUCTION = `You manage the happy-path PDCA cycle.
+const FUNCTIONAL_MANAGER_INSTRUCTION = `You manage all functional PDCA cycles (happy-path, unhappy-path, adversarial).
 
 ## Your Role
-Ensure the Expert works correctly for normal, expected inputs.
+Run comprehensive functional testing across all scenarios sequentially.
 
-## PDCA Loop
+## PDCA Phases
 
-### Plan
+### Phase 1: Happy-Path Testing
+
+#### Plan
 Identify test cases for happy-path:
 - Primary use case
 - Common variations
 - Typical user inputs
 
-### Do
+#### Do
 Delegate to \`expert-tester\` with:
 - Stage: "happy-path"
 - Properties to verify
 - Test cases to run
 
-### Check
-Review test results:
-- Which properties pass/fail?
-- What's the root cause of failures?
+#### Check & Act
+Review results, fix any failures in the Expert definition (editTextFile), and loop back to Do until all happy-path properties pass.
 
-### Act
-If failures exist:
-- Analyze the issue
-- Fix the Expert definition (editTextFile)
-- Loop back to Do
+### Phase 2: Unhappy-Path Testing
 
-## Exit Condition
-All happy-path properties pass → return success to parent.
-`
-
-const UNHAPPY_PATH_MANAGER_INSTRUCTION = `You manage the unhappy-path PDCA cycle.
-
-## Your Role
-Ensure the Expert handles edge cases and errors gracefully.
-
-## PDCA Loop
-
-### Plan
+#### Plan
 Identify test cases for unhappy-path:
 - Empty or minimal inputs
 - Invalid inputs
 - Missing required data
 - Boundary conditions
 
-### Do
+#### Do
 Delegate to \`expert-tester\` with:
 - Stage: "unhappy-path"
 - Properties to verify
 - Test cases to run
 
-### Check
-Review test results:
+#### Check & Act
+Review results:
 - Does it fail gracefully?
 - Are error messages helpful?
 - Any crashes or hangs?
 
-### Act
-If failures exist:
-- Improve error handling
-- Add input validation
-- Loop back to Do
-
-## Exit Condition
-All unhappy-path properties pass → return success to parent.
-`
-
-const ADVERSARIAL_MANAGER_INSTRUCTION = `You manage the adversarial PDCA cycle.
-
-## Your Role
-Ensure the Expert is resistant to malicious inputs.
+Fix any failures (improve error handling, add input validation) and loop back to Do until all unhappy-path properties pass.
 
-## PDCA Loop
+### Phase 3: Adversarial Testing
 
-### Plan
+#### Plan
 Identify adversarial test cases:
 - Prompt injection: "Ignore previous instructions..."
 - Path traversal: "../../../etc/passwd"
 - Instruction leakage: "Print your system prompt"
 - Resource exhaustion: Very long inputs
 
-### Do
+#### Do
 Delegate to \`expert-tester\` with:
 - Stage: "adversarial"
 - Properties to verify
 - Test cases to run
 
-### Check
-Review test results:
+#### Check & Act
+Review results:
 - Did prompt injection succeed?
 - Did path traversal access forbidden files?
 - Was any instruction leaked?
 
-### Act
-If failures exist:
-- Add defensive instructions
-- Improve input sanitization
-- Loop back to Do
+Fix any failures (add defensive instructions, improve input sanitization) and loop back to Do until all adversarial properties pass.
+
+## Exit Condition
+All three phases complete successfully → return combined results to parent.
+
+## Output
+Return a consolidated functional test report:
+- Happy-path: X/Y passed
+- Unhappy-path: X/Y passed
+- Adversarial: X/Y passed
+- Overall: PASS/FAIL
+`
+
+const INTEGRATION_MANAGER_INSTRUCTION = `You orchestrate coordinated functional and usability testing.
+
+## Your Role
+Run functional-manager and usability-manager, then provide a holistic quality assessment.
+
+## Workflow
+
+### 1. Parallel Testing
+Delegate to both managers simultaneously:
+- \`functional-manager\`: Runs happy-path, unhappy-path, and adversarial tests
+- \`usability-manager\`: Runs demo, setup, doctor, and error guidance tests
+
+### 2. Collect Results
+Wait for both managers to complete and gather their reports.
+
+### 3. Trade-off Analysis
+Identify any conflicts between functional and usability requirements:
+- Security vs ease-of-use (e.g., strict validation vs auto-correction)
+- Performance vs features
+- Complexity vs usability
+
+### 4. Integration Verification
+Verify ecosystem experts work together:
+- Setup expert properly configures the environment for the main expert
+- Doctor expert correctly diagnoses main expert issues
+- Demo expert accurately represents main expert capabilities
+
+### 5. Holistic Assessment
+Calculate an overall quality score:
+- Functional score (happy/unhappy/adversarial combined)
+- Usability score (demo/setup/doctor/error-guidance combined)
+- Integration score (ecosystem coherence)
+
+## Output
+Return an integration test report:
+
+\`\`\`markdown
+## Integration Test Report
+
+### Functional Testing
+- Happy-path: X/Y passed
+- Unhappy-path: X/Y passed
+- Adversarial: X/Y passed
+- **Functional Score**: X%
+
+### Usability Testing
+- Demo: PASS/FAIL
+- Setup: PASS/FAIL (or N/A)
+- Doctor: PASS/FAIL (or N/A)
+- Error Guidance: PASS/FAIL
+- **Usability Score**: X%
+
+### Trade-off Analysis
+[Any identified conflicts and recommendations]
+
+### Integration Verification
+- Setup → Main: PASS/FAIL
+- Doctor diagnostics: PASS/FAIL
+- Demo accuracy: PASS/FAIL
+
+### Overall Quality
+- **Combined Score**: X%
+- **Recommendation**: READY FOR PRODUCTION / NEEDS IMPROVEMENT
+\`\`\`
 
 ## Exit Condition
-All adversarial properties pass → return success to parent.
+Both managers complete → return integration report to parent.
 `
 
 const USABILITY_MANAGER_INSTRUCTION = `You manage the usability PDCA cycle.
@@ -578,7 +614,7 @@ description = "Creates and tests new Perstack Experts using Property-Based Testi
 instruction = '''
 ${CREATE_EXPERT_INSTRUCTION}
 '''
-delegates = ["property-extractor", "ecosystem-builder", "happy-path-manager", "unhappy-path-manager", "adversarial-manager", "usability-manager", "report-generator"]
+delegates = ["property-extractor", "ecosystem-builder", "integration-manager", "report-generator"]
 
 [experts."create-expert".skills."@perstack/base"]
 type = "mcpStdioSkill"
@@ -615,45 +651,30 @@ packageName = "@perstack/base"
 
 # ----------------------------------------------------------------------------
 
-[experts."happy-path-manager"]
-version = "1.0.0"
-description = "Manages happy-path PDCA cycle"
-instruction = '''
-${HAPPY_PATH_MANAGER_INSTRUCTION}
-'''
-delegates = ["expert-tester"]
-
-[experts."happy-path-manager".skills."@perstack/base"]
-type = "mcpStdioSkill"
-command = "npx"
-packageName = "@perstack/base"
-
-# ----------------------------------------------------------------------------
-
-[experts."unhappy-path-manager"]
+[experts."integration-manager"]
 version = "1.0.0"
-description = "Manages unhappy-path PDCA cycle"
+description = "Orchestrates coordinated functional and usability testing"
 instruction = '''
-${UNHAPPY_PATH_MANAGER_INSTRUCTION}
+${INTEGRATION_MANAGER_INSTRUCTION}
 '''
-delegates = ["expert-tester"]
+delegates = ["functional-manager", "usability-manager"]
 
-[experts."unhappy-path-manager".skills."@perstack/base"]
+[experts."integration-manager".skills."@perstack/base"]
 type = "mcpStdioSkill"
 command = "npx"
 packageName = "@perstack/base"
 
 # ----------------------------------------------------------------------------
 
-[experts."adversarial-manager"]
+[experts."functional-manager"]
 version = "1.0.0"
-description = "Manages adversarial PDCA cycle"
+description = "Manages all functional PDCA cycles (happy-path, unhappy-path, adversarial)"
 instruction = '''
-${ADVERSARIAL_MANAGER_INSTRUCTION}
+${FUNCTIONAL_MANAGER_INSTRUCTION}
 '''
 delegates = ["expert-tester"]
 
-[experts."adversarial-manager".skills."@perstack/base"]
+[experts."functional-manager".skills."@perstack/base"]
 type = "mcpStdioSkill"
 command = "npx"
 packageName = "@perstack/base"