diff --git a/.changeset/rich-numbers-try.md b/.changeset/rich-numbers-try.md new file mode 100644 index 00000000..a845151c --- /dev/null +++ b/.changeset/rich-numbers-try.md @@ -0,0 +1,2 @@ +--- +--- diff --git a/e2e/create-expert/create-expert.test.ts b/e2e/create-expert/create-expert.test.ts index 3a6f0e23..47c499c4 100644 --- a/e2e/create-expert/create-expert.test.ts +++ b/e2e/create-expert/create-expert.test.ts @@ -21,8 +21,10 @@ import { type CommandResult, type RunResult, withEventParsing } from "../lib/run const PROJECT_ROOT = path.resolve(process.cwd()) const CLI_PATH = path.join(PROJECT_ROOT, "apps/create-expert/dist/bin/cli.js") -// LLM API calls require extended timeout; delegation adds extra LLM round-trips -const LLM_TIMEOUT = 180_000 +// LLM API calls require extended timeout; delegation adds extra LLM round-trips. +// The create-expert workflow involves multiple delegation round-trips which can +// take over 2 minutes when the LLM needs to iterate on expert definitions. +const LLM_TIMEOUT = 300_000 function runCreateExpert(query: string, cwd: string, timeout = LLM_TIMEOUT): Promise { const args = injectProviderArgs(["--headless", query]) diff --git a/packages/runtime/src/orchestration/delegation-executor.test.ts b/packages/runtime/src/orchestration/delegation-executor.test.ts index 5672ebdd..3b1fbe4f 100644 --- a/packages/runtime/src/orchestration/delegation-executor.test.ts +++ b/packages/runtime/src/orchestration/delegation-executor.test.ts @@ -305,7 +305,43 @@ describe("@perstack/runtime: delegation-executor", () => { expect(result.nextCheckpoint.usage.reasoningTokens).toBe(10) // 0 + 0 + 10 }) - it("throws error if delegation result has no expertMessage", async () => { + it("returns error result when child run returns stoppedByError", async () => { + const executor = new DelegationExecutor() + const setting = createMockSetting() + const delegations = [createMockDelegation({ toolCallId: "tc-1" })] + const context = createMockContext() + const parentExpert = { key: "parent", name: "Parent", version: "1.0" } + + const runFn = vi.fn().mockResolvedValue({ + ...createMockCheckpoint(), + status: "stoppedByError", + messages: [], + }) + + const result = await executor.execute(delegations, setting, context, parentExpert, runFn) + + expect(result.nextSetting.input.interactiveToolCallResult?.text).toContain( + "ended with status: stoppedByError", + ) + }) + + it("returns error result when child run throws an exception", async () => { + const executor = new DelegationExecutor() + const setting = createMockSetting() + const delegations = [createMockDelegation({ toolCallId: "tc-1" })] + const context = createMockContext() + const parentExpert = { key: "parent", name: "Parent", version: "1.0" } + + const runFn = vi.fn().mockRejectedValue(new Error("MCP connection failed")) + + const result = await executor.execute(delegations, setting, context, parentExpert, runFn) + + expect(result.nextSetting.input.interactiveToolCallResult?.text).toContain( + "failed: MCP connection failed", + ) + }) + + it("throws error if completed delegation result has no expertMessage", async () => { const executor = new DelegationExecutor() const setting = createMockSetting() const delegations = [ @@ -317,6 +353,7 @@ describe("@perstack/runtime: delegation-executor", () => { const runFn = vi.fn().mockResolvedValue({ ...createMockCheckpoint(), + status: "completed", messages: [{ id: "msg-1", type: "userMessage", contents: [] }], }) @@ -338,6 +375,7 @@ describe("@perstack/runtime: delegation-executor", () => { const runFn = vi.fn().mockResolvedValue({ ...createMockCheckpoint(), + status: "completed", messages: [ { id: "msg-1", type: "expertMessage", contents: [{ type: "imagePart", id: "img-1" }] }, ], diff --git a/packages/runtime/src/orchestration/delegation-executor.ts b/packages/runtime/src/orchestration/delegation-executor.ts index 1f8eb3c6..1db42722 100644 --- a/packages/runtime/src/orchestration/delegation-executor.ts +++ b/packages/runtime/src/orchestration/delegation-executor.ts @@ -223,10 +223,35 @@ export class DelegationExecutor { // Merge parent options with returnOnDelegationComplete to ensure child runs // inherit callbacks for checkpoint persistence and event emission - const resultCheckpoint = await runFn( - { setting: delegateSetting, checkpoint: delegateCheckpoint }, - { ...parentOptions, returnOnDelegationComplete: true }, - ) + let resultCheckpoint: Checkpoint + try { + resultCheckpoint = await runFn( + { setting: delegateSetting, checkpoint: delegateCheckpoint }, + { ...parentOptions, returnOnDelegationComplete: true }, + ) + } catch (error) { + // Child run crashed (e.g., MCP connection failure) - return error to parent + return { + toolCallId, + toolName, + expertKey: expert.key, + text: `Delegation to ${expert.key} failed: ${error instanceof Error ? error.message : String(error)}`, + stepNumber: parentContext.stepNumber, + deltaUsage: createEmptyUsage(), + } + } + + // Handle non-completed delegation (stoppedByError, stoppedByExceededMaxSteps, etc.) + if (resultCheckpoint.status !== "completed") { + return { + toolCallId, + toolName, + expertKey: expert.key, + text: `Delegation to ${expert.key} ended with status: ${resultCheckpoint.status}`, + stepNumber: resultCheckpoint.stepNumber, + deltaUsage: resultCheckpoint.usage, + } + } return this.extractDelegationResult(resultCheckpoint, toolCallId, toolName, expert.key) } diff --git a/packages/skill-manager/src/skill-manager.ts b/packages/skill-manager/src/skill-manager.ts index d215a0a4..1353d271 100644 --- a/packages/skill-manager/src/skill-manager.ts +++ b/packages/skill-manager/src/skill-manager.ts @@ -255,13 +255,25 @@ export class SkillManager { }, removeDelegate: (name) => sm.removeDelegate(name), createExpert: async (input) => { + // Ensure @perstack/base is always included in skills + const skills = input.skills + ? { + "@perstack/base": input.skills["@perstack/base"] ?? { + type: "mcpStdioSkill" as const, + command: "npx", + packageName: "@perstack/base", + pick: ["attemptCompletion"], + }, + ...input.skills, + } + : undefined const expert = expertSchema.parse({ key: input.key, name: input.key, version: input.version ?? "1.0.0", description: input.description, instruction: input.instruction, - skills: input.skills, + skills, delegates: input.delegates, tags: input.tags, providerTools: input.providerTools,