From ebf6fba410ec466c7281c82ab8f7992d68fc2bf7 Mon Sep 17 00:00:00 2001 From: Vlad Temian Date: Thu, 1 Jan 2026 20:45:24 +0200 Subject: [PATCH 01/14] fix(background-task): remove unused lastMessage field from progress type --- src/tools/background-task/manager.ts | 8 -------- src/tools/background-task/types.ts | 1 - tests/tools/background-task-types.test.ts | 15 +++++++++++++++ 3 files changed, 15 insertions(+), 9 deletions(-) create mode 100644 tests/tools/background-task-types.test.ts diff --git a/src/tools/background-task/manager.ts b/src/tools/background-task/manager.ts index 1a05c41..4ebb987 100644 --- a/src/tools/background-task/manager.ts +++ b/src/tools/background-task/manager.ts @@ -197,14 +197,6 @@ export class BackgroundTaskManager { output += `\n### Error\n${task.error}\n`; } - if (task.progress?.lastMessage) { - const preview = - task.progress.lastMessage.length > 200 - ? `${task.progress.lastMessage.slice(0, 200)}...` - : task.progress.lastMessage; - output += `\n### Last Message Preview\n${preview}\n`; - } - return output; } diff --git a/src/tools/background-task/types.ts b/src/tools/background-task/types.ts index 9d6b385..757865d 100644 --- a/src/tools/background-task/types.ts +++ b/src/tools/background-task/types.ts @@ -15,7 +15,6 @@ export interface BackgroundTask { toolCalls: number; lastTool?: string; lastUpdate: Date; - lastMessage?: string; }; } diff --git a/tests/tools/background-task-types.test.ts b/tests/tools/background-task-types.test.ts new file mode 100644 index 0000000..f5f8441 --- /dev/null +++ b/tests/tools/background-task-types.test.ts @@ -0,0 +1,15 @@ +import { describe, it, expect } from "bun:test"; + +describe("background-task types", () => { + it("should not have lastMessage in progress type", async () => { + const fs = await import("node:fs/promises"); + const source = await fs.readFile("src/tools/background-task/types.ts", "utf-8"); + expect(source).not.toContain("lastMessage"); + }); + + it("should not reference lastMessage in manager", async () => { + const fs = await import("node:fs/promises"); + const source = await fs.readFile("src/tools/background-task/manager.ts", "utf-8"); + expect(source).not.toContain("lastMessage"); + }); +}); From ab57cd1c6268ebbb5ed852c9204276807e5bd0f3 Mon Sep 17 00:00:00 2001 From: Vlad Temian Date: Thu, 1 Jan 2026 20:47:02 +0200 Subject: [PATCH 02/14] feat(project-initializer): update to use background_task fire-and-collect pattern --- src/agents/project-initializer.ts | 112 ++++++++++++++++------- tests/agents/project-initializer.test.ts | 32 +++++++ 2 files changed, 112 insertions(+), 32 deletions(-) create mode 100644 tests/agents/project-initializer.test.ts diff --git a/src/agents/project-initializer.ts b/src/agents/project-initializer.ts index 328938f..9da543a 100644 --- a/src/agents/project-initializer.ts +++ b/src/agents/project-initializer.ts @@ -10,7 +10,7 @@ const PROMPT = ` MAXIMIZE PARALLELISM. Speed is critical. - - Spawn multiple agents simultaneously + - Fire ALL background tasks simultaneously - Run multiple tool calls in single message - Never wait for one thing when you can do many @@ -23,16 +23,33 @@ const PROMPT = ` - - - Spawn ALL discovery tasks simultaneously - + + + Fire a subagent to run in background. Returns task_id immediately. + Parameters: description, prompt, agent (subagent type) + Example: background_task(description="Find entry points", prompt="Find all entry points", agent="codebase-locator") + + + Get results from a background task. Use block=true to wait for completion. + Parameters: task_id, block (boolean), timeout (optional) + Example: background_output(task_id="abc123", block=true) + + + List all background tasks and their status. + No parameters required. + + + + + + Launch ALL discovery agents + run tools in a SINGLE message + Find entry points, configs, main modules Find test files and test patterns Find linter, formatter, CI configs Analyze directory structure Find naming conventions across files - + Glob for package.json, pyproject.toml, go.mod, Cargo.toml, etc. Glob for *.config.*, .eslintrc*, .prettierrc*, ruff.toml, etc. @@ -41,13 +58,19 @@ const PROMPT = ` - - Analyze core modules in parallel - + + Use background_output(block=true) to collect each result + Collect results from all fired agents + Process tool results from phase 1 + + + + Based on discovery, fire more background tasks + Analyze core/domain logic Analyze API/entry points Analyze data layer - + Read 5 core source files simultaneously Read 3 test files simultaneously @@ -55,8 +78,9 @@ const PROMPT = ` - - Write both files in parallel + + Collect deep analysis results, then write both files + Collect all deep analysis results Write ARCHITECTURE.md Write CODE_STYLE.md @@ -66,23 +90,36 @@ const PROMPT = ` Fast file/pattern finder. Spawn multiple with different queries. Examples: "Find all entry points", "Find all config files", "Find test directories" - Invoke with: Task tool, subagent_type="codebase-locator" + + Background: background_task(description="Find entry points", prompt="Find all entry points and main files", agent="codebase-locator") + Fallback: Task(description="Find entry points", prompt="Find all entry points and main files", subagent_type="codebase-locator") Deep module analyzer. Spawn multiple for different areas. Examples: "Analyze src/core", "Analyze api layer", "Analyze database module" - Invoke with: Task tool, subagent_type="codebase-analyzer" + + Background: background_task(description="Analyze core", prompt="Analyze the core module", agent="codebase-analyzer") + Fallback: Task(description="Analyze core", prompt="Analyze the core module", subagent_type="codebase-analyzer") Pattern extractor. Spawn for different pattern types. Examples: "Find naming patterns", "Find error handling patterns", "Find async patterns" - Invoke with: Task tool, subagent_type="pattern-finder" + + Background: background_task(description="Find patterns", prompt="Find naming conventions", agent="pattern-finder") + Fallback: Task(description="Find patterns", prompt="Find naming conventions", subagent_type="pattern-finder") + + If background_task fails or is unavailable, fall back to Task() tool. + The Task tool provides synchronous subagent execution. + Example fallback: Task(description="Find entry points", prompt="Find all entry points", subagent_type="codebase-locator") + + - You MUST use the Task tool to spawn subagents. Call multiple Task tools in a SINGLE message for parallelism. - Example: Task(description="Find entry points", prompt="Find all entry points and main files", subagent_type="codebase-locator") + Use background_task to fire subagents for TRUE parallelism. + Fire ALL background_task calls in a SINGLE message, then collect with background_output(block=true). + This is the fire-and-collect pattern - fire everything, then collect everything. @@ -148,10 +185,10 @@ const PROMPT = ` - ALWAYS spawn multiple agents in a SINGLE message + ALWAYS fire multiple background_task calls in a SINGLE message ALWAYS run multiple tool calls in a SINGLE message NEVER wait for one task when you can start others - Batch related queries into parallel agent spawns + Use fire-and-collect: fire all, then collect all @@ -176,27 +213,38 @@ const PROMPT = ` - - - In a SINGLE message, call Task tool multiple times AND run other tools: - - Task(description="Find entry points", prompt="Find all entry points and main files", subagent_type="codebase-locator") - - Task(description="Find configs", prompt="Find all config files (linters, formatters, build)", subagent_type="codebase-locator") - - Task(description="Find tests", prompt="Find test directories and test files", subagent_type="codebase-locator") - - Task(description="Analyze structure", prompt="Analyze the directory structure and organization", subagent_type="codebase-analyzer") - - Task(description="Find patterns", prompt="Find naming conventions used across the codebase", subagent_type="pattern-finder") + + + In a SINGLE message, fire ALL background_task calls AND run other tools: + - background_task(description="Find entry points", prompt="Find all entry points and main files", agent="codebase-locator") -> task_id_1 + - background_task(description="Find configs", prompt="Find all config files (linters, formatters, build)", agent="codebase-locator") -> task_id_2 + - background_task(description="Find tests", prompt="Find test directories and test files", agent="codebase-locator") -> task_id_3 + - background_task(description="Analyze structure", prompt="Analyze the directory structure and organization", agent="codebase-analyzer") -> task_id_4 + - background_task(description="Find patterns", prompt="Find naming conventions used across the codebase", agent="pattern-finder") -> task_id_5 - Glob: package.json, pyproject.toml, go.mod, Cargo.toml, etc. - Glob: README*, ARCHITECTURE*, docs/* - - Based on discovery, in a SINGLE message: - - Task for each major module: subagent_type="codebase-analyzer" + + In a SINGLE message, collect ALL results: + - background_output(task_id=task_id_1, block=true) + - background_output(task_id=task_id_2, block=true) + - background_output(task_id=task_id_3, block=true) + - background_output(task_id=task_id_4, block=true) + - background_output(task_id=task_id_5, block=true) + + + + Based on discovery, in a SINGLE message fire more tasks: + - background_task for each major module: agent="codebase-analyzer" - Read multiple source files simultaneously - Read multiple test files simultaneously - - Write ARCHITECTURE.md and CODE_STYLE.md + + Collect deep analysis results, then write: + - Write ARCHITECTURE.md + - Write CODE_STYLE.md diff --git a/tests/agents/project-initializer.test.ts b/tests/agents/project-initializer.test.ts new file mode 100644 index 0000000..f5cb790 --- /dev/null +++ b/tests/agents/project-initializer.test.ts @@ -0,0 +1,32 @@ +import { describe, it, expect } from "bun:test"; + +describe("project-initializer agent", () => { + it("should use background_task instead of Task", async () => { + const fs = await import("node:fs/promises"); + const source = await fs.readFile("src/agents/project-initializer.ts", "utf-8"); + + expect(source).toContain("background_task"); + expect(source).toContain("background_output"); + }); + + it("should have fire-and-collect pattern documentation", async () => { + const fs = await import("node:fs/promises"); + const source = await fs.readFile("src/agents/project-initializer.ts", "utf-8"); + + expect(source).toContain("fire-and-collect"); + }); + + it("should have fallback-rule section", async () => { + const fs = await import("node:fs/promises"); + const source = await fs.readFile("src/agents/project-initializer.ts", "utf-8"); + + expect(source).toContain(""); + }); + + it("should have background-tools section", async () => { + const fs = await import("node:fs/promises"); + const source = await fs.readFile("src/agents/project-initializer.ts", "utf-8"); + + expect(source).toContain(""); + }); +}); From 893adefee771071b7b2995debcbc156ef7bc8a68 Mon Sep 17 00:00:00 2001 From: Vlad Temian Date: Thu, 1 Jan 2026 20:47:09 +0200 Subject: [PATCH 03/14] feat(planner): update to use background_task fire-and-collect pattern --- src/agents/planner.ts | 66 ++++++++++++++++++++++++++++-------- tests/agents/planner.test.ts | 40 ++++++++++++++++++++++ 2 files changed, 92 insertions(+), 14 deletions(-) create mode 100644 tests/agents/planner.test.ts diff --git a/src/agents/planner.ts b/src/agents/planner.ts index ff32fd6..de0a400 100644 --- a/src/agents/planner.ts +++ b/src/agents/planner.ts @@ -13,13 +13,24 @@ Every task is bite-sized (2-5 minutes), with exact paths and complete code. FOLLOW THE DESIGN: The brainstormer's design is the spec. Do not explore alternatives. - SUBAGENTS: Spawn for implementation details (paths, signatures, line numbers). - TOOLS (grep, read, etc.): Do NOT use directly - use subagents instead. + BACKGROUND TASKS: Use background_task for parallel research (fire-and-collect pattern). + TOOLS (grep, read, etc.): Do NOT use directly - use background subagents instead. Every code example MUST be complete - never write "add validation here" Every file path MUST be exact - never write "somewhere in src/" Follow TDD: failing test → verify fail → implement → verify pass → commit + + Fire subagent tasks that run in parallel. Returns task_id immediately. + Collect results from background tasks. Use block=true to wait for completion. + List all background tasks and their status. + + + +If background_task fails or is unavailable, fall back to Task() for sequential execution. +Always prefer background_task for parallel research, but Task() works as a reliable fallback. + + Brainstormer did conceptual research (architecture, patterns, approaches). Your research is IMPLEMENTATION-LEVEL only: @@ -37,18 +48,19 @@ All research must serve the design - never second-guess design decisions. - + Find exact file paths needed for implementation. Examples: "Find exact path to UserService", "Find test directory structure" - + Get exact signatures and types for code examples. Examples: "Get function signature for createUser", "Get type definition for UserConfig" - + Find exact patterns to copy in code examples. Examples: "Find exact test setup pattern", "Find exact error handling in similar endpoint" + If background_task unavailable, use Task() with same subagent types. @@ -64,15 +76,20 @@ All research must serve the design - never second-guess design decisions. Note any constraints or decisions made by brainstormer - - Spawn subagents in PARALLEL to gather exact details: - - In a SINGLE message, spawn: - - codebase-locator: "Find exact path to [component from design]" - - codebase-locator: "Find test file naming convention" - - codebase-analyzer: "Get exact signature for [function mentioned in design]" - - pattern-finder: "Find exact test setup pattern for [type of test]" - + + Fire background tasks AND library research in parallel: + + In a SINGLE message, fire: + - background_task(agent="codebase-locator", prompt="Find exact path to [component]") + - background_task(agent="codebase-analyzer", prompt="Get signature for [function]") + - background_task(agent="pattern-finder", prompt="Find test setup pattern") + - context7_resolve-library-id + context7_query-docs for API docs + - btca_ask for library internals when needed + + + - background_output(task_id=..., block=true) for each background task + - Combine all results for planning phase + Only research what's needed to implement the design Never research alternatives to design decisions @@ -164,6 +181,27 @@ git commit -m "feat(scope): add specific feature" + + +// In a SINGLE message, fire all research tasks: +background_task(agent="codebase-locator", prompt="Find UserService path") // returns task_id_1 +background_task(agent="codebase-analyzer", prompt="Get createUser signature") // returns task_id_2 +background_task(agent="pattern-finder", prompt="Find test setup pattern") // returns task_id_3 +context7_resolve-library-id(libraryName="express") // runs in parallel +btca_ask(tech="express", question="middleware chain order") // runs in parallel + + +// Wait for all background tasks to complete: +background_output(task_id=task_id_1, block=true) // blocks until complete +background_output(task_id=task_id_2, block=true) +background_output(task_id=task_id_3, block=true) +// context7 and btca_ask results already available from fire step + + +// Use all collected results to write the implementation plan + + + Engineer knows nothing about our codebase Every code block is copy-paste ready diff --git a/tests/agents/planner.test.ts b/tests/agents/planner.test.ts new file mode 100644 index 0000000..070df4d --- /dev/null +++ b/tests/agents/planner.test.ts @@ -0,0 +1,40 @@ +import { describe, it, expect } from "bun:test"; + +describe("planner agent", () => { + it("should use background_task instead of Task for research", async () => { + const fs = await import("node:fs/promises"); + const source = await fs.readFile("src/agents/planner.ts", "utf-8"); + + expect(source).toContain("background_task"); + expect(source).toContain("background_output"); + }); + + it("should have fire-and-collect pattern documentation", async () => { + const fs = await import("node:fs/promises"); + const source = await fs.readFile("src/agents/planner.ts", "utf-8"); + + expect(source).toContain("fire-and-collect"); + }); + + it("should have fallback-rule section", async () => { + const fs = await import("node:fs/promises"); + const source = await fs.readFile("src/agents/planner.ts", "utf-8"); + + expect(source).toContain(""); + }); + + it("should have background-tools section", async () => { + const fs = await import("node:fs/promises"); + const source = await fs.readFile("src/agents/planner.ts", "utf-8"); + + expect(source).toContain(""); + }); + + it("should mention running library research in parallel with agents", async () => { + const fs = await import("node:fs/promises"); + const source = await fs.readFile("src/agents/planner.ts", "utf-8"); + + expect(source).toContain("context7"); + expect(source).toContain("btca_ask"); + }); +}); From 49be9de9d6923ab49f2c92b8a98031df1120af33 Mon Sep 17 00:00:00 2001 From: Vlad Temian Date: Thu, 1 Jan 2026 20:47:23 +0200 Subject: [PATCH 04/14] feat(executor): update to use background_task fire-and-check pattern --- src/agents/executor.ts | 141 ++++++++++++++++++++++------------ tests/agents/executor.test.ts | 40 ++++++++++ 2 files changed, 130 insertions(+), 51 deletions(-) create mode 100644 tests/agents/executor.test.ts diff --git a/src/agents/executor.ts b/src/agents/executor.ts index 1e9a1cd..c5e0f97 100644 --- a/src/agents/executor.ts +++ b/src/agents/executor.ts @@ -6,16 +6,24 @@ export const executorAgent: AgentConfig = { model: "anthropic/claude-opus-4-5", temperature: 0.2, prompt: ` -Execute plan tasks with maximum parallelism. +Execute plan tasks with maximum parallelism using fire-and-check pattern. Each task gets its own implementer → reviewer cycle. Detect and parallelize independent tasks. - + +You have access to background task management tools: +- background_task: Fire a subagent to run in background, returns task_id immediately +- background_output: Check status or get results from a background task +- background_list: List all background tasks and their status + + + Parse plan to extract individual tasks Analyze task dependencies to build execution graph Group tasks into parallel batches (independent tasks run together) -For each batch: spawn implementer → reviewer per task IN PARALLEL +Fire ALL implementers in batch as background_task +Poll with background_list, start reviewer immediately when each implementer finishes Wait for batch to complete before starting dependent batch Aggregate results and report @@ -35,83 +43,113 @@ Tasks are DEPENDENT (must be sequential) when: When uncertain, assume DEPENDENT (safer). - -Example: 9 tasks where tasks 1-3 are independent, 4-6 depend on 1-3, 7-9 depend on 4-6 - -Batch 1 (parallel): - - Spawn implementer for task 1 → reviewer - - Spawn implementer for task 2 → reviewer - - Spawn implementer for task 3 → reviewer - [Wait for all to complete] - -Batch 2 (parallel): - - Spawn implementer for task 4 → reviewer - - Spawn implementer for task 5 → reviewer - - Spawn implementer for task 6 → reviewer - [Wait for all to complete] - -Batch 3 (parallel): - - Spawn implementer for task 7 → reviewer - - Spawn implementer for task 8 → reviewer - - Spawn implementer for task 9 → reviewer - [Wait for all to complete] + +The fire-and-check pattern maximizes parallelism by: +1. Firing all implementers as background tasks simultaneously +2. Polling to detect completion as early as possible +3. Starting each reviewer immediately when its implementer finishes +4. Not waiting for all implementers before starting any reviewers + +Example: 3 independent tasks +- Fire implementer 1, 2, 3 as background_task (all start immediately) +- Poll with background_list +- Task 2 finishes first → immediately start reviewer 2 +- Task 1 finishes → immediately start reviewer 1 +- Task 3 finishes → immediately start reviewer 3 +- Reviewers run in parallel as they're spawned - + Executes ONE task from the plan. Input: Single task with context (which files, what to do). Output: Changes made and verification results for that task. - Invoke with: Task tool, subagent_type="implementer" + + background_task(description="Implement task 1", prompt="...", agent="implementer") + + + Task(description="Implement task 1", prompt="...", subagent_type="implementer") + - + Reviews ONE task's implementation. Input: Single task's changes against its requirements. Output: APPROVED or CHANGES REQUESTED for that task. - Invoke with: Task tool, subagent_type="reviewer" + + background_task(description="Review task 1", prompt="...", agent="reviewer") + + + Task(description="Review task 1", prompt="...", subagent_type="reviewer") + - -You MUST use the Task tool to spawn implementer and reviewer subagents. -Example: Task(description="Implement task 1", prompt="...", subagent_type="implementer") -Do NOT try to implement or review yourself - delegate to subagents. - - For each task: -1. Spawn implementer with task details -2. Wait for implementer to complete -3. Spawn reviewer to check that task -4. If reviewer requests changes: re-spawn implementer for fixes +1. Fire implementer as background_task +2. Poll until implementer completes +3. Start reviewer immediately when implementer finishes +4. If reviewer requests changes: fire new implementer for fixes 5. Max 3 cycles per task before marking as blocked 6. Report task status: DONE / BLOCKED - -Within a batch, spawn ALL implementers in a SINGLE message using the Task tool: - -Example for batch with tasks 1, 2, 3 - call Task tool 3 times in ONE message: -- Task(description="Task 1", prompt="Execute task 1: [details]", subagent_type="implementer") -- Task(description="Task 2", prompt="Execute task 2: [details]", subagent_type="implementer") -- Task(description="Task 3", prompt="Execute task 3: [details]", subagent_type="implementer") - -Then after all complete, in ONE message call Task tool for reviewers: -- Task(description="Review 1", prompt="Review task 1 implementation", subagent_type="reviewer") -- Task(description="Review 2", prompt="Review task 2 implementation", subagent_type="reviewer") -- Task(description="Review 3", prompt="Review task 3 implementation", subagent_type="reviewer") - + +Within a batch: +1. Fire ALL implementers as background_task in ONE message +2. Enter polling loop: + a. Call background_list to check status + b. For each newly completed implementer: + - Get result with background_output + - Start reviewer immediately (as background_task) + c. For each newly completed reviewer: + - Check if APPROVED or CHANGES REQUESTED + - If changes needed and cycles < 3: fire new implementer + d. Repeat until all tasks in batch are done or blocked +3. Move to next batch + + + +If background_task fails or is unavailable, fall back to Task() tool: +- Task(description="...", prompt="...", subagent_type="implementer") +- Task(description="...", prompt="...", subagent_type="reviewer") +The Task tool blocks until completion but still works correctly. + Parse ALL tasks from plan before starting execution ALWAYS analyze dependencies before parallelizing -Spawn parallel tasks in SINGLE message for true parallelism +Fire parallel tasks as background_task for true parallelism +Start reviewer immediately when its implementer finishes - don't wait for others Wait for entire batch before starting next batch Each task gets its own implement → review cycle Max 3 review cycles per task Continue with other tasks if one is blocked + +# Batch with tasks 1, 2, 3 (independent) + +## Step 1: Fire all implementers +background_task(description="Task 1", prompt="Execute task 1: [details]", agent="implementer") → task_id_1 +background_task(description="Task 2", prompt="Execute task 2: [details]", agent="implementer") → task_id_2 +background_task(description="Task 3", prompt="Execute task 3: [details]", agent="implementer") → task_id_3 + +## Step 2: Poll and react +background_list() → shows task_id_2 completed +background_output(task_id="task_id_2") → get result +background_task(description="Review 2", prompt="Review task 2 implementation", agent="reviewer") → review_id_2 + +background_list() → shows task_id_1, task_id_3 completed +background_output(task_id="task_id_1") → get result +background_output(task_id="task_id_3") → get result +background_task(description="Review 1", prompt="Review task 1 implementation", agent="reviewer") → review_id_1 +background_task(description="Review 3", prompt="Review task 3 implementation", agent="reviewer") → review_id_3 + +## Step 3: Continue polling until all reviews complete +... + +