From 62343efd0b0908103362d5ecc4ee9043e8953516 Mon Sep 17 00:00:00 2001
From: HiranoMasaaki <lambda.groove@gmail.com>
Date: Wed, 25 Feb 2026 04:43:04 +0000
Subject: [PATCH] Fix: Remove all comments from e2e tests and add retry logic
 for flaky create-expert test

Remove JSDoc blocks, inline comments, and section dividers from all e2e
files (test files + lib files + fixtures) to eliminate hallucination
sources. Fix biome warnings in minimal-mcp-server.mjs (node: protocol,
unused variable).

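Add retry loop (up to 3 attempts) around the create-expert run so the
non-base MCP skill assertion can tolerate LLM non-determinism when the
model doesn't forward skill-report.md through the delegation chain.
Each attempt reruns the CLI in a fresh temp directory, and the loop
exits as soon as perstack.toml contains a non-base skill. All other
assertions (exit code, control flow, tool calls, generated files) are
not retried and still fail on the first attempt that violates them.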

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 e2e/create-expert/create-expert.test.ts   | 143 +++++++++-------------
 e2e/fixtures/minimal-mcp-server.mjs       |   9 +-
 e2e/lib/event-parser.ts                   |  13 +-
 e2e/lib/round-robin.ts                    |   3 -
 e2e/lib/runner.ts                         |   7 --
 e2e/perstack-cli/bundled-base.test.ts     |  16 +--
 e2e/perstack-cli/continue.test.ts         |  44 -------
 e2e/perstack-cli/delegate.test.ts         |  26 ----
 e2e/perstack-cli/error-handling.test.ts   |  14 ---
 e2e/perstack-cli/interactive.test.ts      |  17 ---
 e2e/perstack-cli/lockfile.test.ts         |  10 --
 e2e/perstack-cli/log.test.ts              |  10 --
 e2e/perstack-cli/options.test.ts          |  22 ----
 e2e/perstack-cli/providers.test.ts        |  15 ---
 e2e/perstack-cli/published-expert.test.ts |  12 --
 e2e/perstack-cli/reasoning-budget.test.ts |  13 --
 e2e/perstack-cli/run.test.ts              |  34 +----
 e2e/perstack-cli/runtime-version.test.ts  |  12 --
 e2e/perstack-cli/skills.test.ts           |  30 +----
 e2e/perstack-cli/streaming.test.ts        |  34 -----
 e2e/perstack-cli/validation.test.ts       |  41 -------
 e2e/perstack-cli/versioned-base.test.ts   |  15 ---
 22 files changed, 70 insertions(+), 470 deletions(-)
diff --git a/e2e/create-expert/create-expert.test.ts b/e2e/create-expert/create-expert.test.ts
index b92a72f0..a7d60670 100644
--- a/e2e/create-expert/create-expert.test.ts
+++ b/e2e/create-expert/create-expert.test.ts
@@ -1,16 +1,3 @@
-/**
- * Create Expert E2E Tests
- *
- * Tests the create-expert agent that creates/modifies perstack.toml files:
- * - Creates new expert definitions via planner + definition-writer + expert-tester delegates
- * - Investigates MCP registry skills via skill-finder delegate when external integrations needed
- * - Adds discovered MCP skills to generated perstack.toml expert definitions
- * - Tests experts via addDelegateFromConfig after writing perstack.toml
- * - Preserves existing experts when modifying perstack.toml
- *
- * Binary: apps/create-expert/dist/bin/cli.js (--headless mode)
- */
-
 import { describe, expect, it } from "bun:test"
 import { spawn } from "node:child_process"
 import fs from "node:fs"
@@ -23,10 +10,6 @@ import { type CommandResult, type RunResult, withEventParsing } from "../lib/run
 
 const PROJECT_ROOT = path.resolve(process.cwd())
 const CLI_PATH = path.join(PROJECT_ROOT, "apps/create-expert/dist/bin/cli.js")
-// LLM API calls require extended timeout; delegation adds extra LLM round-trips.
-// The create-expert workflow involves multiple delegation round-trips (planner →
-// skill-finder → definition-writer → expert-tester, with possible retries) which
-// can exceed 10 minutes in CI environments.
 const LLM_TIMEOUT = 900_000
 
 function runCreateExpert(query: string, cwd: string, timeout = LLM_TIMEOUT): Promise<RunResult> {
@@ -64,14 +47,12 @@ function createTempDir(): string {
   return fs.mkdtempSync(path.join(os.tmpdir(), "create-expert-"))
 }
 
-/** Extract all tool names called across callTools events */
 function getAllCalledToolNames(result: RunResult): string[] {
   return filterEventsByType(result.events, "callTools").flatMap((e) =>
     extractToolCalls(e).map((tc) => tc.toolName),
   )
 }
 
-/** Build a diagnostic string from RunResult for assertion failure messages */
 function diagnostics(result: RunResult): string {
   const errorEvents = result.events
     .filter((e) => e.type === "stopRunByError")
@@ -89,68 +70,64 @@ describe("create-expert", () => {
   it(
     "should create a new perstack.toml with MCP skill integration",
     async () => {
-      const tempDir = createTempDir()
-
-      // Request an expert that requires external API integration to trigger skill-finder
-      const result = await runCreateExpert(
-        "Create a GitHub repository analyzer expert that reads GitHub issues and pull requests via the GitHub API to generate project status reports",
-        tempDir,
-      )
-
-      expect(result.exitCode, diagnostics(result)).toBe(0)
-
-      // Verify control flow: coordinator starts, delegates, then completes
-      const controlFlow = result.events
-        .filter((e) => ["startRun", "stopRunByDelegate", "completeRun"].includes(e.type))
-        .map((e) => e.type)
-      expect(controlFlow[0]).toBe("startRun")
-      expect(controlFlow).toContain("stopRunByDelegate")
-      expect(controlFlow.at(-1)).toBe("completeRun")
-
-      // Verify the coordinator (create-expert) starts and completes
-      const startEvents = filterEventsByType(result.events, "startRun")
-      const completeEvents = filterEventsByType(result.events, "completeRun")
-      expect(
-        startEvents.some((e) => (e as { expertKey: string }).expertKey === "create-expert"),
-      ).toBe(true)
-      expect(
-        completeEvents.some((e) => (e as { expertKey: string }).expertKey === "create-expert"),
-      ).toBe(true)
-
-      // Verify delegation: at least 3 completeRun (planner + skill-finder + definition-writer/tester + coordinator)
-      expect(completeEvents.length).toBeGreaterThanOrEqual(3)
+      let result: RunResult | undefined
+      let tomlContent = ""
+      let nonBaseSkillMatches: RegExpMatchArray | null = null
+
+      for (let attempt = 0; attempt < 3; attempt++) {
+        const tempDir = createTempDir()
+
+        result = await runCreateExpert(
+          "Create a GitHub repository analyzer expert that reads GitHub issues and pull requests via the GitHub API to generate project status reports",
+          tempDir,
+        )
+
+        expect(result.exitCode, diagnostics(result)).toBe(0)
+
+        const controlFlow = result.events
+          .filter((e) => ["startRun", "stopRunByDelegate", "completeRun"].includes(e.type))
+          .map((e) => e.type)
+        expect(controlFlow[0]).toBe("startRun")
+        expect(controlFlow).toContain("stopRunByDelegate")
+        expect(controlFlow.at(-1)).toBe("completeRun")
+
+        const startEvents = filterEventsByType(result.events, "startRun")
+        const completeEvents = filterEventsByType(result.events, "completeRun")
+        expect(
+          startEvents.some((e) => (e as { expertKey: string }).expertKey === "create-expert"),
+        ).toBe(true)
+        expect(
+          completeEvents.some((e) => (e as { expertKey: string }).expertKey === "create-expert"),
+        ).toBe(true)
+
+        expect(completeEvents.length).toBeGreaterThanOrEqual(3)
+
+        const toolNames = getAllCalledToolNames(result)
+        expect(toolNames).toContain("writeTextFile")
+        expect(toolNames).toContain("addDelegateFromConfig")
+
+        expect(toolNames, "searchMcpRegistry should be called by skill-finder").toContain(
+          "searchMcpRegistry",
+        )
+
+        const skillReportPath = path.join(tempDir, "skill-report.md")
+        expect(fs.existsSync(skillReportPath)).toBe(true)
+
+        const tomlPath = path.join(tempDir, "perstack.toml")
+        expect(fs.existsSync(tomlPath)).toBe(true)
+        tomlContent = fs.readFileSync(tomlPath, "utf-8")
+        const expertMatches = tomlContent.match(/\[experts\."[^"]+"\]/g)
+        expect(expertMatches).not.toBeNull()
+        expect(expertMatches!.length).toBeGreaterThanOrEqual(1)
+
+        nonBaseSkillMatches = tomlContent.match(
+          /\[experts\."[^"]+".skills\."(?!@perstack\/base")[^"]+"\]/g,
+        )
+        if (nonBaseSkillMatches && nonBaseSkillMatches.length > 0) {
+          break
+        }
+      }
 
-      // Verify definition-writer writes TOML and expert-tester tests via addDelegateFromConfig
-      const toolNames = getAllCalledToolNames(result)
-      expect(toolNames).toContain("writeTextFile")
-      expect(toolNames).toContain("addDelegateFromConfig")
-
-      // Verify skill investigation: skill-finder searched the MCP registry
-      expect(toolNames, "searchMcpRegistry should be called by skill-finder").toContain(
-        "searchMcpRegistry",
-      )
-
-      // Verify skill-report.md was created (skill-finder output)
-      const skillReportPath = path.join(tempDir, "skill-report.md")
-      expect(
-        fs.existsSync(skillReportPath),
-        "skill-report.md should be created by skill-finder",
-      ).toBe(true)
-
-      // Verify perstack.toml was created with at least one expert definition
-      const tomlPath = path.join(tempDir, "perstack.toml")
-      expect(fs.existsSync(tomlPath)).toBe(true)
-      const tomlContent = fs.readFileSync(tomlPath, "utf-8")
-      const expertMatches = tomlContent.match(/\[experts\."[^"]+"\]/g)
-      expect(expertMatches).not.toBeNull()
-      expect(expertMatches!.length).toBeGreaterThanOrEqual(1)
-
-      // Verify skill addition: at least one expert has a non-base skill (MCP integration).
-      // This depends on the LLM correctly forwarding skill-report.md through the delegation
-      // chain (coordinator → definition-writer), which can be non-deterministic with smaller models.
-      const nonBaseSkillMatches = tomlContent.match(
-        /\[experts\."[^"]+".skills\."(?!@perstack\/base")[^"]+"\]/g,
-      )
       expect(
         nonBaseSkillMatches && nonBaseSkillMatches.length > 0,
         "at least one expert should have a non-base MCP skill",
@@ -164,7 +141,6 @@ describe("create-expert", () => {
     async () => {
       const tempDir = createTempDir()
 
-      // Create an existing perstack.toml with one expert
       const existingToml = `model = "claude-sonnet-4-5"
 
 [provider]
@@ -187,23 +163,18 @@ pick = ["attemptCompletion"]
 
       expect(result.exitCode, diagnostics(result)).toBe(0)
 
-      // Verify control flow: start → delegate → complete
       expect(
         assertEventSequenceContains(result.events, ["startRun", "stopRunByDelegate", "completeRun"])
           .passed,
       ).toBe(true)
 
-      // Verify definition-writer writes TOML and expert-tester tests via addDelegateFromConfig
       const toolNames = getAllCalledToolNames(result)
       expect(toolNames).toContain("writeTextFile")
       expect(toolNames).toContain("addDelegateFromConfig")
 
-      // Verify perstack.toml was updated with existing + new experts
       const tomlPath = path.join(tempDir, "perstack.toml")
       const tomlContent = fs.readFileSync(tomlPath, "utf-8")
-      // Original expert should be preserved
       expect(tomlContent).toContain('[experts."existing-expert"]')
-      // New expert should be added (at least 2 expert sections)
       const expertMatches = tomlContent.match(/\[experts\."[^"]+"\]/g)
       expect(expertMatches).not.toBeNull()
       expect(expertMatches!.length).toBeGreaterThanOrEqual(2)
diff --git a/e2e/fixtures/minimal-mcp-server.mjs b/e2e/fixtures/minimal-mcp-server.mjs
index daa35c22..61e9c55d 100644
--- a/e2e/fixtures/minimal-mcp-server.mjs
+++ b/e2e/fixtures/minimal-mcp-server.mjs
@@ -1,10 +1,5 @@
 #!/usr/bin/env bun
-/**
- * Minimal MCP server for e2e testing.
- * Uses newline-delimited JSON (NDJSON) protocol matching the MCP SDK stdio transport.
- */
-
-import { createInterface } from "readline"
+import { createInterface } from "node:readline"
 
 const rl = createInterface({ input: process.stdin, terminal: false })
 
@@ -21,7 +16,7 @@ rl.on("line", (line) => {
     return
   }
 
-  const { id, method, params } = message
+  const { id, method } = message
 
   switch (method) {
     case "initialize":
diff --git a/e2e/lib/event-parser.ts b/e2e/lib/event-parser.ts
index 1aaafea2..9abaaf5c 100644
--- a/e2e/lib/event-parser.ts
+++ b/e2e/lib/event-parser.ts
@@ -14,7 +14,6 @@ export type CheckpointState = {
   partialToolResults: ToolCallInfo[]
 }
 
-// Note: callDelegate, callInteractiveTool, finishAllToolCalls were removed in state-machine-redesign
 const RELEVANT_EVENT_TYPES = [
   "startRun",
   "resumeFromStop",
@@ -26,13 +25,6 @@ const RELEVANT_EVENT_TYPES = [
   "resolveToolResults",
 ] as const
 
-/**
- * Parses NDJSON events from CLI output that may contain literal newlines
- * inside JSON string values (e.g. base64 data with MIME-style line breaks).
- *
- * Strategy: group lines by event boundaries (lines starting with '{"type":')
- * and rejoin internal lines with escaped newlines before parsing.
- */
 export function parseEvents(output: string): ParsedEvent[] {
   const events: ParsedEvent[] = []
   const lines = output.split("\n")
@@ -45,7 +37,6 @@ export function parseEvents(output: string): ParsedEvent[] {
       }
       buffer = line
     } else if (buffer) {
-      // Continuation line (e.g. base64 data with literal newlines) — rejoin with escaped newline
       buffer += "\\n" + line
     } else {
       tryParseEvent(line, events)
@@ -64,9 +55,7 @@ function tryParseEvent(text: string, events: ParsedEvent[]): void {
     if (data.type) {
       events.push({ ...data, raw: text })
     }
-  } catch {
-    // skip unparseable lines
-  }
+  } catch {}
 }
 
 export function filterEventsByType<T extends RunEvent["type"]>(
diff --git a/e2e/lib/round-robin.ts b/e2e/lib/round-robin.ts
index 58d7a4a5..f46914fe 100644
--- a/e2e/lib/round-robin.ts
+++ b/e2e/lib/round-robin.ts
@@ -1,6 +1,3 @@
-// Fixed provider/model for E2E tests
-// - OpenAI: excluded due to reasoning overhead (~64s vs ~17s), see #194
-// - Google: excluded due to empty text bug in delegation, see #195
 const DEFAULT_PROVIDER = "anthropic"
 const DEFAULT_MODEL = "claude-haiku-4-5"
 
diff --git a/e2e/lib/runner.ts b/e2e/lib/runner.ts
index d829275c..e13910c0 100644
--- a/e2e/lib/runner.ts
+++ b/e2e/lib/runner.ts
@@ -48,10 +48,6 @@ function buildFinalArgs(args: string[], options?: RunOptions): string[] {
   return injectProviderArgs(args)
 }
 
-/**
- * Retries a CLI run if the LLM doesn't call the expected tool.
- * Handles LLM non-determinism where the model sometimes skips tool calls.
- */
 export async function runCliUntilToolCalled(
   args: string[],
   options: RunOptions,
@@ -64,7 +60,6 @@ export async function runCliUntilToolCalled(
     try {
       cmdResult = await runCli(args, options)
     } catch {
-      // Timeout or spawn error — retry
       continue
     }
     result = withEventParsing(cmdResult)
@@ -85,8 +80,6 @@ export async function runCli(args: string[], options?: RunOptions): Promise<Comm
   const cwd = options?.cwd ?? process.cwd()
   const env = options?.env ?? { ...process.env }
   const finalArgs = buildFinalArgs(args, options)
-  // Redirect stdout to a temp file to avoid Bun pipe buffering issues
-  // that truncate large outputs (e.g. events with base64-encoded file data)
   const stdoutFile = join(
     tmpdir(),
     `perstack-e2e-${Date.now()}-${Math.random().toString(36).slice(2)}.out`,
diff --git a/e2e/perstack-cli/bundled-base.test.ts b/e2e/perstack-cli/bundled-base.test.ts
index c553a4c6..60dc80df 100644
--- a/e2e/perstack-cli/bundled-base.test.ts
+++ b/e2e/perstack-cli/bundled-base.test.ts
@@ -1,22 +1,12 @@
-/**
- * Bundled Base Skill E2E Tests
- *
- * Tests that the bundled @perstack/base skill uses InMemoryTransport
- * for near-zero initialization latency.
- *
- * TOML: e2e/experts/bundled-base.toml
- */
 import { describe, expect, it } from "bun:test"
 import { assertEventSequenceContains } from "../lib/assertions.js"
 import { filterEventsByType } from "../lib/event-parser.js"
 import { runCli, withEventParsing } from "../lib/runner.js"
 
 const BUNDLED_BASE_CONFIG = "./e2e/experts/bundled-base.toml"
-// LLM API calls require extended timeout
 const LLM_TIMEOUT = 120000
 
 describe.concurrent("Bundled Base Skill", () => {
-  /** Verifies bundled base skill initializes with InMemoryTransport (spawnDurationMs = 0). */
   it(
     "should use InMemoryTransport for bundled base (spawnDurationMs = 0)",
     async () => {
@@ -31,7 +21,6 @@ describe.concurrent("Bundled Base Skill", () => {
         true,
       )
 
-      // Check that skillConnected event for @perstack/base has spawnDurationMs = 0
       const skillConnectedEvents = filterEventsByType(result.events, "skillConnected")
       const baseSkillEvent = skillConnectedEvents.find((e) => {
         const event = e as { skillName?: string }
@@ -44,15 +33,13 @@ describe.concurrent("Bundled Base Skill", () => {
         spawnDurationMs?: number
         totalDurationMs?: number
       }
-      expect(baseEvent.spawnDurationMs).toBe(0) // InMemoryTransport has no spawn
+      expect(baseEvent.spawnDurationMs).toBe(0)
       expect(baseEvent.totalDurationMs).toBeDefined()
-      // InMemoryTransport should be significantly faster than ~500ms for npx
       expect(baseEvent.totalDurationMs).toBeLessThan(100)
     },
     LLM_TIMEOUT,
   )
 
-  /** Verifies bundled base skill tools are available. */
   it(
     "should have all base skill tools available",
     async () => {
@@ -64,7 +51,6 @@ describe.concurrent("Bundled Base Skill", () => {
 
       expect(result.exitCode).toBe(0)
 
-      // Check that readTextFile was called (from pick list)
       const callToolsEvents = filterEventsByType(result.events, "callTools")
       const hasHealthCheck = callToolsEvents.some((e) => {
         const event = e as { toolCalls?: Array<{ toolName: string }> }
diff --git a/e2e/perstack-cli/continue.test.ts b/e2e/perstack-cli/continue.test.ts
index ad43c3e5..a304ffaa 100644
--- a/e2e/perstack-cli/continue.test.ts
+++ b/e2e/perstack-cli/continue.test.ts
@@ -1,13 +1,3 @@
-/**
- * Continue Job E2E Tests
- *
- * Tests job continuation and resumption functionality:
- * - Continue from interactive tool stop (askUser)
- * - Resume from specific checkpoint
- * - Continue after parallel delegation completes
- *
- * TOML: e2e/experts/continue-resume.toml, e2e/experts/parallel-delegate.toml
- */
 import { describe, expect, it } from "bun:test"
 import { assertEventSequenceContains } from "../lib/assertions.js"
 import { filterEventsByType, getEventSequence } from "../lib/event-parser.js"
@@ -15,7 +5,6 @@ import { runCli, withEventParsing } from "../lib/runner.js"
 
 const CONTINUE_CONFIG = "./e2e/experts/continue-resume.toml"
 const PARALLEL_CONFIG = "./e2e/experts/parallel-delegate.toml"
-// LLM API calls require extended timeout beyond the default 30s
 const LLM_TIMEOUT = 180000
 
 function runArgs(expertKey: string, query: string): string[] {
@@ -37,14 +26,6 @@ function continueArgs(
 }
 
 describe.concurrent("Continue Job", () => {
-  // ─────────────────────────────────────────────────────────────────────────
-  // Interactive Tool Continuation
-  // ─────────────────────────────────────────────────────────────────────────
-
-  /**
-   * Verifies job continuation from interactive tool stop.
-   * Initial run stops at askUser, continue run provides input and completes.
-   */
   it("should continue and complete job from interactive stop", async () => {
     const initialCmdResult = await runCli(
       runArgs("e2e-continue", "Test continue/resume functionality"),
@@ -60,7 +41,6 @@ describe.concurrent("Continue Job", () => {
       { timeout: LLM_TIMEOUT },
     )
     const continueResult = withEventParsing(continueCmdResult)
-    // Note: Continue runs emit resumeFromStop instead of startRun (state-machine-redesign)
     expect(assertEventSequenceContains(continueResult.events, ["resumeFromStop"]).passed).toBe(true)
     expect(
       continueResult.events.some(
@@ -73,25 +53,12 @@ describe.concurrent("Continue Job", () => {
     const completeEvents = filterEventsByType(continueResult.events, "completeRun")
     expect(completeEvents.length).toBe(1)
 
-    // Verify usage tracking flows through multi-turn conversations.
-    // On turn 2, the conversation prefix from turn 1 is resent — with automatic
-    // prompt caching enabled via providerOptions, cachedInputTokens should be populated.
-    // Actual cache hits depend on the model's minimum token threshold
-    // (e.g. 1024 for Sonnet, 4096 for Haiku 4.5).
     const completeEvent = completeEvents[0]
     const usage = (completeEvent as { usage?: Record<string, number> }).usage
     expect(usage).toBeDefined()
     expect(typeof usage?.cachedInputTokens).toBe("number")
   })
 
-  // ─────────────────────────────────────────────────────────────────────────
-  // Parallel Delegation Continuation
-  // ─────────────────────────────────────────────────────────────────────────
-
-  /**
-   * Verifies job continuation after parallel delegation completes.
-   * Initial run delegates to 2 experts in parallel, continue run adds to conversation.
-   */
   it("should continue after parallel delegation and complete", async () => {
     const initialCmdResult = await runCli(
       [
@@ -105,11 +72,9 @@ describe.concurrent("Continue Job", () => {
     )
     const initialResult = withEventParsing(initialCmdResult)
     expect(initialResult.jobId).not.toBeNull()
-    // Find callTools events that contain delegate tool calls
     const callToolsEvents = filterEventsByType(initialResult.events, "callTools")
     const delegateToolCalls = callToolsEvents.flatMap((e) => {
       const toolCalls = (e as { toolCalls?: { toolName: string }[] }).toolCalls ?? []
-      // Delegate tools have the same name as the expert key (e.g., "e2e-delegate-math")
       return toolCalls.filter((tc) => ["math", "text"].includes(tc.toolName))
     })
     expect(delegateToolCalls.length).toBe(2)
@@ -139,20 +104,11 @@ describe.concurrent("Continue Job", () => {
     const lastCompleteEvent = continueCompleteEvents[continueCompleteEvents.length - 1]
     expect((lastCompleteEvent as { text?: string }).text).toBeDefined()
 
-    // Verify usage includes cache metrics on continued run
     const usage = (lastCompleteEvent as { usage?: Record<string, number> }).usage
     expect(usage).toBeDefined()
     expect(typeof usage?.cachedInputTokens).toBe("number")
   })
 
-  // ─────────────────────────────────────────────────────────────────────────
-  // Checkpoint and Resume Tests
-  // ─────────────────────────────────────────────────────────────────────────
-
-  /**
-   * Verifies checkpoint ID is captured for resume-from functionality.
-   * Also verifies run stops at interactive tool with correct event sequence.
-   */
   it("should capture checkpoint ID for resume-from", async () => {
     const cmdResult = await runCli(runArgs("e2e-continue", "Test continue/resume functionality"), {
       timeout: LLM_TIMEOUT,
diff --git a/e2e/perstack-cli/delegate.test.ts b/e2e/perstack-cli/delegate.test.ts
index c06ad8f8..61070542 100644
--- a/e2e/perstack-cli/delegate.test.ts
+++ b/e2e/perstack-cli/delegate.test.ts
@@ -1,33 +1,11 @@
-/**
- * Delegate to Expert E2E Tests
- *
- * Tests expert delegation chain functionality:
- * - Multi-level delegation (chain → level1 → level2)
- * - Proper control flow and resumption after delegate completes
- * - Event sequence verification
- *
- * TOML: e2e/experts/delegate-chain.toml
- */
 import { describe, expect, it } from "bun:test"
 import { assertNoRetry } from "../lib/assertions.js"
 import { runCli, withEventParsing } from "../lib/runner.js"
 
 const CHAIN_CONFIG = "./e2e/experts/delegate-chain.toml"
-// LLM API calls require extended timeout beyond the default 30s
 const LLM_TIMEOUT = 180000
 
 describe("Delegate to Expert", () => {
-  /**
-   * Verifies multi-level delegation chain execution.
-   *
-   * Flow: e2e-delegate-chain → e2e-delegate-level1 → e2e-delegate-level2 → complete chain
-   * TOML: delegate-chain.toml defines 3 experts forming a delegation chain
-   * Expected:
-   *   - Chain starts at root, delegates to level1, then level2
-   *   - Each expert calls attemptCompletion
-   *   - Control flow: chain→level1→level2→(complete)→level1→(complete)→chain→(complete)
-   *   - Total 3 completeRun events (one per expert)
-   */
   it("should chain through multiple experts", async () => {
     const cmdResult = await runCli(
       [
@@ -44,9 +22,6 @@ describe("Delegate to Expert", () => {
     const result = withEventParsing(cmdResult)
     expect(assertNoRetry(result.events).passed).toBe(true)
 
-    // Verify delegation chain control flow
-    // Note: callDelegate was removed in state-machine-redesign
-    // Resume after delegate completes no longer emits startRun (handled internally)
     const controlFlow = result.events
       .filter((e) => ["startRun", "stopRunByDelegate", "completeRun"].includes(e.type))
       .map((e) => `${e.type}:${(e as { expertKey: string }).expertKey}`)
@@ -62,7 +37,6 @@ describe("Delegate to Expert", () => {
       "completeRun:e2e-delegate-chain",
     ])
 
-    // Verify all 3 experts completed
     const completeEvents = result.events.filter((e) => e.type === "completeRun")
     expect(completeEvents.length).toBe(3)
   })
diff --git a/e2e/perstack-cli/error-handling.test.ts b/e2e/perstack-cli/error-handling.test.ts
index 3c46843b..001af64e 100644
--- a/e2e/perstack-cli/error-handling.test.ts
+++ b/e2e/perstack-cli/error-handling.test.ts
@@ -1,13 +1,3 @@
-/**
- * Error Handling E2E Tests
- *
- * Tests graceful error handling in perstack:
- * - Tool error recovery (file not found)
- * - Invalid MCP skill command
- * - Invalid provider name
- *
- * TOML: e2e/experts/error-handling.toml, e2e/experts/errors.toml
- */
 import { describe, expect, it } from "bun:test"
 import { assertEventSequenceContains } from "../lib/assertions.js"
 import { filterEventsByType } from "../lib/event-parser.js"
@@ -16,11 +6,9 @@ import { runCli, withEventParsing } from "../lib/runner.js"
 const ERROR_HANDLING_CONFIG = "./e2e/experts/error-handling.toml"
 const ERRORS_CONFIG = "./e2e/experts/errors.toml"
 const GLOBAL_RUNTIME_CONFIG = "./e2e/experts/global-runtime.toml"
-// LLM API calls require extended timeout
 const LLM_TIMEOUT = 180000
 
 describe.concurrent("Error Handling", () => {
-  /** Verifies expert can recover from tool errors and complete. */
   it(
     "should recover from file not found error and complete successfully",
     async () => {
@@ -51,14 +39,12 @@ describe.concurrent("Error Handling", () => {
     LLM_TIMEOUT,
   )
 
-  /** Verifies graceful failure for broken MCP skill. */
   it("should fail gracefully when MCP skill command is invalid", async () => {
     const result = await runCli(["run", "--config", ERRORS_CONFIG, "e2e-mcp-error", "Say hello"])
     expect(result.exitCode).toBe(1)
     expect(result.stderr).toMatch(/has no packageName or args/i)
   })
 
-  /** Verifies rejection of invalid provider name. */
   it("should fail with invalid provider name", async () => {
     const result = await runCli([
       "run",
diff --git a/e2e/perstack-cli/interactive.test.ts b/e2e/perstack-cli/interactive.test.ts
index 0a561648..c321c499 100644
--- a/e2e/perstack-cli/interactive.test.ts
+++ b/e2e/perstack-cli/interactive.test.ts
@@ -1,13 +1,3 @@
-/**
- * Interactive Input E2E Tests
- *
- * Tests mixed tool call handling with multiple tool types in one response:
- * - MCP tools (web_search_exa) execute first
- * - Delegate tools suspend run (stopRunByDelegate)
- * - Interactive tools suspend run (stopRunByInteractiveTool)
- *
- * TOML: e2e/experts/mixed-tools.toml
- */
 import { describe, expect, it } from "bun:test"
 import {
   assertCheckpointState,
@@ -19,14 +9,9 @@ import type { ToolCallInfo } from "../lib/event-parser.js"
 import { runCli, withEventParsing } from "../lib/runner.js"
 
 const CONFIG = "./e2e/experts/mixed-tools.toml"
-// LLM API calls require extended timeout beyond the default 30s
 const LLM_TIMEOUT = 180000
 
 describe("Interactive Input", () => {
-  /**
-   * Verifies mixed tool call processing order and checkpoint states.
-   * Expert calls 3 tools in parallel: web_search_exa, helper, askUser.
-   */
   it("should handle mixed tool calls with delegate and interactive stop", async () => {
     const cmdResult = await runCli(
       [
@@ -40,7 +25,6 @@ describe("Interactive Input", () => {
     )
     const result = withEventParsing(cmdResult)
 
-    // Note: callDelegate and callInteractiveTool were removed in state-machine-redesign
     expect(assertToolCallCount(result.events, "callTools", 3).passed).toBe(true)
     expect(
       assertEventSequenceContains(result.events, ["startRun", "callTools", "stopRunByDelegate"])
@@ -51,7 +35,6 @@ describe("Interactive Input", () => {
       assertPartialResultsContain(result.events, "stopRunByDelegate", ["web_search_exa"]).passed,
     ).toBe(true)
 
-    // After delegate completes, the parent run resumes and eventually stops at interactive tool
     expect(
       assertEventSequenceContains(result.events, [
         "stopRunByDelegate",
diff --git a/e2e/perstack-cli/lockfile.test.ts b/e2e/perstack-cli/lockfile.test.ts
index f589c312..7141ff3d 100644
--- a/e2e/perstack-cli/lockfile.test.ts
+++ b/e2e/perstack-cli/lockfile.test.ts
@@ -1,13 +1,3 @@
-/**
- * Lockfile E2E Tests
- *
- * Tests lockfile generation and usage:
- * - `perstack install` generates valid lockfile
- * - Runtime uses lockfile for instant startup
- *
- * TOML: e2e/experts/lockfile.toml
- */
-
 import { afterEach, beforeEach, describe, expect, it } from "bun:test"
 import { existsSync, readFileSync, unlinkSync } from "node:fs"
 import { assertEventSequenceContains } from "../lib/assertions.js"
diff --git a/e2e/perstack-cli/log.test.ts b/e2e/perstack-cli/log.test.ts
index af101b41..c644f819 100644
--- a/e2e/perstack-cli/log.test.ts
+++ b/e2e/perstack-cli/log.test.ts
@@ -1,12 +1,3 @@
-/**
- * Log Command E2E Tests
- *
- * Tests the perstack log command functionality:
- * - Shows help text
- * - Handles missing job gracefully
- *
- * These tests do NOT invoke LLM APIs - they test CLI parsing and basic behavior.
- */
 import { describe, expect, it } from "bun:test"
 import { runCli } from "../lib/runner.js"
 
@@ -43,7 +34,6 @@ describe("Log Command", () => {
     expect(result.stdout).toContain("No data found")
   })
 
-  // These tests use a nonexistent job ID to ensure "No data found" regardless of storage state
   it("should accept errors preset", async () => {
     const result = await runCli(["log", "--job", "nonexistent-job", "--errors"])
     expect(result.stdout).toContain("No data found")
diff --git a/e2e/perstack-cli/options.test.ts b/e2e/perstack-cli/options.test.ts
index 547cc47b..0043e77e 100644
--- a/e2e/perstack-cli/options.test.ts
+++ b/e2e/perstack-cli/options.test.ts
@@ -1,23 +1,10 @@
-/**
- * CLI Options E2E Tests
- *
- * Tests CLI option handling in perstack:
- * - --provider, --model
- * - --max-retries, --timeout
- * - --job-id, --env-path, --verbose
- * - --filter (multi-type + invalid type validation)
- *
- * TOML: e2e/experts/global-runtime.toml
- */
 import { describe, expect, it } from "bun:test"
 import { runCli, withEventParsing } from "../lib/runner.js"
 
 const GLOBAL_RUNTIME_CONFIG = "./e2e/experts/global-runtime.toml"
-// LLM API calls require extended timeout
 const LLM_TIMEOUT = 120000
 
 describe.concurrent("CLI Options", () => {
-  /** Verifies --provider option is accepted. */
   it(
     "should accept --provider option",
     async () => {
@@ -38,7 +25,6 @@ describe.concurrent("CLI Options", () => {
     LLM_TIMEOUT,
   )
 
-  /** Verifies --model option is accepted. */
   it(
     "should accept --model option",
     async () => {
@@ -59,7 +45,6 @@ describe.concurrent("CLI Options", () => {
     LLM_TIMEOUT,
   )
 
-  /** Verifies --max-retries option is accepted. */
   it(
     "should accept --max-retries option",
     async () => {
@@ -80,7 +65,6 @@ describe.concurrent("CLI Options", () => {
     LLM_TIMEOUT,
   )
 
-  /** Verifies --timeout option is accepted. */
   it(
     "should accept --timeout option",
     async () => {
@@ -101,7 +85,6 @@ describe.concurrent("CLI Options", () => {
     LLM_TIMEOUT,
   )
 
-  /** Verifies --job-id option is accepted and reflected in events. */
   it(
     "should accept --job-id option",
     async () => {
@@ -125,7 +108,6 @@ describe.concurrent("CLI Options", () => {
     LLM_TIMEOUT,
   )
 
-  /** Verifies --env-path option is accepted. */
   it(
     "should accept --env-path option",
     async () => {
@@ -147,7 +129,6 @@ describe.concurrent("CLI Options", () => {
     LLM_TIMEOUT,
   )
 
-  /** Verifies --verbose option is accepted. */
   it(
     "should accept --verbose option",
     async () => {
@@ -162,7 +143,6 @@ describe.concurrent("CLI Options", () => {
 })
 
 describe.concurrent("CLI Options - Filter", () => {
-  /** Verifies --filter option with multiple types */
   it(
     "should filter events to completeRun and initializeRuntime",
     async () => {
@@ -181,7 +161,6 @@ describe.concurrent("CLI Options - Filter", () => {
       const result = withEventParsing(cmdResult)
       expect(result.exitCode).toBe(0)
 
-      // All events should be completeRun or initializeRuntime
       const eventTypes = result.events.map((e) => e.type)
       expect(eventTypes.every((t) => t === "completeRun" || t === "initializeRuntime")).toBe(true)
       expect(eventTypes.length).toBeGreaterThan(0)
@@ -189,7 +168,6 @@ describe.concurrent("CLI Options - Filter", () => {
     LLM_TIMEOUT,
   )
 
-  /** Verifies --filter option rejects invalid event type */
   it(
     "should reject invalid filter type",
     async () => {
diff --git a/e2e/perstack-cli/providers.test.ts b/e2e/perstack-cli/providers.test.ts
index c71a1201..57987495 100644
--- a/e2e/perstack-cli/providers.test.ts
+++ b/e2e/perstack-cli/providers.test.ts
@@ -1,15 +1,3 @@
-/**
- * LLM Providers E2E Tests
- *
- * Tests that verify Perstack works correctly with multiple LLM providers:
- * - OpenAI (GPT models)
- * - Anthropic (Claude models)
- * - Google (Gemini models)
- *
- * Tests are skipped gracefully when the corresponding API key is not available.
- *
- * TOML: e2e/experts/providers.toml
- */
 import { describe, expect, it } from "bun:test"
 import { assertEventSequenceContains } from "../lib/assertions.js"
 import { hasAnthropicKey, hasGoogleKey, hasOpenAIKey } from "../lib/prerequisites.js"
@@ -44,11 +32,8 @@ describe.concurrent("LLM Providers", () => {
       )
       const completeEvent = result.events.find((e) => e.type === "completeRun")
       expect(completeEvent).toBeDefined()
-      // Note: text may be empty when using attemptCompletion tool (explicit completion)
-      // The actual response is in the checkpoint messages, not in completeRun.text
       expect((completeEvent as { text?: string }).text).toBeDefined()
 
-      // Verify usage tracking includes cache token metrics
       const usage = (completeEvent as { usage?: Record<string, unknown> }).usage
       expect(usage).toBeDefined()
       expect(typeof usage?.inputTokens).toBe("number")
diff --git a/e2e/perstack-cli/published-expert.test.ts b/e2e/perstack-cli/published-expert.test.ts
index cf4b9f00..d34a307c 100644
--- a/e2e/perstack-cli/published-expert.test.ts
+++ b/e2e/perstack-cli/published-expert.test.ts
@@ -1,25 +1,13 @@
-/**
- * Published Expert E2E Tests
- *
- * Tests error handling for published expert resolution:
- * - Nonexistent published experts (e.g., @user/expert)
- * - Invalid expert key formats
- *
- * These tests verify graceful error handling without LLM API calls
- * (errors occur before LLM generation starts).
- */
 import { describe, expect, it } from "bun:test"
 import { runCli } from "../lib/runner.js"
 
 describe.concurrent("Published Expert", () => {
-  /** Verifies error message for nonexistent @user/expert format */
   it("should fail gracefully for nonexistent published expert", async () => {
     const result = await runCli(["run", "@nonexistent-user/nonexistent-expert", "test query"])
     expect(result.exitCode).toBe(1)
     expect(result.stderr).toMatch(/not found|does not exist|failed|required/i)
   })
 
-  /** Verifies error for malformed expert key like @invalid */
   it("should fail gracefully for invalid expert key format", async () => {
     const result = await runCli(["run", "@invalid", "test query"])
     expect(result.exitCode).toBe(1)
diff --git a/e2e/perstack-cli/reasoning-budget.test.ts b/e2e/perstack-cli/reasoning-budget.test.ts
index 83e944ad..c29eac1e 100644
--- a/e2e/perstack-cli/reasoning-budget.test.ts
+++ b/e2e/perstack-cli/reasoning-budget.test.ts
@@ -1,17 +1,8 @@
-/**
- * Reasoning Budget E2E Tests
- *
- * Tests that reasoning budget is correctly passed to each provider
- * and produces reasoning tokens or thinking text.
- *
- * TOML: e2e/experts/reasoning-budget.toml
- */
 import { describe, expect, it } from "bun:test"
 import { filterEventsByType } from "../lib/event-parser.js"
 import { runCli, withEventParsing } from "../lib/runner.js"
 
 const REASONING_BUDGET_CONFIG = "./e2e/experts/reasoning-budget.toml"
-// Extended thinking requires longer timeout
 const LLM_TIMEOUT = 180000
 
 async function runReasoningTest(
@@ -37,7 +28,6 @@ async function runReasoningTest(
   )
   const result = withEventParsing(cmdResult)
 
-  // Get completeRun event for usage info
   const completeEvents = filterEventsByType(result.events, "completeRun")
   const completeEvent = completeEvents[0] as
     | {
@@ -51,15 +41,12 @@ async function runReasoningTest(
       }
     | undefined
 
-  // Get completeStreamingReasoning event for thinking text (renamed in state-machine-redesign)
   const reasoningEvents = filterEventsByType(result.events, "completeStreamingReasoning")
   const reasoningEvent = reasoningEvents[0] as { text?: string } | undefined
 
-  // Use checkpoint.usage as primary source (accumulates all step usage)
   const checkpointUsage = completeEvent?.checkpoint?.usage
   const reasoningTokens = checkpointUsage?.reasoningTokens ?? 0
 
-  // Get thinking from completeReasoning event or from checkpoint messages
   let thinking = reasoningEvent?.text
   if (!thinking && completeEvent?.checkpoint?.messages) {
     for (const message of completeEvent.checkpoint.messages) {
diff --git a/e2e/perstack-cli/run.test.ts b/e2e/perstack-cli/run.test.ts
index 989728ed..db03bd71 100644
--- a/e2e/perstack-cli/run.test.ts
+++ b/e2e/perstack-cli/run.test.ts
@@ -1,14 +1,3 @@
-/**
- * Run Expert E2E Tests
- *
- * Tests core expert execution in perstack:
- * - Simple question answering
- * - Multi-tool parallel execution
- * - PDF reading and summarization
- * - Image reading and description
- *
- * TOML: e2e/experts/global-runtime.toml, special-tools.toml, multi-modal.toml
- */
 import { describe, expect, it } from "bun:test"
 import { assertEventSequenceContains, assertToolCallCount } from "../lib/assertions.js"
 import { filterEventsByType } from "../lib/event-parser.js"
@@ -17,12 +6,10 @@ import { runCli, withEventParsing } from "../lib/runner.js"
 const GLOBAL_RUNTIME_CONFIG = "./e2e/experts/global-runtime.toml"
 const SPECIAL_TOOLS_CONFIG = "./e2e/experts/special-tools.toml"
 const MULTI_MODAL_CONFIG = "./e2e/experts/multi-modal.toml"
-// LLM API calls require extended timeout
 const LLM_TIMEOUT = 120000
 const LLM_EXTENDED_TIMEOUT = 180000
 
 describe.concurrent("Run Expert", () => {
-  /** Verifies simple query completes with text response. */
   it(
     "should answer a simple question and complete",
     async () => {
@@ -42,7 +29,6 @@ describe.concurrent("Run Expert", () => {
     LLM_TIMEOUT,
   )
 
-  /** Verifies 3 tools execute in parallel (PDF, image, search). */
   it(
     "should execute multiple tools in parallel and complete",
     async () => {
@@ -78,7 +64,6 @@ describe.concurrent("Run Expert", () => {
     LLM_EXTENDED_TIMEOUT,
   )
 
-  /** Verifies PDF file reading and content extraction. */
   it(
     "should read and summarize PDF content",
     async () => {
@@ -94,27 +79,23 @@ describe.concurrent("Run Expert", () => {
       )
       const result = withEventParsing(cmdResult)
       expect(result.exitCode).toBe(0)
-      // Verify the execution flow (LLM may batch tools or complete in varying turns)
       expect(
         assertEventSequenceContains(result.events, ["startRun", "callTools", "completeRun"]).passed,
       ).toBe(true)
-      // Verify readPdfFile tool was called and returned a result
       const resolveEvents = filterEventsByType(result.events, "resolveToolResults")
-      expect(resolveEvents.length, "resolveToolResults should exist").toBeGreaterThan(0)
+      expect(resolveEvents.length).toBeGreaterThan(0)
       const hasPdfResult = resolveEvents.some((e) => {
         const toolResults = (e as { toolResults?: { toolName: string }[] }).toolResults ?? []
         return toolResults.some((tr) => tr.toolName === "readPdfFile")
       })
-      expect(hasPdfResult, "readPdfFile should return a result").toBe(true)
-      // Verify completeRun has text content (summary)
+      expect(hasPdfResult).toBe(true)
       const completeEvent = result.events.find((e) => e.type === "completeRun")
       const text = completeEvent && "text" in completeEvent ? (completeEvent.text as string) : ""
-      expect(text.length, "completeRun text should not be empty").toBeGreaterThan(0)
+      expect(text.length).toBeGreaterThan(0)
     },
     LLM_EXTENDED_TIMEOUT,
   )
 
-  /** Verifies image file reading and visual description. */
   it(
     "should read and describe image content",
     async () => {
@@ -130,22 +111,19 @@ describe.concurrent("Run Expert", () => {
       )
       const result = withEventParsing(cmdResult)
       expect(result.exitCode).toBe(0)
-      // Verify the execution flow (LLM may batch tools or complete in varying turns)
       expect(
         assertEventSequenceContains(result.events, ["startRun", "callTools", "completeRun"]).passed,
       ).toBe(true)
-      // Verify readImageFile tool was called and returned a result
       const resolveEvents = filterEventsByType(result.events, "resolveToolResults")
-      expect(resolveEvents.length, "resolveToolResults should exist").toBeGreaterThan(0)
+      expect(resolveEvents.length).toBeGreaterThan(0)
       const hasImageResult = resolveEvents.some((e) => {
         const toolResults = (e as { toolResults?: { toolName: string }[] }).toolResults ?? []
         return toolResults.some((tr) => tr.toolName === "readImageFile")
       })
-      expect(hasImageResult, "readImageFile should return a result").toBe(true)
-      // Verify completeRun has text content (description)
+      expect(hasImageResult).toBe(true)
       const completeEvent = result.events.find((e) => e.type === "completeRun")
       const text = completeEvent && "text" in completeEvent ? (completeEvent.text as string) : ""
-      expect(text.length, "completeRun text should not be empty").toBeGreaterThan(0)
+      expect(text.length).toBeGreaterThan(0)
     },
     LLM_EXTENDED_TIMEOUT,
   )
diff --git a/e2e/perstack-cli/runtime-version.test.ts b/e2e/perstack-cli/runtime-version.test.ts
index a1560b95..f0bd8e1e 100644
--- a/e2e/perstack-cli/runtime-version.test.ts
+++ b/e2e/perstack-cli/runtime-version.test.ts
@@ -1,15 +1,3 @@
-/**
- * Runtime Version E2E Tests
- *
- * Tests runtime version validation in perstack:
- * - v1.0 minRuntimeVersion with 0.x.y runtime (special case)
- * - No minRuntimeVersion (default)
- * - Future version requirement (validation failure)
- * - 3-level delegation chain with all v1.0
- * - Nested delegate with future version requirement
- *
- * TOML: e2e/experts/runtime-version.toml, e2e/experts/runtime-version-future.toml
- */
 import { describe, expect, it } from "bun:test"
 import { assertEventSequenceContains } from "../lib/assertions.js"
 import { filterEventsByType } from "../lib/event-parser.js"
diff --git a/e2e/perstack-cli/skills.test.ts b/e2e/perstack-cli/skills.test.ts
index e8e76d52..d9c457cd 100644
--- a/e2e/perstack-cli/skills.test.ts
+++ b/e2e/perstack-cli/skills.test.ts
@@ -1,24 +1,12 @@
-/**
- * Skills E2E Tests
- *
- * Tests skill configuration in perstack:
- * - pick: Only allow specific tools
- * - omit: Exclude specific tools
- * - Multi-skill: Combine tools from multiple skills
- *
- * TOML: e2e/experts/skills.toml
- */
 import { describe, expect, it } from "bun:test"
 import { assertEventSequenceContains } from "../lib/assertions.js"
 import { filterEventsByType } from "../lib/event-parser.js"
 import { runCli, runCliUntilToolCalled, withEventParsing } from "../lib/runner.js"
 
 const SKILLS_CONFIG = "./e2e/experts/skills.toml"
-// LLM API calls require extended timeout
 const LLM_TIMEOUT = 180000
 
 describe.concurrent("Skills", () => {
-  /** Verifies picked tools only - readTextFile should NOT be available. */
   it(
     "should only have access to picked tools",
     async () => {
@@ -47,7 +35,6 @@ describe.concurrent("Skills", () => {
     LLM_TIMEOUT,
   )
 
-  /** Verifies picked tools (todo, attemptCompletion) are usable. */
   it(
     "should be able to use picked tools",
     async () => {
@@ -71,7 +58,6 @@ describe.concurrent("Skills", () => {
     LLM_TIMEOUT,
   )
 
-  /** Verifies omitted tools (exec) are not available. */
   it(
     "should not have access to omitted tools",
     async () => {
@@ -91,7 +77,6 @@ describe.concurrent("Skills", () => {
     LLM_TIMEOUT,
   )
 
-  /** Verifies tools from multiple skills are all accessible. */
   it(
     "should have access to tools from multiple skills",
     async () => {
@@ -108,7 +93,6 @@ describe.concurrent("Skills", () => {
     LLM_TIMEOUT,
   )
 
-  /** Dynamic skill add/remove via addSkill and removeSkill tools */
   it("should dynamically add and remove skills", async () => {
     const PER_ATTEMPT_TIMEOUT = 90000
     const result = await runCliUntilToolCalled(
@@ -120,27 +104,21 @@ describe.concurrent("Skills", () => {
 
     const callToolsEvents = filterEventsByType(result.events, "callTools")
 
-    // Verify addSkill was called
     const addSkillIndex = callToolsEvents.findIndex((e) => {
       const calls = (e as { toolCalls?: { toolName: string }[] }).toolCalls ?? []
       return calls.some((c) => c.toolName === "addSkill")
     })
-    expect(addSkillIndex, "addSkill should be called").toBeGreaterThanOrEqual(0)
+    expect(addSkillIndex).toBeGreaterThanOrEqual(0)
 
-    // Verify readTextFile was called (from the dynamically added skill)
     const readTextFileIndex = callToolsEvents.findIndex((e) => {
       const calls = (e as { toolCalls?: { toolName: string }[] }).toolCalls ?? []
       return calls.some((c) => c.toolName === "readTextFile")
     })
-    expect(readTextFileIndex, "readTextFile should be called").toBeGreaterThanOrEqual(0)
+    expect(readTextFileIndex).toBeGreaterThanOrEqual(0)
 
-    // Verify ordering: readTextFile must come after addSkill (skill must be added before use)
-    expect(readTextFileIndex, "readTextFile should be called after addSkill").toBeGreaterThan(
-      addSkillIndex,
-    )
+    expect(readTextFileIndex).toBeGreaterThan(addSkillIndex)
   }, 300000)
 
-  /** Dynamic delegate add/remove via addDelegate and removeDelegate tools */
   it(
     "should dynamically add and remove delegates",
     async () => {
@@ -153,14 +131,12 @@ describe.concurrent("Skills", () => {
 
       const callToolsEvents = filterEventsByType(result.events, "callTools")
 
-      // Verify addDelegate was called
       const hasAddDelegate = callToolsEvents.some((e) => {
         const calls = (e as { toolCalls?: { toolName: string }[] }).toolCalls ?? []
         return calls.some((c) => c.toolName === "addDelegate")
       })
       expect(hasAddDelegate).toBe(true)
 
-      // Verify removeDelegate was called
       const hasRemoveDelegate = callToolsEvents.some((e) => {
         const calls = (e as { toolCalls?: { toolName: string }[] }).toolCalls ?? []
         return calls.some((c) => c.toolName === "removeDelegate")
diff --git a/e2e/perstack-cli/streaming.test.ts b/e2e/perstack-cli/streaming.test.ts
index a9beb8d3..521d3978 100644
--- a/e2e/perstack-cli/streaming.test.ts
+++ b/e2e/perstack-cli/streaming.test.ts
@@ -1,18 +1,8 @@
-/**
- * Streaming Events E2E Tests
- *
- * Tests that streaming events are emitted in the correct sequence:
- * - startReasoning → streamReasoning... → completeReasoning
- * - startRunResult → streamRunResult... → completeRun
- *
- * TOML: e2e/experts/reasoning-budget.toml (reuses reasoning budget experts)
- */
 import { describe, expect, it } from "bun:test"
 import type { ParsedEvent } from "../lib/event-parser.js"
 import { runCli, withEventParsing } from "../lib/runner.js"
 
 const STREAMING_CONFIG = "./e2e/experts/reasoning-budget.toml"
-// Streaming tests need enough time for LLM response
 const LLM_TIMEOUT = 180000
 
 const STREAMING_EVENTS = [
@@ -57,25 +47,20 @@ describe("Streaming Events", () => {
 
         expect(result.exitCode).toBe(0)
 
-        // Get all streaming-related events
         const streamingEvents = filterStreamingEvents(result.events)
 
-        // Verify reasoning events exist and are in order
         const reasoningEvents = streamingEvents.filter((e) =>
           ["startStreamingReasoning", "streamReasoning", "completeStreamingReasoning"].includes(
             e.type,
           ),
         )
 
-        // With reasoning budget enabled, we should have reasoning events
         expect(reasoningEvents.length).toBeGreaterThan(0)
 
         if (reasoningEvents.length > 0) {
-          // Should contain streamReasoning deltas
           const streamEvents = reasoningEvents.filter((e) => e.type === "streamReasoning")
           expect(streamEvents.length).toBeGreaterThan(0)
 
-          // Should end with completeStreamingReasoning
           const completeEvents = reasoningEvents.filter(
             (e) => e.type === "completeStreamingReasoning",
           )
@@ -108,29 +93,22 @@ describe("Streaming Events", () => {
 
         expect(result.exitCode).toBe(0)
 
-        // Get all streaming-related events
         const streamingEvents = filterStreamingEvents(result.events)
 
-        // Verify result events exist and are in order
         const resultEvents = streamingEvents.filter((e) =>
           ["startRunResult", "streamRunResult", "completeRun"].includes(e.type),
         )
 
-        // We should always have at least completeRun
         expect(resultEvents.length).toBeGreaterThan(0)
 
-        // Last event should always be completeRun
         expect(resultEvents[resultEvents.length - 1]?.type).toBe("completeRun")
 
-        // Check for result streaming events (only present if GeneratingRunResult was reached)
         const hasResultStreaming = resultEvents.some((e) => e.type === "startRunResult")
 
         if (hasResultStreaming) {
-          // First result event should be startRunResult
           const startIdx = resultEvents.findIndex((e) => e.type === "startRunResult")
           expect(startIdx).toBe(0)
 
-          // All events between startRunResult and completeRun should be streamRunResult
           const middleEvents = resultEvents.slice(1, -1)
           expect(middleEvents.every((e) => e.type === "streamRunResult")).toBe(true)
         }
@@ -163,13 +141,11 @@ describe("Streaming Events", () => {
 
         const streamingEvents = filterStreamingEvents(result.events)
 
-        // Find indices
         const completeReasoningIdx = streamingEvents.findIndex(
           (e) => e.type === "completeStreamingReasoning",
         )
         const startRunResultIdx = streamingEvents.findIndex((e) => e.type === "startRunResult")
 
-        // If both phases exist, reasoning should complete before result starts
         if (completeReasoningIdx !== -1 && startRunResultIdx !== -1) {
           expect(completeReasoningIdx).toBeLessThan(startRunResultIdx)
         }
@@ -179,7 +155,6 @@ describe("Streaming Events", () => {
   })
 
   describe("Without Reasoning", () => {
-    // Use a model/provider without reasoning or with reasoning disabled
     const ANTHROPIC_MODEL = "claude-haiku-4-5"
 
     it(
@@ -207,12 +182,9 @@ describe("Streaming Events", () => {
 
         const streamingEvents = filterStreamingEvents(result.events)
 
-        // Should NOT have reasoning events
         expect(streamingEvents.some((e) => e.type === "startStreamingReasoning")).toBe(false)
         expect(streamingEvents.some((e) => e.type === "streamReasoning")).toBe(false)
 
-        // Should still have result events (but might not have them if direct text completion)
-        // The completeRun should always exist
         expect(result.events.some((e) => e.type === "completeRun")).toBe(true)
       },
       LLM_TIMEOUT,
@@ -245,17 +217,14 @@ describe("Streaming Events", () => {
 
         expect(result.exitCode).toBe(0)
 
-        // Get streamReasoning events
         const streamReasoningEvents = result.events.filter((e) => e.type === "streamReasoning")
 
         if (streamReasoningEvents.length > 0) {
-          // Each streamReasoning should have a delta
           for (const event of streamReasoningEvents) {
             const delta = (event as { delta?: string }).delta
             expect(typeof delta).toBe("string")
           }
 
-          // At least some deltas should be non-empty
           const nonEmptyDeltas = streamReasoningEvents.filter(
             (e) => ((e as { delta?: string }).delta ?? "").length > 0,
           )
@@ -288,17 +257,14 @@ describe("Streaming Events", () => {
 
         expect(result.exitCode).toBe(0)
 
-        // Get streamRunResult events
         const streamResultEvents = result.events.filter((e) => e.type === "streamRunResult")
 
         if (streamResultEvents.length > 0) {
-          // Each streamRunResult should have a delta
           for (const event of streamResultEvents) {
             const delta = (event as { delta?: string }).delta
             expect(typeof delta).toBe("string")
           }
 
-          // At least some deltas should be non-empty
           const nonEmptyDeltas = streamResultEvents.filter(
             (e) => ((e as { delta?: string }).delta ?? "").length > 0,
           )
diff --git a/e2e/perstack-cli/validation.test.ts b/e2e/perstack-cli/validation.test.ts
index a3134bcf..b45fe773 100644
--- a/e2e/perstack-cli/validation.test.ts
+++ b/e2e/perstack-cli/validation.test.ts
@@ -1,37 +1,19 @@
-/**
- * CLI Validation E2E Tests
- *
- * Tests CLI argument validation and error handling:
- * - --version, --help output
- * - Missing required arguments
- * - Nonexistent config files
- * - Invalid option combinations (e.g., --resume-from without --continue-job)
- *
- * These tests do NOT invoke LLM APIs - they test CLI parsing and validation.
- */
 import { describe, expect, it } from "bun:test"
 import { runCli } from "../lib/runner.js"
 
 describe.concurrent("CLI Validation", () => {
-  // ─────────────────────────────────────────────────────────────────────────
-  // Help and Version
-  // ─────────────────────────────────────────────────────────────────────────
-
-  /** Verifies --version outputs semver. */
   it("should show version", async () => {
     const result = await runCli(["--version"])
     expect(result.exitCode).toBe(0)
     expect(result.stdout).toMatch(/^\d+\.\d+\.\d+/)
   })
 
-  /** Verifies --help outputs usage info. */
   it("should show help", async () => {
     const result = await runCli(["--help"])
     expect(result.exitCode).toBe(0)
     expect(result.stdout).toContain("perstack")
   })
 
-  /** Verifies run --help shows expertKey and query. */
   it("should show run command help", async () => {
     const result = await runCli(["run", "--help"])
     expect(result.exitCode).toBe(0)
@@ -39,45 +21,27 @@ describe.concurrent("CLI Validation", () => {
     expect(result.stdout).toContain("query")
   })
 
-  // ─────────────────────────────────────────────────────────────────────────
-  // Missing Arguments
-  // ─────────────────────────────────────────────────────────────────────────
-
-  /** Verifies run command requires expert and query */
   it("should fail without arguments", async () => {
     const result = await runCli(["run"])
     expect(result.exitCode).toBe(1)
   })
 
-  /** Verifies run command requires query after expert key */
   it("should fail with only expert key", async () => {
     const result = await runCli(["run", "expertOnly"])
     expect(result.exitCode).toBe(1)
   })
 
-  // ─────────────────────────────────────────────────────────────────────────
-  // Nonexistent Resources
-  // ─────────────────────────────────────────────────────────────────────────
-
-  /** Verifies error for expert not found in config */
   it("should fail for nonexistent expert", async () => {
     const result = await runCli(["run", "nonexistent-expert", "test query"])
     expect(result.exitCode).toBe(1)
   })
 
-  /** Verifies error for nonexistent config file path */
   it("should fail with nonexistent config file", async () => {
     const result = await runCli(["run", "--config", "nonexistent.toml", "expert", "query"])
     expect(result.exitCode).toBe(1)
   })
 
-  // ─────────────────────────────────────────────────────────────────────────
-  // Invalid Option Combinations
-  // ─────────────────────────────────────────────────────────────────────────
-
-  /** Verifies --resume-from requires --continue-job */
   it("should fail when --resume-from is used without --continue-job", async () => {
-    // Note: CLI requires config file before argument validation, so we use a valid config
     const result = await runCli([
       "run",
       "--config",
@@ -91,11 +55,6 @@ describe.concurrent("CLI Validation", () => {
     expect(result.stderr).toContain("--resume-from requires --continue-job")
   })
 
-  // ─────────────────────────────────────────────────────────────────────────
-  // Delegation Errors
-  // ─────────────────────────────────────────────────────────────────────────
-
-  /** Verifies clear error message when delegate expert doesn't exist */
   it("should fail with clear message for nonexistent delegate", async () => {
     const result = await runCli([
       "run",
diff --git a/e2e/perstack-cli/versioned-base.test.ts b/e2e/perstack-cli/versioned-base.test.ts
index 9c10bf9d..fb03c139 100644
--- a/e2e/perstack-cli/versioned-base.test.ts
+++ b/e2e/perstack-cli/versioned-base.test.ts
@@ -1,22 +1,12 @@
-/**
- * Versioned Base Skill E2E Tests
- *
- * Tests that pinning an explicit version for @perstack/base
- * falls back to StdioTransport (npx).
- *
- * TOML: e2e/experts/versioned-base.toml
- */
 import { describe, expect, it } from "bun:test"
 import { assertEventSequenceContains } from "../lib/assertions.js"
 import { filterEventsByType } from "../lib/event-parser.js"
 import { runCli, withEventParsing } from "../lib/runner.js"
 
 const VERSIONED_BASE_CONFIG = "./e2e/experts/versioned-base.toml"
-// LLM API calls + npx download require extended timeout
 const LLM_TIMEOUT = 180000
 
 describe.concurrent("Versioned Base Skill (StdioTransport Fallback)", () => {
-  /** Verifies versioned base skill uses StdioTransport (spawnDurationMs > 0). */
   it(
     "should use StdioTransport for versioned base (spawnDurationMs > 0)",
     async () => {
@@ -31,7 +21,6 @@ describe.concurrent("Versioned Base Skill (StdioTransport Fallback)", () => {
         true,
       )
 
-      // Check that skillConnected event for @perstack/base exists
       const skillConnectedEvents = filterEventsByType(result.events, "skillConnected")
       const baseSkillEvent = skillConnectedEvents.find((e) => {
         const event = e as { skillName?: string }
@@ -44,15 +33,12 @@ describe.concurrent("Versioned Base Skill (StdioTransport Fallback)", () => {
         spawnDurationMs?: number
         totalDurationMs?: number
       }
-      // StdioTransport spawns a process, so totalDurationMs should be > 0
-      // Note: spawnDurationMs might be 0 or small if npx is cached
       expect(baseEvent.totalDurationMs).toBeDefined()
       expect(baseEvent.totalDurationMs).toBeGreaterThan(0)
     },
     LLM_TIMEOUT,
   )
 
-  /** Verifies versioned base skill tools are available. */
   it(
     "should have picked tools available",
     async () => {
@@ -64,7 +50,6 @@ describe.concurrent("Versioned Base Skill (StdioTransport Fallback)", () => {
 
       expect(result.exitCode).toBe(0)
 
-      // Check that readTextFile was called (from pick list)
       const callToolsEvents = filterEventsByType(result.events, "callTools")
       const hasHealthCheck = callToolsEvents.some((e) => {
         const event = e as { toolCalls?: Array<{ toolName: string }> }