From 7ce1fa5854616823e0f714517430bed9be1f45df Mon Sep 17 00:00:00 2001
From: HiranoMasaaki <lambda.groove@gmail.com>
Date: Sun, 15 Feb 2026 13:38:41 +0000
Subject: [PATCH] refactor: deduplicate E2E tests to reduce LLM API calls

Remove 6 duplicate E2E test cases by deleting 2 test files (plus the
limits.toml expert config) and trimming 3 others, saving ~4 LLM API
calls per CI run while preserving coverage by merging unique assertions
into the canonical tests.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .github/workflows/e2e.yml                    |  2 -
 e2e/experts/limits.toml                      | 41 ------------
 e2e/perstack-cli/continue.test.ts            | 21 ++-----
 e2e/perstack-cli/limits.test.ts              | 65 --------------------
 e2e/perstack-cli/options.test.ts             |  8 ++-
 e2e/perstack-cli/published-expert.test.ts    | 11 ----
 e2e/perstack-cli/reasoning-budget.test.ts    | 37 -----------
 e2e/perstack-cli/runtime-interactive.test.ts | 38 ------------
 8 files changed, 10 insertions(+), 213 deletions(-)
 delete mode 100644 e2e/experts/limits.toml
 delete mode 100644 e2e/perstack-cli/limits.test.ts
 delete mode 100644 e2e/perstack-cli/runtime-interactive.test.ts

diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
index 1bc06f63..9df7630e 100644
--- a/.github/workflows/e2e.yml
+++ b/.github/workflows/e2e.yml
@@ -30,12 +30,10 @@ jobs:
               e2e/perstack-cli/run.test.ts
               e2e/perstack-cli/options.test.ts
               e2e/perstack-cli/skills.test.ts
-              e2e/perstack-cli/limits.test.ts
               e2e/perstack-cli/lockfile.test.ts
               e2e/perstack-cli/providers.test.ts
               e2e/perstack-cli/error-handling.test.ts
               e2e/perstack-cli/interactive.test.ts
-              e2e/perstack-cli/runtime-interactive.test.ts
               e2e/perstack-cli/lazy-init.test.ts
               e2e/perstack-cli/bundled-base.test.ts
               e2e/perstack-cli/versioned-base.test.ts
diff --git a/e2e/experts/limits.toml b/e2e/experts/limits.toml
deleted file mode 100644
index 2f5090a4..00000000
--- a/e2e/experts/limits.toml
+++ /dev/null
@@ -1,41 +0,0 @@
-model = "claude-sonnet-4-5"
-
-[provider]
-providerName = "anthropic"
-
-envPath = [".env", ".env.local"]
-
-[experts."e2e-max-steps"]
-version = "1.0.0"
-description = "E2E test expert for maxSteps limit testing"
-instruction = """
-You are an E2E test expert that generates many steps to test maxSteps limit.
-
-When given any query:
-1. First use the think tool to think about the query
-2. Then use the think tool again to think more
-3. Keep using think tool multiple times
-4. After 5+ think calls, call attemptCompletion
-
-IMPORTANT: Make at least 5 separate tool calls before completing.
-"""
-
-[experts."e2e-max-steps".skills."@perstack/base"]
-type = "mcpStdioSkill"
-command = "npx"
-packageName = "@perstack/base"
-pick = ["attemptCompletion", "think"]
-
-[experts."e2e-slow-task"]
-version = "1.0.0"
-description = "E2E test expert for timeout testing"
-instruction = """
-You are an E2E test expert.
-Simply respond with a greeting and call attemptCompletion.
-"""
-
-[experts."e2e-slow-task".skills."@perstack/base"]
-type = "mcpStdioSkill"
-command = "npx"
-packageName = "@perstack/base"
-pick = ["attemptCompletion"]
diff --git a/e2e/perstack-cli/continue.test.ts b/e2e/perstack-cli/continue.test.ts
index d0f3b171..4f64442f 100644
--- a/e2e/perstack-cli/continue.test.ts
+++ b/e2e/perstack-cli/continue.test.ts
@@ -136,33 +136,20 @@ describe.concurrent("Continue Job", () => {
 
   /**
    * Verifies checkpoint ID is captured for resume-from functionality.
+   * Also verifies run stops at interactive tool with correct event sequence.
    */
   it("should capture checkpoint ID for resume-from", async () => {
     const cmdResult = await runCli(runArgs("e2e-continue", "Test continue/resume functionality"), {
       timeout: LLM_TIMEOUT,
     })
     const result = withEventParsing(cmdResult)
+    expect(
+      assertEventSequenceContains(result.events, ["startRun", "stopRunByInteractiveTool"]).passed,
+    ).toBe(true)
     const stopEvents = filterEventsByType(result.events, "stopRunByInteractiveTool")
     expect(stopEvents.length).toBe(1)
     const checkpoint = (stopEvents[0] as { checkpoint?: { id?: string } }).checkpoint
     expect(checkpoint?.id).toBeDefined()
     expect(typeof checkpoint?.id).toBe("string")
   })
-
-  /**
-   * Verifies --resume-from requires --continue-job option.
-   */
-  it("should fail when --resume-from is used without --continue-job", async () => {
-    const result = await runCli([
-      "run",
-      "--config",
-      CONTINUE_CONFIG,
-      "--resume-from",
-      "checkpoint-123",
-      "e2e-continue",
-      "test",
-    ])
-    expect(result.exitCode).toBe(1)
-    expect(result.stderr).toContain("--resume-from requires --continue-job")
-  })
 })
diff --git a/e2e/perstack-cli/limits.test.ts b/e2e/perstack-cli/limits.test.ts
deleted file mode 100644
index 30d37c9b..00000000
--- a/e2e/perstack-cli/limits.test.ts
+++ /dev/null
@@ -1,65 +0,0 @@
-/**
- * Execution Limits E2E Tests
- *
- * Tests execution limit options in perstack:
- * - --max-steps: Maximum generation steps
- * - --max-retries: Maximum retry attempts
- *
- * TOML: e2e/experts/global-runtime.toml
- */
-import { describe, expect, it } from "vitest"
-import { assertEventSequenceContains } from "../lib/assertions.js"
-import { runCli, withEventParsing } from "../lib/runner.js"
-
-const GLOBAL_RUNTIME_CONFIG = "./e2e/experts/global-runtime.toml"
-// LLM API calls require extended timeout
-const LLM_TIMEOUT = 120000
-
-describe.concurrent("Execution Limits", () => {
-  /** Verifies --max-steps option is accepted and run completes. */
-  it(
-    "should accept --max-steps option and complete within limit",
-    async () => {
-      const cmdResult = await runCli(
-        [
-          "run",
-          "--config",
-          GLOBAL_RUNTIME_CONFIG,
-          "--max-steps",
-          "10",
-          "e2e-global-runtime",
-          "Say hello",
-        ],
-        { timeout: LLM_TIMEOUT },
-      )
-      const result = withEventParsing(cmdResult)
-      expect(result.exitCode).toBe(0)
-      expect(assertEventSequenceContains(result.events, ["startRun", "completeRun"]).passed).toBe(
-        true,
-      )
-    },
-    LLM_TIMEOUT,
-  )
-
-  /** Verifies --max-retries option is accepted. */
-  it(
-    "should accept --max-retries option",
-    async () => {
-      const cmdResult = await runCli(
-        [
-          "run",
-          "--config",
-          GLOBAL_RUNTIME_CONFIG,
-          "--max-retries",
-          "3",
-          "e2e-global-runtime",
-          "Say hello",
-        ],
-        { timeout: LLM_TIMEOUT },
-      )
-      const result = withEventParsing(cmdResult)
-      expect(result.exitCode).toBe(0)
-    },
-    LLM_TIMEOUT,
-  )
-})
diff --git a/e2e/perstack-cli/options.test.ts b/e2e/perstack-cli/options.test.ts
index dee28190..f22cbe74 100644
--- a/e2e/perstack-cli/options.test.ts
+++ b/e2e/perstack-cli/options.test.ts
@@ -59,11 +59,11 @@ describe.concurrent("CLI Options", () => {
     LLM_TIMEOUT,
   )
 
-  /** Verifies --max-steps option is accepted. */
+  /** Verifies --max-steps option is accepted and run completes. */
   it(
     "should accept --max-steps option",
     async () => {
-      const result = await runCli(
+      const cmdResult = await runCli(
         [
           "run",
           "--config",
@@ -75,7 +75,11 @@ describe.concurrent("CLI Options", () => {
         ],
         { timeout: LLM_TIMEOUT },
       )
+      const result = withEventParsing(cmdResult)
       expect(result.exitCode).toBe(0)
+      expect(assertEventSequenceContains(result.events, ["startRun", "completeRun"]).passed).toBe(
+        true,
+      )
     },
     LLM_TIMEOUT,
   )
diff --git a/e2e/perstack-cli/published-expert.test.ts b/e2e/perstack-cli/published-expert.test.ts
index 76b7802d..0292d311 100644
--- a/e2e/perstack-cli/published-expert.test.ts
+++ b/e2e/perstack-cli/published-expert.test.ts
@@ -4,18 +4,13 @@
  * Tests error handling for published expert resolution:
  * - Nonexistent published experts (e.g., @user/expert)
  * - Invalid expert key formats
- * - Failed delegation to nonexistent published experts
  *
  * These tests verify graceful error handling without LLM API calls
  * (errors occur before LLM generation starts).
- *
- * TOML: e2e/experts/error-handling.toml
  */
 import { describe, expect, it } from "vitest"
 import { runCli } from "../lib/runner.js"
 
-const CONFIG = "./e2e/experts/error-handling.toml"
-
 describe.concurrent("Published Expert", () => {
   /** Verifies error message for nonexistent @user/expert format */
   it("should fail gracefully for nonexistent published expert", async () => {
@@ -29,10 +24,4 @@ describe.concurrent("Published Expert", () => {
     const result = await runCli(["run", "@invalid", "test query"])
     expect(result.exitCode).toBe(1)
   })
-
-  /** Verifies error when expert tries to delegate to nonexistent expert */
-  it("should fail gracefully when delegating to nonexistent published expert", async () => {
-    const result = await runCli(["run", "--config", CONFIG, "e2e-invalid-delegate", "test"])
-    expect(result.exitCode).not.toBe(0)
-  })
 })
diff --git a/e2e/perstack-cli/reasoning-budget.test.ts b/e2e/perstack-cli/reasoning-budget.test.ts
index 0a92dc91..a30c06f6 100644
--- a/e2e/perstack-cli/reasoning-budget.test.ts
+++ b/e2e/perstack-cli/reasoning-budget.test.ts
@@ -115,43 +115,6 @@ describe("Reasoning Budget", () => {
       LLM_TIMEOUT,
     )
 
-    it(
-      "should emit streaming reasoning events",
-      async () => {
-        const expertKey = "e2e-reasoning-anthropic-medium"
-        const cmdResult = await runCli(
-          [
-            "run",
-            "--config",
-            REASONING_BUDGET_CONFIG,
-            expertKey,
-            "Calculate 3+3",
-            "--provider",
-            "anthropic",
-            "--model",
-            ANTHROPIC_MODEL,
-            "--reasoning-budget",
-            "medium",
-          ],
-          { timeout: LLM_TIMEOUT },
-        )
-        const result = withEventParsing(cmdResult)
-
-        expect(result.exitCode).toBe(0)
-
-        // Verify streaming events were emitted (renamed in state-machine-redesign)
-        const streamReasoningEvents = result.events.filter((e) => e.type === "streamReasoning")
-        expect(streamReasoningEvents.length).toBeGreaterThan(0)
-
-        // Verify completeStreamingReasoning is emitted at least once
-        const completeReasoningEvents = result.events.filter(
-          (e) => e.type === "completeStreamingReasoning",
-        )
-        expect(completeReasoningEvents.length).toBeGreaterThan(0)
-      },
-      LLM_TIMEOUT,
-    )
-
     it(
       "should produce more reasoning tokens with higher budget",
       async () => {
diff --git a/e2e/perstack-cli/runtime-interactive.test.ts b/e2e/perstack-cli/runtime-interactive.test.ts
deleted file mode 100644
index ef14bad9..00000000
--- a/e2e/perstack-cli/runtime-interactive.test.ts
+++ /dev/null
@@ -1,38 +0,0 @@
-/**
- * Interactive Input E2E Tests (Runtime)
- *
- * Tests interactive tool handling:
- * - Stop at interactive tool (askUser)
- * - Checkpoint emission for resume
- *
- * TOML: e2e/experts/continue-resume.toml
- */
-import { describe, expect, it } from "vitest"
-import { assertEventSequenceContains } from "../lib/assertions.js"
-import { runCli, withEventParsing } from "../lib/runner.js"
-
-const CONTINUE_CONFIG = "./e2e/experts/continue-resume.toml"
-// LLM API calls require extended timeout
-const LLM_TIMEOUT = 180000
-
-describe.concurrent("Interactive Input", () => {
-  /** Verifies run stops at interactive tool and emits checkpoint. */
-  it(
-    "should stop at interactive tool and emit checkpoint",
-    async () => {
-      const cmdResult = await runCli(
-        ["run", "--config", CONTINUE_CONFIG, "e2e-continue", "Test continue/resume functionality"],
-        { timeout: LLM_TIMEOUT },
-      )
-      const result = withEventParsing(cmdResult)
-      // Note: callInteractiveTool was removed in state-machine-redesign
-      expect(
-        assertEventSequenceContains(result.events, ["startRun", "stopRunByInteractiveTool"]).passed,
-      ).toBe(true)
-      const stopEvent = result.events.find((e) => e.type === "stopRunByInteractiveTool")
-      expect(stopEvent).toBeDefined()
-      expect((stopEvent as { checkpoint?: { id?: string } }).checkpoint?.id).toBeDefined()
-    },
-    LLM_TIMEOUT,
-  )
-})