From 7ce1fa5854616823e0f714517430bed9be1f45df Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Sun, 15 Feb 2026 13:38:41 +0000 Subject: [PATCH] refactor: deduplicate E2E tests to reduce LLM API calls Remove 5 duplicate E2E test cases across 3 deleted files and 2 trimmed files, reducing ~4 LLM API calls per CI run while preserving coverage by merging unique assertions into the canonical tests. Co-Authored-By: Claude Opus 4.6 --- .github/workflows/e2e.yml | 2 - e2e/experts/limits.toml | 41 ------------ e2e/perstack-cli/continue.test.ts | 21 ++----- e2e/perstack-cli/limits.test.ts | 65 -------------------- e2e/perstack-cli/options.test.ts | 8 ++- e2e/perstack-cli/published-expert.test.ts | 11 ---- e2e/perstack-cli/reasoning-budget.test.ts | 37 ----------- e2e/perstack-cli/runtime-interactive.test.ts | 38 ------------ 8 files changed, 10 insertions(+), 213 deletions(-) delete mode 100644 e2e/experts/limits.toml delete mode 100644 e2e/perstack-cli/limits.test.ts delete mode 100644 e2e/perstack-cli/runtime-interactive.test.ts diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index 1bc06f63..9df7630e 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -30,12 +30,10 @@ jobs: e2e/perstack-cli/run.test.ts e2e/perstack-cli/options.test.ts e2e/perstack-cli/skills.test.ts - e2e/perstack-cli/limits.test.ts e2e/perstack-cli/lockfile.test.ts e2e/perstack-cli/providers.test.ts e2e/perstack-cli/error-handling.test.ts e2e/perstack-cli/interactive.test.ts - e2e/perstack-cli/runtime-interactive.test.ts e2e/perstack-cli/lazy-init.test.ts e2e/perstack-cli/bundled-base.test.ts e2e/perstack-cli/versioned-base.test.ts diff --git a/e2e/experts/limits.toml b/e2e/experts/limits.toml deleted file mode 100644 index 2f5090a4..00000000 --- a/e2e/experts/limits.toml +++ /dev/null @@ -1,41 +0,0 @@ -model = "claude-sonnet-4-5" - -[provider] -providerName = "anthropic" - -envPath = [".env", ".env.local"] - -[experts."e2e-max-steps"] -version = "1.0.0" -description = "E2E test expert for maxSteps limit testing" -instruction = """ -You are an E2E test expert that generates many steps to test maxSteps limit. - -When given any query: -1. First use the think tool to think about the query -2. Then use the think tool again to think more -3. Keep using think tool multiple times -4. After 5+ think calls, call attemptCompletion - -IMPORTANT: Make at least 5 separate tool calls before completing. -""" - -[experts."e2e-max-steps".skills."@perstack/base"] -type = "mcpStdioSkill" -command = "npx" -packageName = "@perstack/base" -pick = ["attemptCompletion", "think"] - -[experts."e2e-slow-task"] -version = "1.0.0" -description = "E2E test expert for timeout testing" -instruction = """ -You are an E2E test expert. -Simply respond with a greeting and call attemptCompletion. -""" - -[experts."e2e-slow-task".skills."@perstack/base"] -type = "mcpStdioSkill" -command = "npx" -packageName = "@perstack/base" -pick = ["attemptCompletion"] diff --git a/e2e/perstack-cli/continue.test.ts b/e2e/perstack-cli/continue.test.ts index d0f3b171..4f64442f 100644 --- a/e2e/perstack-cli/continue.test.ts +++ b/e2e/perstack-cli/continue.test.ts @@ -136,33 +136,20 @@ describe.concurrent("Continue Job", () => { /** * Verifies checkpoint ID is captured for resume-from functionality. + * Also verifies run stops at interactive tool with correct event sequence. */ it("should capture checkpoint ID for resume-from", async () => { const cmdResult = await runCli(runArgs("e2e-continue", "Test continue/resume functionality"), { timeout: LLM_TIMEOUT, }) const result = withEventParsing(cmdResult) + expect( + assertEventSequenceContains(result.events, ["startRun", "stopRunByInteractiveTool"]).passed, + ).toBe(true) const stopEvents = filterEventsByType(result.events, "stopRunByInteractiveTool") expect(stopEvents.length).toBe(1) const checkpoint = (stopEvents[0] as { checkpoint?: { id?: string } }).checkpoint expect(checkpoint?.id).toBeDefined() expect(typeof checkpoint?.id).toBe("string") }) - - /** - * Verifies --resume-from requires --continue-job option. - */ - it("should fail when --resume-from is used without --continue-job", async () => { - const result = await runCli([ - "run", - "--config", - CONTINUE_CONFIG, - "--resume-from", - "checkpoint-123", - "e2e-continue", - "test", - ]) - expect(result.exitCode).toBe(1) - expect(result.stderr).toContain("--resume-from requires --continue-job") - }) }) diff --git a/e2e/perstack-cli/limits.test.ts b/e2e/perstack-cli/limits.test.ts deleted file mode 100644 index 30d37c9b..00000000 --- a/e2e/perstack-cli/limits.test.ts +++ /dev/null @@ -1,65 +0,0 @@ -/** - * Execution Limits E2E Tests - * - * Tests execution limit options in perstack: - * - --max-steps: Maximum generation steps - * - --max-retries: Maximum retry attempts - * - * TOML: e2e/experts/global-runtime.toml - */ -import { describe, expect, it } from "vitest" -import { assertEventSequenceContains } from "../lib/assertions.js" -import { runCli, withEventParsing } from "../lib/runner.js" - -const GLOBAL_RUNTIME_CONFIG = "./e2e/experts/global-runtime.toml" -// LLM API calls require extended timeout -const LLM_TIMEOUT = 120000 - -describe.concurrent("Execution Limits", () => { - /** Verifies --max-steps option is accepted and run completes. */ - it( - "should accept --max-steps option and complete within limit", - async () => { - const cmdResult = await runCli( - [ - "run", - "--config", - GLOBAL_RUNTIME_CONFIG, - "--max-steps", - "10", - "e2e-global-runtime", - "Say hello", - ], - { timeout: LLM_TIMEOUT }, - ) - const result = withEventParsing(cmdResult) - expect(result.exitCode).toBe(0) - expect(assertEventSequenceContains(result.events, ["startRun", "completeRun"]).passed).toBe( - true, - ) - }, - LLM_TIMEOUT, - ) - - /** Verifies --max-retries option is accepted. */ - it( - "should accept --max-retries option", - async () => { - const cmdResult = await runCli( - [ - "run", - "--config", - GLOBAL_RUNTIME_CONFIG, - "--max-retries", - "3", - "e2e-global-runtime", - "Say hello", - ], - { timeout: LLM_TIMEOUT }, - ) - const result = withEventParsing(cmdResult) - expect(result.exitCode).toBe(0) - }, - LLM_TIMEOUT, - ) -}) diff --git a/e2e/perstack-cli/options.test.ts b/e2e/perstack-cli/options.test.ts index dee28190..f22cbe74 100644 --- a/e2e/perstack-cli/options.test.ts +++ b/e2e/perstack-cli/options.test.ts @@ -59,11 +59,11 @@ describe.concurrent("CLI Options", () => { LLM_TIMEOUT, ) - /** Verifies --max-steps option is accepted. */ + /** Verifies --max-steps option is accepted and run completes. */ it( "should accept --max-steps option", async () => { - const result = await runCli( + const cmdResult = await runCli( [ "run", "--config", @@ -75,7 +75,11 @@ describe.concurrent("CLI Options", () => { ], { timeout: LLM_TIMEOUT }, ) + const result = withEventParsing(cmdResult) expect(result.exitCode).toBe(0) + expect(assertEventSequenceContains(result.events, ["startRun", "completeRun"]).passed).toBe( + true, + ) }, LLM_TIMEOUT, ) diff --git a/e2e/perstack-cli/published-expert.test.ts b/e2e/perstack-cli/published-expert.test.ts index 76b7802d..0292d311 100644 --- a/e2e/perstack-cli/published-expert.test.ts +++ b/e2e/perstack-cli/published-expert.test.ts @@ -4,18 +4,13 @@ * Tests error handling for published expert resolution: * - Nonexistent published experts (e.g., @user/expert) * - Invalid expert key formats - * - Failed delegation to nonexistent published experts * * These tests verify graceful error handling without LLM API calls * (errors occur before LLM generation starts). - * - * TOML: e2e/experts/error-handling.toml */ import { describe, expect, it } from "vitest" import { runCli } from "../lib/runner.js" -const CONFIG = "./e2e/experts/error-handling.toml" - describe.concurrent("Published Expert", () => { /** Verifies error message for nonexistent @user/expert format */ it("should fail gracefully for nonexistent published expert", async () => { @@ -29,10 +24,4 @@ describe.concurrent("Published Expert", () => { const result = await runCli(["run", "@invalid", "test query"]) expect(result.exitCode).toBe(1) }) - - /** Verifies error when expert tries to delegate to nonexistent expert */ - it("should fail gracefully when delegating to nonexistent published expert", async () => { - const result = await runCli(["run", "--config", CONFIG, "e2e-invalid-delegate", "test"]) - expect(result.exitCode).not.toBe(0) - }) }) diff --git a/e2e/perstack-cli/reasoning-budget.test.ts b/e2e/perstack-cli/reasoning-budget.test.ts index 0a92dc91..a30c06f6 100644 --- a/e2e/perstack-cli/reasoning-budget.test.ts +++ b/e2e/perstack-cli/reasoning-budget.test.ts @@ -115,43 +115,6 @@ describe("Reasoning Budget", () => { LLM_TIMEOUT, ) - it( - "should emit streaming reasoning events", - async () => { - const expertKey = "e2e-reasoning-anthropic-medium" - const cmdResult = await runCli( - [ - "run", - "--config", - REASONING_BUDGET_CONFIG, - expertKey, - "Calculate 3+3", - "--provider", - "anthropic", - "--model", - ANTHROPIC_MODEL, - "--reasoning-budget", - "medium", - ], - { timeout: LLM_TIMEOUT }, - ) - const result = withEventParsing(cmdResult) - - expect(result.exitCode).toBe(0) - - // Verify streaming events were emitted (renamed in state-machine-redesign) - const streamReasoningEvents = result.events.filter((e) => e.type === "streamReasoning") - expect(streamReasoningEvents.length).toBeGreaterThan(0) - - // Verify completeStreamingReasoning is emitted at least once - const completeReasoningEvents = result.events.filter( - (e) => e.type === "completeStreamingReasoning", - ) - expect(completeReasoningEvents.length).toBeGreaterThan(0) - }, - LLM_TIMEOUT, - ) - it( "should produce more reasoning tokens with higher budget", async () => { diff --git a/e2e/perstack-cli/runtime-interactive.test.ts b/e2e/perstack-cli/runtime-interactive.test.ts deleted file mode 100644 index ef14bad9..00000000 --- a/e2e/perstack-cli/runtime-interactive.test.ts +++ /dev/null @@ -1,38 +0,0 @@ -/** - * Interactive Input E2E Tests (Runtime) - * - * Tests interactive tool handling: - * - Stop at interactive tool (askUser) - * - Checkpoint emission for resume - * - * TOML: e2e/experts/continue-resume.toml - */ -import { describe, expect, it } from "vitest" -import { assertEventSequenceContains } from "../lib/assertions.js" -import { runCli, withEventParsing } from "../lib/runner.js" - -const CONTINUE_CONFIG = "./e2e/experts/continue-resume.toml" -// LLM API calls require extended timeout -const LLM_TIMEOUT = 180000 - -describe.concurrent("Interactive Input", () => { - /** Verifies run stops at interactive tool and emits checkpoint. */ - it( - "should stop at interactive tool and emit checkpoint", - async () => { - const cmdResult = await runCli( - ["run", "--config", CONTINUE_CONFIG, "e2e-continue", "Test continue/resume functionality"], - { timeout: LLM_TIMEOUT }, - ) - const result = withEventParsing(cmdResult) - // Note: callInteractiveTool was removed in state-machine-redesign - expect( - assertEventSequenceContains(result.events, ["startRun", "stopRunByInteractiveTool"]).passed, - ).toBe(true) - const stopEvent = result.events.find((e) => e.type === "stopRunByInteractiveTool") - expect(stopEvent).toBeDefined() - expect((stopEvent as { checkpoint?: { id?: string } }).checkpoint?.id).toBeDefined() - }, - LLM_TIMEOUT, - ) -})