From 1fdb39c822ec85ae216f957bd3926b66796af34d Mon Sep 17 00:00:00 2001
From: Andrea Nodari <andrea@entire.io>
Date: Tue, 24 Mar 2026 15:48:43 +0100
Subject: [PATCH 1/2] Add external-agents-tests

---
 .claude/skills/entire-external-agent/SKILL.md | 126 ++--
 .../skills/entire-external-agent/implement.md | 299 +++------
 .../entire-external-agent/write-tests.md      | 325 +++-------
 .github/workflows/protocol-compliance.yml     |  65 ++
 AGENTS.md                                     |  11 +-
 Makefile                                      |  26 +-
 README.md                                     |  52 +-
 agents/entire-agent-kiro/README.md            |  30 +-
 .../kiro/.entire/tmp/kiro-tool-calls.jsonl    |   6 +
 e2e/README.md                                 |  21 +-
 e2e/fixtures.go                               |  50 --
 e2e/harness.go                                |  88 ---
 e2e/kiro/fixtures_test.go                     | 105 ---
 e2e/kiro/kiro_test.go                         | 613 ------------------
 e2e/kiro/setup_test.go                        |  37 --
 e2e/kiro/testenv_test.go                      |  26 -
 e2e/lifecycle_test.go                         |  43 +-
 e2e/testenv.go                                | 145 -----
 18 files changed, 398 insertions(+), 1670 deletions(-)
 create mode 100644 .github/workflows/protocol-compliance.yml
 create mode 100644 agents/entire-agent-kiro/internal/kiro/.entire/tmp/kiro-tool-calls.jsonl
 delete mode 100644 e2e/fixtures.go
 delete mode 100644 e2e/harness.go
 delete mode 100644 e2e/kiro/fixtures_test.go
 delete mode 100644 e2e/kiro/kiro_test.go
 delete mode 100644 e2e/kiro/setup_test.go
 delete mode 100644 e2e/kiro/testenv_test.go
 delete mode 100644 e2e/testenv.go

diff --git a/.claude/skills/entire-external-agent/SKILL.md b/.claude/skills/entire-external-agent/SKILL.md
index 7bd84ea..883c26a 100644
--- a/.claude/skills/entire-external-agent/SKILL.md
+++ b/.claude/skills/entire-external-agent/SKILL.md
@@ -2,101 +2,111 @@
 name: entire-external-agent
 description: >
   Run all three external agent binary phases sequentially: research, write-tests,
-  and implement using E2E-first TDD (unit tests written last).
-  Accepts an optional argument to run a single phase: research, write-tests, or implement.
-  Usage: /entire-external-agent [phase] — omit phase to run full pipeline.
-  Use when the user says "build external agent", "create agent binary",
-  "external agent plugin", or wants to run the full pipeline end-to-end.
+  and implement using black-box-first TDD across protocol compliance, lifecycle
+  integration, and unit tests. Accepts an optional argument to run a single phase:
+  research, write-tests, or implement.
 ---
 
 # External Agent Binary — Full Pipeline
 
-Build a standalone external agent binary that implements the Entire CLI's external agent protocol using E2E-first TDD. Parameters are collected once and reused across all phases.
+Build a standalone external agent binary that implements the Entire CLI external agent protocol.
+
+The current test split is:
+
+1. **Protocol compliance** lives in `external-agents-tests`.
+2. **Lifecycle integration** lives in this repo's `e2e/` harness.
+3. **Unit tests** live in each agent module.
+
+Do not add new generic protocol tests under this repo's `e2e/` directory.
 
 ## Parameters
 
-Collect these before starting (ask the user if not provided):
+Collect these before starting if the user did not provide them:
 
-| Parameter | Description | How to derive |
-|-----------|-------------|---------------|
-| `AGENT_NAME` | Human-readable name (e.g., "Windsurf") | User provides |
-| `AGENT_SLUG` | Binary suffix: `entire-agent-<AGENT_SLUG>` (kebab-case) | Kebab-case of agent name |
-| `LANGUAGE` | Implementation language (Go, Python, TypeScript, Rust) | User provides; default Go |
-| `PROJECT_DIR` | Where to create the project | Default: `./entire-agent-<AGENT_SLUG>` |
-| `CAPABILITIES` | Which optional capabilities to implement | Derived from research phase |
-| `ENTIRE_BIN` | Path to the Entire CLI binary | Default: `entire` from PATH, or `E2E_ENTIRE_BIN` env |
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `AGENT_NAME` | Human-readable name (for example, `Windsurf`) | User-provided |
+| `AGENT_SLUG` | Binary suffix for `entire-agent-<slug>` | Kebab-case of `AGENT_NAME` |
+| `LANGUAGE` | Implementation language | `Go` |
+| `PROJECT_DIR` | Agent directory to create or edit | `./agents/entire-agent-<slug>` |
+| `ENTIRE_BIN` | Path to the Entire CLI binary for lifecycle testing | `entire` from `PATH`, or the path in the `E2E_ENTIRE_BIN` environment variable |
 
 ## Phase Selection
 
-This skill accepts an optional argument to run a single phase:
+- `/entire-external-agent research` runs only Phase 1.
+- `/entire-external-agent write-tests` runs only Phase 2.
+- `/entire-external-agent implement` runs only Phase 3.
+- `/entire-external-agent` runs all three phases in order.
 
-- `/entire-external-agent research` — Run only Phase 1 (research)
-- `/entire-external-agent write-tests` — Run only Phase 2 (scaffold + E2E tests)
-- `/entire-external-agent implement` — Run only Phase 3 (E2E-first TDD implementation)
-- `/entire-external-agent` (no argument) — Run all three phases sequentially
-
-If an argument is provided, skip directly to that phase's procedure. Parameters and prerequisites still apply — collect them before starting.
+If a single phase is requested, still collect the shared parameters first.
 
 ## Protocol Spec
 
 Use the protocol specification at:
 `https://github.com/entireio/cli/blob/main/docs/architecture/external-agent-protocol.md`
 
-If a user provides a different protocol spec location explicitly, use that instead and pass it to each phase as `PROTOCOL_SPEC_LOCATION`.
-
-## Core Rule: E2E-First TDD
+If the user gives a different spec location explicitly, use that instead.
 
-This skill enforces strict E2E-first test-driven development. The rules:
+## Core Rule: Black-Box-First TDD
 
-1. **E2E tests are the spec.** The `e2e/` test harness defines what "working" means. The agent binary must pass all E2E tests to be considered complete.
-2. **Run E2E tests at every step.** Each implementation tier starts by running the E2E test and watching it fail. You implement until it passes. No exceptions.
-3. **Unit tests are written last.** After all E2E tiers pass, you write unit tests using real data collected from E2E runs as golden fixtures.
-4. **If you didn't watch it fail, you don't know if it tests the right thing.** Never write a test you haven't seen fail first.
-5. **Minimum viable fix.** At each E2E failure, implement only the code needed to fix that failure. Don't anticipate future tiers.
+1. **Protocol compliance is the contract.** The binary must pass the shared `external-agents-tests` suite.
+2. **Lifecycle tests prove real integration.** The repo-local `e2e/` harness covers the Entire + real-agent workflow and stays separate from generic protocol checks.
+3. **Unit tests are written last.** After protocol and lifecycle behavior are working, add unit tests to lock down parsing, hooks, and file handling.
+4. **Watch failures before fixing them.** Run the failing test first so you know what behavior the code must satisfy.
+5. **Keep the fix scoped.** Implement only the behavior needed for the current failure, then rerun.
 
 ## Pipeline
 
-Run these three phases in order. Each phase builds on the previous phase's output.
-
 ### Phase 1: Research
 
-Discover the target agent's hook mechanism, transcript format, session management, and configuration. Map native concepts to protocol subcommands. Produces `<PROJECT_DIR>/AGENT.md` with protocol mapping and E2E prerequisites.
-
-Use the Read tool to read the file `.claude/skills/entire-external-agent/research.md` and follow the procedure it contains.
+Discover the target agent's hook mechanism, transcript format, session layout, CLI entrypoints, and lifecycle prerequisites. Produce `<PROJECT_DIR>/AGENT.md` with the protocol mapping and any real-CLI requirements needed for lifecycle tests.
 
-**Expected output:** `<PROJECT_DIR>/AGENT.md` — agent research one-pager with protocol mapping and E2E test prerequisites.
+Use `.claude/skills/entire-external-agent/research.md`.
 
-**Commit gate:** After the research phase completes, create a git commit for the resulting files.
+Expected output:
+- `<PROJECT_DIR>/AGENT.md`
 
-**Gate:** If the agent lacks any mechanism for lifecycle hooks or session management, discuss with the user before proceeding. Some agents may only support a subset of the protocol.
+### Phase 2: Write Tests
 
-### Phase 2: Write-Tests
+Scaffold the binary and the test surfaces you will need:
 
-Scaffold the binary with compilable stubs and create a self-contained `e2e/` test harness in the project directory. The harness exercises the full human workflow: `entire enable`, real agent invocation, hook firing, checkpoint validation. Tests are expected to fail at this stage — they define the spec.
+- agent module structure under `<PROJECT_DIR>`
+- protocol compliance expectations compatible with `external-agents-tests`
+- lifecycle adapter wiring in this repo's `e2e/` harness
+- optional compliance fixtures if the agent benefits from stronger black-box `detect` or transcript assertions
 
-Use the Read tool to read the file `.claude/skills/entire-external-agent/write-tests.md` and follow the procedure it contains.
+Use `.claude/skills/entire-external-agent/write-tests.md`.
 
-**Expected output:** Complete project directory at `<PROJECT_DIR>` with compiled binary stubs and `e2e/` test harness that compiles but fails.
+Expected output:
+- compiling binary scaffold
+- any needed lifecycle adapter files under `e2e/agents/`
+- optional fixture file paths documented in `<PROJECT_DIR>/AGENT.md` or `README.md`
 
-**Commit gate:** After the scaffold compiles and the e2e harness compiles (`cd e2e && go test -c -tags=e2e`), create a git commit.
+### Phase 3: Implement
 
-### Phase 3: Implement (E2E-First, Unit Tests Last)
+Implement until:
 
-Build the real agent binary using strict E2E-first TDD. E2E tests drive development at every step — run each tier, watch it fail, implement the minimum fix, repeat. Unit tests are written only after all E2E tiers pass, using real data from E2E runs as golden fixtures.
+- the binary passes protocol compliance
+- lifecycle tests pass when the required CLIs are available
+- unit tests cover the important internal behaviors
 
-Use the Read tool to read the file `.claude/skills/entire-external-agent/implement.md` and follow the procedure it contains.
+Use `.claude/skills/entire-external-agent/implement.md`.
 
-**Expected output:** Fully implemented binary where all E2E tests pass and unit tests lock in behavior.
-
-**Note:** `AGENT.md` is a living document — Phases 2 and 3 update it when they discover new information during testing or implementation.
+Expected output:
+- fully working binary
+- passing unit tests
+- passing protocol compliance
+- passing lifecycle integration where dependencies are available
 
 ## Final Summary
 
-After all three phases, summarize:
-- Agent name and binary name
-- Language used
-- Capabilities declared
-- E2E test results (all tiers passing)
-- Unit test coverage
-- Installation instructions (`go install`, `pip install`, etc.)
-- Any remaining gaps or TODOs
+At the end, summarize:
+
+- agent name and binary name
+- implementation language
+- declared capabilities
+- protocol compliance status
+- lifecycle test status
+- unit test coverage
+- installation instructions
+- any remaining gaps
diff --git a/.claude/skills/entire-external-agent/implement.md b/.claude/skills/entire-external-agent/implement.md
index 77762f1..dd8d412 100644
--- a/.claude/skills/entire-external-agent/implement.md
+++ b/.claude/skills/entire-external-agent/implement.md
@@ -1,270 +1,137 @@
 ---
 name: implement
 description: >
-  Phase 3: Build the external agent binary using strict E2E-first TDD.
-  Use /entire-external-agent implement or /entire-external-agent:implement
-  when you only need the implementation phase.
+  Phase 3: Implement the external agent binary using protocol compliance first,
+  lifecycle integration second, and unit tests last.
 ---
 
 # Implement Procedure
 
-Build the external agent binary using strict E2E-first TDD. E2E tests drive development at every step — run each tier, watch it fail, implement the minimum fix, repeat. Unit tests are written only after all E2E tiers pass, using real data from E2E runs as golden fixtures.
+Implement the agent with black-box-first TDD.
 
-> **Warning:** This phase involves iterative E2E test cycles with real agent invocations. Expect this to take 2-4 hours depending on agent complexity and API response times.
+The order is:
+
+1. protocol compliance against `external-agents-tests`
+2. lifecycle integration in this repo's `e2e/` harness
+3. unit tests in the agent module
 
 ## Prerequisites
 
 Ensure the following are available:
-- `AGENT_NAME`, `AGENT_SLUG`, `LANGUAGE`, `PROJECT_DIR` — from orchestrator or user
-- `<PROJECT_DIR>/AGENT.md` — research one-pager with E2E test prerequisites
-- Scaffolded project that compiles and responds to `info`
-- E2E test harness at `<PROJECT_DIR>/e2e/` that compiles
-
-## Core Principle: E2E-First TDD
-
-1. **E2E tests are the spec.** The `e2e/` test harness defines what "working" means. You implement until tests pass.
-2. **Watch it fail first.** Every E2E tier starts by running the test and observing the failure. If you haven't seen the failure, you don't understand what needs fixing.
-3. **Minimum viable fix.** At each failure, implement only the code needed to make that specific assertion pass. Don't anticipate future tiers.
-4. **No unit tests during Steps 3-9.** Unit tests are written in Step 11 after all E2E tiers pass, using real data from E2E runs as golden fixtures.
-5. **Format and lint, don't unit test.** Between E2E tiers, run format/lint to keep code clean. No unit tests between tiers.
-6. **If you didn't watch it fail, you don't know if it tests the right thing.**
 
-**Do NOT write unit tests during Steps 3-9.** All unit test writing is consolidated in Step 11.
+- `AGENT_NAME`
+- `AGENT_SLUG`
+- `PROJECT_DIR`
+- `<PROJECT_DIR>/AGENT.md`
+- compiling scaffold from the write-tests phase
 
-## Procedure
+## Step 1: Read Before Coding
 
-### Step 1: Read Protocol Spec + AGENT.md
+Read:
 
-Read these files before writing any code:
+1. the protocol spec
+2. the current agent code
+3. `<PROJECT_DIR>/AGENT.md`
+4. the lifecycle adapter in `e2e/agents/<slug>.go` if it already exists
 
-1. Read `https://github.com/entireio/cli/blob/main/docs/architecture/external-agent-protocol.md` — full protocol spec
-2. Read `https://github.com/entireio/cli/blob/main/cmd/entire/cli/agent/external/types.go` — JSON response types
-3. Read `https://github.com/entireio/cli/blob/main/cmd/entire/cli/agent/external/external.go` — how the CLI calls each subcommand
-4. Read `<PROJECT_DIR>/AGENT.md` — agent-specific hook mechanism, transcript format, config structure, E2E prerequisites
+## Step 2: Establish the First Failing Compliance Run
 
-### Step 2: Verify Baseline
+Build the binary and run the shared compliance suite first.
 
-Build the binary and run the first E2E test to confirm it fails for the right reason (agent behavior, not harness bug).
+If `external-agents-tests` is checked out beside this repo:
 
 ```bash
-make build && make install
-make test:e2e:run TEST=TestHookInstallAndDetect
-```
-
-**Expected:** Test fails because the agent binary returns stub data. If the test fails for a different reason (harness compilation error, missing binary, broken assertion), fix the harness first.
-
-### Step 3: E2E Tier 1 — `TestHookInstallAndDetect`
-
-**What it exercises:**
-- `detect` — agent binary detection
-- `install-hooks` — hook installation via `entire enable`
-- `are-hooks-installed` — hook presence detection
-- Basic binary invocation and JSON response format
-
-**Cycle:**
-
-1. Run: `make test:e2e:run TEST=TestHookInstallAndDetect`
-2. **Watch it FAIL** — read the failure output carefully
-3. Read the failure — what subcommand/behavior is missing?
-4. Implement the MINIMUM code to fix the failure
-5. Re-run until PASS
-6. `make build`
-7. Commit
-
-### Step 4: E2E Tier 2 — `TestSingleSessionManualCommit`
-
-The foundational test. Exercises the full agent lifecycle: start session → agent prompt → agent produces files → user commits → checkpoint created.
-
-**What it exercises:**
-- `parse-hook` for all event types (session start, turn start, turn end, session end)
-- `get-session-id` — session ID extraction from hook input
-- `get-session-dir` / `resolve-session-file` — finding session/transcript files
-- `read-session` / `write-session` — session data management
-- `read-transcript` / `chunk-transcript` / `reassemble-transcript` — transcript handling
-
-**Cycle:**
-
-1. Run: `make test:e2e:run TEST=TestSingleSessionManualCommit`
-2. **Watch it FAIL** — read the failure output carefully
-3. Read the failure — which subcommand returns wrong data or errors?
-4. Implement the MINIMUM code to fix the failure
-5. Re-run until PASS
-6. `make build`
-7. Commit
-
-### Step 5: E2E Tier 3 — `TestCheckpointDeepValidation`
-
-Validates transcript quality: JSONL validity, content hash correctness, prompt extraction accuracy.
-
-**What it exercises:**
-- `get-transcript-position` — transcript file size/position
-- `extract-modified-files` — parsing transcript for file operations
-- `extract-prompts` — parsing transcript for user messages
-- `extract-summary` — parsing transcript for AI summaries
-
-**Cycle:**
-
-1. Run: `make test:e2e:run TEST=TestCheckpointDeepValidation`
-2. **Watch it FAIL** — this test often exposes subtle transcript formatting bugs
-3. Implement the MINIMUM fix
-4. Re-run until PASS
-5. `make build`
-6. Commit
-
-### Step 6: E2E Tier 4 — `TestMultipleTurnsManualCommit`
-
-Multi-turn session management. Two sequential prompts, one commit.
-
-**What it exercises:**
-- Session persistence across multiple prompts
-- Transcript accumulation across turns
-- Checkpoint capturing both turns
-
-**Cycle:**
-
-1. Run: `make test:e2e:run TEST=TestMultipleTurnsManualCommit`
-2. **Watch it FAIL**
-3. Implement the MINIMUM fix
-4. Re-run until PASS
-5. `make build`
-6. Commit
+cd <PROJECT_DIR>
+make build
 
-### Step 7: E2E Tier 5 — `TestSessionMetadata`
-
-Agent identification in checkpoint metadata.
-
-**What it exercises:**
-- Session metadata has correct agent name
-- Session ID is properly stored
-- Agent type field is populated
-
-**Cycle:**
-
-1. Run: `make test:e2e:run TEST=TestSessionMetadata`
-2. **Watch it FAIL**
-3. Implement the MINIMUM fix
-4. Re-run until PASS
-5. `make build`
-6. Commit
-
-### Step 8: E2E Tier 6 — `TestInteractiveSession`
-
-Tmux-based interactive mode. **Skip if the agent doesn't support interactive mode** (check AGENT.md's E2E prerequisites).
-
-**What it exercises:**
-- Interactive session launch
-- Multi-step prompting within a session
-- Session end on exit
+cd /path/to/external-agents-tests
+AGENT_BINARY=/abs/path/to/entire-agent-<slug> go test -v -count=1 ./...
+```
 
-**Cycle:**
+If that sibling repo is not available locally, use the protocol compliance GitHub Actions workflow (`.github/workflows/protocol-compliance.yml`) in CI as the compliance source of truth and keep local validation focused on the binary plus unit tests.
 
-1. Check AGENT.md — if interactive mode is not supported, skip this tier
-2. Run: `make test:e2e:run TEST=TestInteractiveSession`
-3. **Watch it FAIL**
-4. Implement the MINIMUM fix
-5. Re-run until PASS
-6. `make build`
-7. Commit
+Do not start by adding new protocol tests to this repo.
 
-### Step 9: E2E Tier 7 — `TestRewind`
+## Step 3: Fix Compliance Failures Incrementally
 
-Rewind functionality after a checkpoint.
+For each failing compliance assertion:
 
-**What it exercises:**
-- Rewind command works on checkpoints created by this agent
-- State is properly restored after rewind
+1. rerun the failing test
+2. inspect the exact subcommand behavior
+3. implement the minimum fix
+4. rerun until it passes
 
-**Cycle:**
+Areas the compliance suite typically drives:
 
-1. Run: `make test:e2e:run TEST=TestRewind`
-2. **Watch it FAIL**
-3. Implement the MINIMUM fix
-4. Re-run until PASS
-5. `make build`
-6. Commit
+- `info` and capability declarations
+- `detect`
+- session helpers
+- transcript chunking and reassembly
+- session read/write behavior
+- hooks capability
+- transcript analysis capability
 
-### Step 10: Full E2E Suite Pass
+## Step 4: Run Lifecycle Tests
 
-Run the complete E2E suite to catch any regressions:
+Once protocol compliance is in good shape, validate the real integration path:
 
 ```bash
-make test:e2e
+cd /path/to/repo
+make test-e2e AGENT=<slug>
 ```
 
-This runs every test, not just the ones targeted in Steps 3-9.
-
-**Important:** If some tests fail when running the full suite but pass individually, it may be a timing issue. Re-run each failing test individually before investigating:
-
-```bash
-make test:e2e:run TEST=TestFailingTestName
-```
+These tests require:
 
-Fix any real failures before proceeding. The same cycle applies: read the failure, implement the minimum fix, re-run.
+- the Entire CLI
+- the real agent CLI on `PATH`
+- `tmux` for interactive scenarios
 
-All E2E tests must pass before writing unit tests.
+If those dependencies are not available, note the gap explicitly and continue with the protocol and unit-test work.
 
-### Step 11: Write Unit Tests
+## Step 5: Fix Lifecycle Failures Incrementally
 
-Now that all E2E tiers pass, write unit tests to lock in behavior. Use real data from E2E runs (captured JSON payloads, transcript snippets, config file contents) as golden fixtures.
+Use lifecycle failures to refine:
 
-**Test files to create:**
+- the agent CLI adapter in `e2e/agents/<slug>.go`
+- prompt execution details
+- hook installation behavior after `entire enable`
+- rewind and checkpoint interactions
+- interactive session handling
 
-1. **`cmd/hooks_test.go`** (or language equivalent) — Test `install-hooks` (creates config, idempotent), `uninstall-hooks` (removes hooks), `are-hooks-installed` (detects presence). Use a temp directory to avoid touching real config.
+Keep protocol fixes in the agent binary itself. Keep real-CLI orchestration fixes in the lifecycle adapter.
 
-2. **`cmd/lifecycle_test.go`** — Test `parse-hook` for all event types. Use actual JSON payloads from E2E runs or AGENT.md examples. Test every event type mapping, null returns for unknown hook names, empty input, and malformed JSON.
+## Step 6: Add Unit Tests Last
 
-3. **`cmd/session_test.go`** — Test session subcommands (`get-session-id`, `read-session`, `write-session`) with actual JSON payloads.
+After the behavior is working end to end, add unit tests in the agent module for:
 
-4. **`cmd/transcript_test.go`** — Test `read-transcript`, `chunk-transcript`, `reassemble-transcript` with sample data. Test transcript analyzer methods if implemented. Use transcript snippets from E2E runs as golden test data.
+- hook parsing
+- transcript parsing
+- config file read-modify-write behavior
+- session file handling
+- protocol handlers
 
-5. **`cmd/info_test.go`** — Test `info` returns valid JSON with correct fields and `detect` returns expected results.
+Prefer using real payloads or fixtures captured during compliance and lifecycle runs.
 
-**Where to find golden test data:**
+## Step 7: Final Validation
 
-- E2E artifact directories contain captured transcripts, hook payloads, and config files
-- `AGENT.md` has example JSON payloads in the "Hook input" sections
-- The agent's actual config file format from E2E test repos
-
-Run: format + lint + test
-
-**Commit:** Create a git commit for the unit tests.
-
-### Step 12: Final Validation
-
-Run the complete validation:
+Run:
 
 ```bash
-make build     # Build
-make test      # Unit tests
-make test:e2e  # E2E tests
-```
+cd <PROJECT_DIR>
+make test
 
-Summarize:
-- All E2E tiers passing (list which tests pass)
-- Unit test coverage (number of test functions, what they cover)
-- Any gaps or TODOs remaining
-- Commands to build and install the binary
-
-## Standing Instructions
-
-- **Check AGENT.md first** for agent-specific information. If AGENT.md doesn't cover what you need, search external docs — but always update AGENT.md with anything new you discover.
-- **Preserve unknown config keys** when modifying agent configuration files (read-modify-write pattern).
-- **Validate JSON output** after each implementation — malformed JSON will cause the CLI to skip the agent.
-- **Handle missing files gracefully** — return appropriate error messages to stderr rather than panicking.
-
-## E2E Debugging Protocol
+cd /path/to/repo
+make test-e2e AGENT=<slug>
+```
 
-At every E2E failure, follow this protocol:
+If the local `external-agents-tests` checkout is available, rerun the full compliance suite as the final black-box pass.
 
-1. **Read the test output** — the assertion message often tells you exactly what's wrong
-2. **Check the agent binary output** — run the failing subcommand manually with the same args/stdin
-3. **Check Entire CLI logs** — look in the test repo's `.entire/logs/` directory
-4. **Implement the minimum fix** — don't over-engineer; fix only what the test demands
-5. **Re-run the failing test** — not the whole suite, just the one test
+## Output Checklist
 
-## Commit Strategy
+Summarize:
 
-After completing each tier:
-1. Build and verify the binary
-2. Run format and lint
-3. Create a git commit describing which tier was completed
+- compliance status
+- lifecycle status
+- unit-test status
+- any dependencies you could not satisfy locally
+- remaining gaps or TODOs
diff --git a/.claude/skills/entire-external-agent/write-tests.md b/.claude/skills/entire-external-agent/write-tests.md
index 8d985cb..8332db1 100644
--- a/.claude/skills/entire-external-agent/write-tests.md
+++ b/.claude/skills/entire-external-agent/write-tests.md
@@ -1,291 +1,132 @@
 ---
 name: write-tests
 description: >
-  Phase 2: Scaffold the external agent binary and add E2E tests to the shared
-  harness. Tests define the spec for the implement phase.
-  Use /entire-external-agent write-tests or /entire-external-agent:write-tests
-  when you only need test scaffolding.
+  Phase 2: Scaffold the external agent binary and add the correct testing hooks:
+  protocol compliance through external-agents-tests and lifecycle integration
+  through this repo's e2e harness.
 ---
 
 # Write-Tests Procedure
 
-Scaffold the external agent binary and add E2E tests to the shared repo-root `e2e/` harness. The harness auto-discovers all agents and exercises each one via protocol subcommands and full lifecycle integration (entire enable, agent invocation, checkpoint validation). Tests are expected to fail — they define the spec for the implement phase.
+Scaffold the external agent binary and wire it into the current testing split.
+
+Do not add new generic protocol tests under this repo's `e2e/` directory.
 
 ## Prerequisites
 
-Ensure the following are available:
-- `AGENT_NAME`, `AGENT_SLUG`, `LANGUAGE`, `PROJECT_DIR` — from orchestrator or user
-- `<PROJECT_DIR>/AGENT.md` — research one-pager with protocol mapping and E2E test prerequisites
+Ensure these are available:
 
-## Step 1: Scaffold the Binary
+- `AGENT_NAME`
+- `AGENT_SLUG`
+- `LANGUAGE`
+- `PROJECT_DIR`
+- `<PROJECT_DIR>/AGENT.md`
 
-Generate the project structure with compilable stubs. This is a condensed version of scaffolding — enough to get a binary that compiles and returns valid `info` JSON.
+## Step 1: Scaffold the Binary
 
-### Read source material at runtime
+Create a compilable binary that already exposes the protocol subcommands with valid JSON shapes.
 
-**Do not use static templates.** Read the following files at runtime to generate code that matches the current protocol version:
+Read at runtime:
 
-1. Read `https://github.com/entireio/cli/blob/main/docs/architecture/external-agent-protocol.md` — subcommand specs, JSON schemas, capabilities
-2. Read `https://github.com/entireio/cli/blob/main/cmd/entire/cli/agent/external/types.go` — JSON response struct definitions
-3. Read `https://github.com/entireio/cli/blob/main/cmd/entire/cli/agent/external/external.go` — how the CLI calls each subcommand
-4. Read `<PROJECT_DIR>/AGENT.md` — agent-specific decisions (capabilities, hook format, transcript location)
+1. `https://github.com/entireio/cli/blob/main/docs/architecture/external-agent-protocol.md`
+2. `https://github.com/entireio/cli/blob/main/cmd/entire/cli/agent/external/types.go`
+3. `https://github.com/entireio/cli/blob/main/cmd/entire/cli/agent/external/external.go`
+4. `<PROJECT_DIR>/AGENT.md`
 
-### Generate project structure (Go)
+For Go agents, prefer this shape:
 
-```
+```text
 <PROJECT_DIR>/
-  go.mod                    # Module: github.com/<user>/entire-agent-<slug>
-  main.go                   # Subcommand dispatch switch
+  go.mod
+  Makefile
+  README.md
+  AGENT.md
   cmd/
-    info.go                 # Required: info subcommand
-    detect.go               # Required: detect subcommand
-    session.go              # Required: session subcommands
-    transcript.go           # Required: transcript subcommands
-    resume.go               # Required: format-resume-command
-    hooks.go                # Capability: hooks (if declared)
-    analyzer.go             # Capability: transcript_analyzer (if declared)
-    [other capabilities]
+    entire-agent-<slug>/
+      main.go
   internal/
-    types.go                # Response types from external/types.go
-    protocol.go             # Env var helpers, constants
-  AGENT.md                  # Research one-pager (already exists)
-  README.md                 # Usage, installation, development
-  Makefile                  # build, install, test, test:e2e
-```
-
-**Only create capability files for capabilities declared in AGENT.md.**
-
-Each subcommand handler should:
-1. Parse arguments from `os.Args` or the language's arg parser
-2. Read stdin if required
-3. Return valid JSON matching the exact schema from `types.go`
-4. Use placeholder values (realistic but clearly fake, e.g., `session_id: "stub-session-000"`)
-
-### Verify the scaffold
-
-1. **Compiles without errors:** `make build`
-2. **`info` returns valid JSON:** `./entire-agent-<slug> info | python3 -c "import json,sys; print(json.dumps(json.load(sys.stdin), indent=2))"`
-3. **Unknown subcommand exits non-zero:** `./entire-agent-<slug> bogus; echo "exit: $?"`
-
-### Commit the scaffold
-
-Create a git commit for the scaffolded project.
-
-## Step 2: Read the Shared E2E Harness
-
-This repo already has a shared E2E harness at the repo root `e2e/` directory. Read these files to understand the patterns you must follow:
-
-1. `e2e/setup_test.go` — `TestMain` entry point: auto-discovers agents in `agents/`, builds binaries, adds them to PATH
-2. `e2e/testenv.go` — `TestEnv`: isolated filesystem environment with `AgentRunner`, `WriteFile`, `ReadFile`, `GitInit` helpers
-3. `e2e/harness.go` — `AgentRunner`: executes agent subcommands via `Run`, `RunJSON`, `MustSucceed`, `MustFail`
-4. `e2e/fixtures.go` — Test input builders: `HookInput`, `ParseHookInput`, `KiroTranscript` (with `AddPrompt`, `AddResponse`, `AddPromptWithFileEdit`)
-5. `e2e/entire.go` — CLI wrappers: `EntireEnable`, `EntireDisable`, `EntireRewindList`, `EntireRewind`, `EntireRunErr`
-6. `e2e/lifecycle.go` — `LifecycleEnv`: full lifecycle environment (git repo + `entire enable` + `WaitForCheckpoint` + `GetCheckpointTrailer`)
-7. `e2e/kiro_test.go` — Example subcommand tests (identity, sessions, hooks, transcript analysis)
-8. `e2e/kiro_lifecycle_test.go` — Example lifecycle tests (single/multi prompt, detect+enable, rewind, session persistence)
-
-**Key patterns to follow:**
-- All E2E files use `//go:build e2e` build tag and `package e2e`
-- `TestMain` auto-discovers agents by scanning `agents/entire-agent-*` directories for `cmd/<name>/main.go`
-- `NewTestEnv(t, "entire-agent-<slug>")` creates an isolated env with the built agent binary
-- `NewLifecycleEnv(t, "<slug>")` creates a full git repo with `entire enable` already run
-- Subcommand tests use `t.Parallel()` and `AgentRunner.RunJSON` for structured assertions
-- Lifecycle tests call `requireEntire(t)` and `requireKiroCLI(t)` (or equivalent) to skip/fail gracefully
-- `WaitForCheckpoint` polls until the `entire/checkpoints/v1` branch appears
-- Fixture builders (e.g. `KiroTranscript`) use the fluent pattern for easy test data construction
-
-## Step 3: Add Tests to the Shared E2E Harness
-
-Tests go in the existing `e2e/` directory at the repo root. The harness already provides all infrastructure — you only need to add test files and (optionally) agent-specific fixture builders.
-
-### How auto-discovery works
-
-`TestMain` in `e2e/setup_test.go` scans `agents/entire-agent-*` for directories with `cmd/<name>/main.go`, builds each binary, and stores them in `agentBinaries`. Your new agent is discovered automatically once the scaffold from Step 1 compiles.
-
-### Create `e2e/<slug>_test.go` — Subcommand Tests
-
-These exercise each protocol subcommand directly. Follow the pattern in `e2e/kiro_test.go`:
-
-```go
-//go:build e2e
-
-package e2e
-
-import "testing"
-
-// --- Identity ---
-
-func Test<Name>_Info(t *testing.T) {
-    t.Parallel()
-    env := NewTestEnv(t, "entire-agent-<slug>")
-    // Use env.Runner.RunJSON to decode the info response
-    // Assert protocol_version, name, capabilities, etc.
-}
-
-func Test<Name>_Detect_Present(t *testing.T) {
-    t.Parallel()
-    env := NewTestEnv(t, "entire-agent-<slug>")
-    // Create the agent's marker directory (e.g. .<slug>/)
-    // Assert detect returns present: true
-}
-
-func Test<Name>_Detect_Absent(t *testing.T) {
-    t.Parallel()
-    env := NewTestEnv(t, "entire-agent-<slug>")
-    // Assert detect returns present: false (no marker directory)
-}
-
-// --- Sessions ---
-// Test get-session-id, get-session-dir, resolve-session-file, write+read-session
-
-// --- Hooks ---
-// Test parse-hook for each hook type, install-hooks, uninstall-hooks, are-hooks-installed
-
-// --- Transcript ---
-// Test read-transcript, chunk+reassemble-transcript round-trip
-
-// --- Transcript Analysis (if capability declared) ---
-// Test get-transcript-position, extract-modified-files, extract-prompts, extract-summary
+    protocol/
+    <agent>/
 ```
 
-**Key patterns:**
-- Use `NewTestEnv(t, "entire-agent-<slug>")` for isolated environments
-- Create convenience constructors like `NewKiroTestEnv` if multiple tests share setup
-- Use `env.Runner.RunJSON` for structured output, `MustSucceed`/`MustFail` for exit code checks
-- All subcommand tests use `t.Parallel()` for speed
-
-### Create `e2e/<slug>_lifecycle_test.go` — Lifecycle Tests
-
-These exercise the full integration. Follow the pattern in `e2e/kiro_lifecycle_test.go`:
+The scaffold must:
 
-```go
-//go:build e2e
+- build successfully
+- return valid `info` JSON
+- exit non-zero for an unknown subcommand
 
-package e2e
+## Step 2: Prepare Protocol Compliance
 
-import "testing"
+The generic protocol suite lives in `external-agents-tests`.
 
-func TestLifecycle_<Name>_SinglePromptManualCommit(t *testing.T) {
-    requireEntire(t)
-    // require<Name>CLI(t)  — add a similar helper for your agent's CLI
-    t.Parallel()
+Your job in this repo is to make the agent easy to validate there:
 
-    env := NewLifecycleEnv(t, "<slug>")
-
-    // Run a prompt that creates a file
-    // Assert the file exists
-    // git add + commit
-    // WaitForCheckpoint
-    // Verify checkpoint trailer
-}
-```
-
-**Key patterns:**
-- Call `requireEntire(t)` (and a `require<Name>CLI(t)` helper) at the top — these skip the test gracefully when dependencies are missing
-- Use `NewLifecycleEnv(t, "<slug>")` which handles git init, seed commit, `.entire/settings.json`, and `entire enable`
-- Add a `Run<Name>Prompt` method on `LifecycleEnv` using the command from AGENT.md's E2E prerequisites
-
-### Add agent-specific fixture builders (if needed)
-
-If the agent has a custom transcript format, add a builder to `e2e/fixtures.go` following the `KiroTranscript` pattern:
-
-```go
-type <Name>Transcript struct { /* ... */ }
-func New<Name>Transcript(id string) *<Name>Transcript { /* ... */ }
-func (t *<Name>Transcript) AddPrompt(prompt string) *<Name>Transcript { /* ... */ }
-func (t *<Name>Transcript) JSON(t *testing.T) string { /* ... */ }
-```
+1. Keep the binary layout compatible with a simple build command:
+   `go build -o entire-agent-<slug> ./cmd/entire-agent-<slug>`
+2. If stronger black-box assertions are useful, add an optional fixture file under the agent module, for example:
+   `<PROJECT_DIR>/testdata/compliance.json`
+3. Document any required fixture paths in `<PROJECT_DIR>/README.md` and `<PROJECT_DIR>/AGENT.md`
 
-### Add agent-specific environment helpers (if needed)
+If a sibling checkout exists at `../external-agents-tests`, you can validate the scaffold locally with:
 
-If the agent needs custom setup (e.g., Kiro needs `.kiro/` and `.entire/tmp/`), add a convenience constructor to `e2e/testenv.go`:
-
-```go
-func New<Name>TestEnv(t *testing.T) *TestEnv {
-    t.Helper()
-    te := NewTestEnv(t, "entire-agent-<slug>")
-    te.MkdirAll(".<slug>")
-    te.MkdirAll(".entire/tmp")
-    return te
-}
+```bash
+cd ../external-agents-tests
+AGENT_BINARY=/abs/path/to/entire-agent-<slug> go test -v -count=1 ./...
 ```
 
-### Key conventions for test scenarios
-
-- **Build tag**: All E2E files must have `//go:build e2e` as the first line
-- **Package**: All files in `e2e/` use `package e2e`
-- **Naming**: Subcommand tests: `Test<Name>_<Subcommand>`. Lifecycle tests: `TestLifecycle_<Name>_<Scenario>`
-- **Timeouts**: Lifecycle tests use `WaitForCheckpoint(t, env, 30*time.Second)` for checkpoint polling
-- **Prompts**: Write prompts inline — include "Do not ask for confirmation" for agents that stall
-- **Assertions**: Use harness helpers (`AssertFileExists`, `GetCheckpointTrailer`), not raw git commands
-- **CLI operations**: Use `EntireEnable`, `EntireRewindList`, `EntireRewind` — never raw `exec.Command`
-- **Parallelism**: Subcommand tests use `t.Parallel()`. Lifecycle tests use `t.Parallel()` per test (each gets its own temp repo)
-- **Graceful skipping**: Lifecycle tests call `requireEntire(t)` to skip when the entire CLI isn't available
-
-## Step 4: Add Makefile Targets
+At this stage the tests are expected to fail. The goal is just to confirm the harness reaches the binary.
 
-### Agent-level Makefile (`<PROJECT_DIR>/Makefile`)
+## Step 3: Wire the Agent into Lifecycle Tests
 
-Add `build`, `test`, and `clean` targets for the agent binary:
+Lifecycle integration remains in this repo.
 
-```makefile
-BINARY := entire-agent-<AGENT_SLUG>
+Read these files before editing:
 
-.PHONY: build test clean
+1. `e2e/setup_test.go`
+2. `e2e/build.go`
+3. `e2e/lifecycle_test.go`
+4. `e2e/agents/agent.go`
+5. `e2e/agents/kiro.go`
+6. `e2e/testutil/repo.go`
+7. `e2e/entire/entire.go`
 
-build:
-	go build -o $(BINARY) ./cmd/entire-agent-<AGENT_SLUG>
-
-test:
-	go test ./...
-
-clean:
-	rm -f $(BINARY)
-```
+Then:
 
-### Repo-root Makefile
+1. Add `e2e/agents/<slug>.go` implementing the `Agent` interface.
+2. Register the agent in `init()` and set a concurrency gate.
+3. Implement `RunPrompt`, `StartSession`, `PromptPattern`, timeout multiplier, and any external-agent marker behavior needed by `SetupRepo`.
+4. Reuse the shared lifecycle scenarios in `e2e/lifecycle_test.go`. Add new lifecycle tests only if the new agent needs behavior that is not already covered.
 
-The repo-root `Makefile` already handles E2E test execution. Verify it includes:
+## Step 4: Verify the Scaffolding
 
-```makefile
-test-e2e:
-	cd e2e && go test -tags=e2e -v -count=1 ./...
+Run these checks:
 
-test-e2e-lifecycle:
-	cd e2e && E2E_REQUIRE_LIFECYCLE=1 go test -tags=e2e -v -count=1 -run TestLifecycle ./...
-
-test-unit:
-	@for dir in agents/entire-agent-*/; do \
-		echo "Testing $$dir..."; \
-		cd $$dir && go test ./... && cd ../..; \
-	done
+```bash
+cd <PROJECT_DIR>
+make build
+./entire-agent-<slug> info
+go test ./...
 
-test-all: test-unit test-e2e
+cd /path/to/repo/e2e
+go test -c -tags=e2e
 ```
 
-The `test-e2e` target builds all agents automatically via `TestMain` — no need to build/install first.
-
-## Step 5: Verify Tests Compile
-
-Run from the repo root:
-```bash
-cd e2e && go test -c -tags=e2e
-```
+If `../external-agents-tests` exists, also run one compliance pass against the built binary; assertion failures are expected at this stage.
 
-This must succeed (compiles the test binary including the new agent's tests). Tests are expected to fail when executed — they define the spec for the implement phase.
+## Step 5: Commit Gate
 
-If the harness doesn't compile, fix issues before proceeding.
+Create a commit once:
 
-## Step 6: Commit
+- the binary compiles
+- `info` returns valid JSON
+- lifecycle harness compiles
+- the compliance suite can invoke the binary, even if assertions still fail
 
-Create a git commit for the new E2E tests and the scaffolded binary.
+## Output Checklist
 
-## Output
+Summarize:
 
-Summarize what was created:
-- Project structure (files created, capabilities declared)
-- E2E tests added (number of subcommand tests and lifecycle tests, what they exercise)
-- Confirmation that binary compiles and `info` returns valid JSON
-- Confirmation that E2E harness compiles with new tests (`go test -c -tags=e2e`)
-- Note that all E2E tests are expected to fail — the implement phase will make them pass
-- Commands to run: `make test-e2e` (all tests) or `cd e2e && go test -tags=e2e -v -run Test<Name>_Info ./...` (single test)
+- files created under `<PROJECT_DIR>`
+- lifecycle adapter files added or updated under `e2e/`
+- optional compliance fixture paths
+- commands run and their status
diff --git a/.github/workflows/protocol-compliance.yml b/.github/workflows/protocol-compliance.yml
new file mode 100644
index 0000000..85357a2
--- /dev/null
+++ b/.github/workflows/protocol-compliance.yml
@@ -0,0 +1,65 @@
+name: Protocol Compliance
+
+on:
+  pull_request:
+  push:
+    branches:
+      - main
+
+permissions:
+  contents: read
+
+jobs:
+  discover-agents:
+    runs-on: ubuntu-latest
+    outputs:
+      count: ${{ steps.discover.outputs.count }}
+      matrix: ${{ steps.discover.outputs.matrix }}
+    steps:
+      - uses: actions/checkout@v4
+
+      - id: discover
+        shell: bash
+        run: |
+          count=0
+          entries=""
+
+          while IFS= read -r dir; do
+            name="$(basename "$dir")"
+            if [[ -n "$entries" ]]; then
+              entries="${entries},"
+            fi
+            entries="${entries}{\"name\":\"${name}\",\"dir\":\"${dir}\",\"binary\":\"${dir}/${name}\"}"
+            count=$((count + 1))
+          done < <(find agents -mindepth 1 -maxdepth 1 -type d -name 'entire-agent-*' | sort)
+
+          echo "count=${count}" >> "$GITHUB_OUTPUT"
+          echo "matrix={\"include\":[${entries}]}" >> "$GITHUB_OUTPUT"
+
+  test-agents:
+    needs: discover-agents
+    if: ${{ needs.discover-agents.outputs.count != '0' }}
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix: ${{ fromJson(needs.discover-agents.outputs.matrix) }}
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-go@v5
+        with:
+          go-version-file: ${{ matrix.dir }}/go.mod
+          cache-dependency-path: ${{ matrix.dir }}/go.sum
+
+      - name: Run unit tests
+        working-directory: ${{ matrix.dir }}
+        run: go test ./...
+
+      - name: Build agent binary
+        working-directory: ${{ matrix.dir }}
+        run: go build -o "${{ matrix.name }}" "./cmd/${{ matrix.name }}"
+
+      - name: Run protocol compliance suite
+        uses: entireio/external-agents-tests@main
+        with:
+          binary-path: ${{ matrix.binary }}
diff --git a/AGENTS.md b/AGENTS.md
index a694aad..bbdf8d8 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -1,6 +1,10 @@
 # External Agent Builder
 
-This repository includes a skill that guides you through building standalone external agent binaries for the [Entire CLI](https://github.com/entireio/cli). The skill uses E2E-first TDD and runs in three phases.
+This repository includes a skill that guides you through building standalone external agent binaries for the [Entire CLI](https://github.com/entireio/cli). The current testing split is:
+
+- Protocol compliance in `external-agents-tests`
+- Lifecycle integration in this repo's `e2e/` harness
+- Agent-specific unit tests in each `agents/entire-agent-*` module
 
 ## Available Commands
 
@@ -8,8 +12,8 @@ This repository includes a skill that guides you through building standalone ext
 |---------|-----------|-------------|
 | Full pipeline | `.claude/skills/entire-external-agent/SKILL.md` | Run all three phases sequentially |
 | Research | `.claude/skills/entire-external-agent/research.md` | Analyze the target agent's capabilities and map to the protocol |
-| Write tests | `.claude/skills/entire-external-agent/write-tests.md` | Scaffold the binary and create E2E test harness |
-| Implement | `.claude/skills/entire-external-agent/implement.md` | Build the binary using E2E-first TDD (unit tests last) |
+| Write tests | `.claude/skills/entire-external-agent/write-tests.md` | Scaffold the binary and wire protocol compliance plus lifecycle coverage |
+| Implement | `.claude/skills/entire-external-agent/implement.md` | Build the binary using protocol compliance first, lifecycle second, unit tests last |
 
 ## How to Use
 
@@ -18,6 +22,7 @@ When the user asks to "build an external agent", "create an agent binary", or "e
 1. Read `.claude/skills/entire-external-agent/SKILL.md` for the full pipeline overview
 2. Follow the three phases in order: research, write-tests, implement
 3. Each phase has a dedicated skill file with detailed instructions
+4. Keep reusable protocol checks out of this repo's `e2e/` directory. Add them to `external-agents-tests` instead.
 
 ## Tool Mapping (Codex)
 
diff --git a/Makefile b/Makefile
index 6e97767..17043c3 100644
--- a/Makefile
+++ b/Makefile
@@ -1,10 +1,13 @@
-.PHONY: test-e2e test-unit test-all test-e2e-lifecycle test-e2e-binary
+.PHONY: test-e2e test-unit test-all test-e2e-lifecycle
 
 test-e2e:
+	@$(MAKE) test-e2e-lifecycle AGENT="$(AGENT)"
+
+test-e2e-lifecycle:
 ifdef AGENT
-	cd e2e && E2E_AGENT=$(AGENT) go test -tags=e2e -v -count=1 ./...
+	cd e2e && E2E_AGENT=$(AGENT) go test -tags=e2e -v -count=1 -run TestLifecycle ./...
 else
-	cd e2e && go test -tags=e2e -v -count=1 ./...
+	cd e2e && go test -tags=e2e -v -count=1 -run TestLifecycle ./...
 endif
 
 test-unit:
@@ -13,19 +16,4 @@ test-unit:
 		cd $$dir && go test ./... && cd ../..; \
 	done
 
-test-e2e-lifecycle:
-ifdef AGENT
-	cd e2e && E2E_AGENT=$(AGENT) E2E_REQUIRE_LIFECYCLE=1 go test -tags=e2e -v -count=1 -run TestLifecycle ./...
-else
-	cd e2e && E2E_REQUIRE_LIFECYCLE=1 go test -tags=e2e -v -count=1 -run TestLifecycle ./...
-endif
-
-test-e2e-binary:
-ifdef AGENT
-	cd e2e && go test -tags=e2e -v -count=1 ./$(AGENT)/
-else
-	@echo "Usage: make test-e2e-binary AGENT=kiro"
-	@exit 1
-endif
-
-test-all: test-unit test-e2e
+test-all: test-unit test-e2e-lifecycle
diff --git a/README.md b/README.md
index 69365d6..8c08ab0 100644
--- a/README.md
+++ b/README.md
@@ -22,11 +22,11 @@ See each agent's own README for setup and usage instructions.
 
 ## Building a New External Agent
 
-This repo includes a skill that guides you through building a new external agent using an E2E-first TDD pipeline. The skill runs in three phases:
+This repo includes a skill that guides you through building a new external agent against two test layers, followed by implementation:
 
-1. **Research** — analyzes the target AI agent's file formats, session layout, and hook mechanisms
-2. **Write tests** — generates E2E and unit tests against the external agent protocol
-3. **Implement** — builds the Go binary to pass all tests
+1. **Protocol compliance** — generic subcommand coverage from [`entireio/external-agents-tests`](https://github.com/entireio/external-agents-tests)
+2. **Lifecycle integration** — repo-local `e2e/` tests that exercise `entire enable`, prompt execution, checkpoints, and rewind
+3. **Implementation** — build the binary until both layers pass, then add unit tests
 
 ### Getting Started — Zero Setup
 
@@ -41,55 +41,61 @@ Clone the repo and open it in your AI coding tool. Each tool auto-discovers the
 
 The skill files live in `.claude/skills/entire-external-agent/` if you want to read the details.
 
-## E2E Tests
+## Testing
 
-The `e2e/` directory contains a shared test harness that exercises all external agents. Tests are split into two tiers:
+Testing is intentionally split:
 
-- **Subcommand tests** (`kiro_test.go`) — exercise each protocol subcommand directly against the agent binary (identity, sessions, transcript, hooks, transcript analysis). These run without any external dependencies beyond the agent binary itself.
-- **Lifecycle tests** (`kiro_lifecycle_test.go`) — exercise the full integration flow: `entire enable`, agent prompt execution, git commit, checkpoint creation, and rewind. These require the `entire` CLI and the agent's own CLI (e.g. `kiro-cli-chat`) to be available.
+- **Generic protocol checks** run in GitHub Actions via [`entireio/external-agents-tests`](https://github.com/entireio/external-agents-tests). The workflow builds each `entire-agent-*` binary in this repo and runs the shared compliance suite against it.
+- **Lifecycle tests** stay in this repo's [`e2e/`](e2e/) harness. These verify the parts that depend on Entire itself and on the real agent CLI: prompt execution, hook installation after `entire enable`, checkpoint creation, rewind behavior, and interactive sessions.
+- **Unit tests** live with each agent implementation under [`agents/`](agents/).
 
 ### Running Tests
 
 ```bash
-# Run all E2E tests (subcommand-level only; lifecycle tests skip if deps missing)
+# Run unit tests for all agents
+make test-unit
+
+# Run lifecycle integration tests from this repo
 make test-e2e
 
-# Run lifecycle tests (fails instead of skipping if entire/kiro-cli-chat are missing)
+# Same as test-e2e, kept as the explicit name
 make test-e2e-lifecycle
 
-# Run unit tests for all agents
-make test-unit
-
-# Run everything
+# Run unit + lifecycle tests locally
 make test-all
 ```
 
-### Test Harness Architecture
+Protocol compliance runs in CI through [`.github/workflows/protocol-compliance.yml`](.github/workflows/protocol-compliance.yml).
+
+### Lifecycle Harness Architecture
 
-The shared harness auto-discovers and builds all agents in `agents/` via `TestMain`:
+The lifecycle harness auto-discovers and builds all agents in `agents/` via `TestMain`:
 
 | File | Purpose |
 |------|---------|
 | `e2e/setup_test.go` | `TestMain` entry point — discovers agents, builds binaries, configures PATH |
-| `e2e/testenv.go` | `TestEnv` — isolated filesystem environment with agent binary runner |
-| `e2e/harness.go` | `AgentRunner` — executes agent subcommands, captures stdout/stderr/exit code |
-| `e2e/fixtures.go` | Test input builders: `HookInput`, `ParseHookInput`, `KiroTranscript` |
-| `e2e/entire.go` | CLI wrappers: `EntireEnable`, `EntireDisable`, `EntireRewindList`, `EntireRewind` |
-| `e2e/lifecycle.go` | `LifecycleEnv` — full lifecycle environment (git repo + `entire enable` + checkpoint helpers) |
+| `e2e/lifecycle_test.go` | Shared lifecycle scenarios run against every registered agent |
+| `e2e/agents/` | Agent adapters for the real CLIs used during lifecycle tests |
+| `e2e/entire/` | Entire CLI wrappers used by lifecycle assertions |
+| `e2e/testutil/` | Repo setup, artifact capture, git helpers, and checkpoint assertions |
 
 ### Environment Variables
 
 | Variable | Description |
 |----------|-------------|
 | `E2E_ENTIRE_BIN` | Path to the `entire` binary (defaults to `entire` from PATH) |
-| `E2E_REQUIRE_LIFECYCLE` | Set to `1` to fail (instead of skip) when lifecycle dependencies are missing |
+| `E2E_AGENT` | Filter lifecycle runs to a single registered agent |
+| `E2E_ARTIFACT_DIR` | Override lifecycle artifact output directory |
+| `E2E_KEEP_REPOS` | Set to any value to preserve temp repos for debugging |
+| `E2E_CONCURRENT_TEST_LIMIT` | Override the per-agent lifecycle concurrency limit |
 
 ## Repository Layout
 
 ```
 agents/                          # Standalone external agent projects
   entire-agent-kiro/             # Kiro agent (Go binary)
-e2e/                             # Shared E2E test harness for all agents
+e2e/                             # Lifecycle integration harness
+.github/workflows/               # CI, including protocol compliance via external-agents-tests
 .claude/skills/entire-external-agent/  # Skill files (research, test-writer, implementer)
 AGENTS.md                        # Codex auto-discovery
 .cursor/rules/                   # Cursor auto-discovery
diff --git a/agents/entire-agent-kiro/README.md b/agents/entire-agent-kiro/README.md
index a18522b..0ae54e8 100644
--- a/agents/entire-agent-kiro/README.md
+++ b/agents/entire-agent-kiro/README.md
@@ -103,24 +103,15 @@ make clean    # Remove built binary
 go run ./cmd/entire-agent-kiro info
 ```
 
-## E2E Tests
+## Testing
 
-E2E tests live in the shared `e2e/` directory at the repo root (not inside this agent's directory). The harness auto-discovers and builds all agents, then runs tests against each.
+Kiro is validated in three places:
 
-### Subcommand tests (`e2e/kiro_test.go`)
+- **Unit tests** live in this module and cover the Kiro-specific implementation details.
+- **Protocol compliance** runs in GitHub Actions through [`entireio/external-agents-tests`](https://github.com/entireio/external-agents-tests) against the built `entire-agent-kiro` binary.
+- **Lifecycle tests** live in the shared repo-root [`e2e/`](../../e2e/) harness and require `entire` plus `kiro-cli-chat`.
 
-Exercise each protocol subcommand directly — no external dependencies needed:
-
-- **Identity**: `info`, `detect` (present/absent)
-- **Sessions**: `get-session-id`, `get-session-dir`, `resolve-session-file`, `write-session`/`read-session` round-trip
-- **Transcript**: `read-transcript`, `chunk-transcript`/`reassemble-transcript` round-trip
-- **Hooks**: `parse-hook` (spawn, prompt-submit, pre-tool-use, stop), `install-hooks`/`uninstall-hooks`/`are-hooks-installed`, idempotent install
-- **Transcript analysis**: `get-transcript-position`, `extract-modified-files`, `extract-prompts`, `extract-summary`
-- **Other**: `format-resume-command`, unknown subcommand handling
-
-### Lifecycle tests (`e2e/kiro_lifecycle_test.go`)
-
-Full integration tests requiring `entire` CLI and `kiro-cli-chat`:
+The lifecycle suite covers:
 
 - **SinglePromptManualCommit** — agent creates file → commit → checkpoint with trailer
 - **MultiplePromptsManualCommit** — two prompts → single commit → checkpoint covers both
@@ -133,12 +124,15 @@ Full integration tests requiring `entire` CLI and `kiro-cli-chat`:
 ### Running
 
 ```bash
+# From this module:
+make test                    # Unit tests
+
 # From the repo root:
-make test-e2e                # All E2E tests (lifecycle tests skip if deps missing)
-make test-e2e-lifecycle      # Lifecycle tests only (fails if deps missing)
+make test-e2e                # Lifecycle tests
+make test-e2e-lifecycle      # Explicit lifecycle target
 
 # Run a specific test:
-cd e2e && go test -tags=e2e -v -count=1 -run TestKiro_Info ./...
+cd e2e && go test -tags=e2e -v -count=1 -run TestLifecycle_SinglePromptManualCommit ./...
 ```
 
 ## Troubleshooting
diff --git a/agents/entire-agent-kiro/internal/kiro/.entire/tmp/kiro-tool-calls.jsonl b/agents/entire-agent-kiro/internal/kiro/.entire/tmp/kiro-tool-calls.jsonl
new file mode 100644
index 0000000..c22140f
--- /dev/null
+++ b/agents/entire-agent-kiro/internal/kiro/.entire/tmp/kiro-tool-calls.jsonl
@@ -0,0 +1,6 @@
+{"id":"","name":"read","args":null}
+{"id":"","name":"write","args":{"file_path":"/tmp/main.go","content":"package main"}}
+{"id":"","name":"read","args":null}
+{"id":"","name":"write","args":{"file_path":"/tmp/main.go","content":"package main"}}
+{"id":"","name":"read","args":null}
+{"id":"","name":"write","args":{"file_path":"/tmp/main.go","content":"package main"}}
diff --git a/e2e/README.md b/e2e/README.md
index 0651f8b..a670098 100644
--- a/e2e/README.md
+++ b/e2e/README.md
@@ -1,6 +1,8 @@
-# E2E Tests
+# Lifecycle Tests
 
-End-to-end tests for external agents, exercising the full lifecycle: agent prompts, git hooks, checkpoints, and rewind.
+End-to-end lifecycle tests for external agents. This harness covers the behaviors that only make sense against the real Entire CLI and the real agent CLI: `entire enable`, prompt execution, hook installation, checkpoint creation, rewind, and interactive sessions.
+
+Generic protocol compliance is no longer in this directory. Those checks run from [`entireio/external-agents-tests`](https://github.com/entireio/external-agents-tests) and are wired into this repo through GitHub Actions.
 
 ## Structure
 
@@ -19,23 +21,20 @@ e2e/
 │   └── assertions.go # Test assertions (testify-based)
 ├── bootstrap/        # Pre-test agent bootstrap (CI auth setup)
 │   └── main.go       # go run ./e2e/bootstrap
+├── build.go          # Agent discovery + binary builds for lifecycle runs
 ├── setup_test.go     # TestMain: build agents, artifact dir, preflight
-├── kiro_lifecycle_test.go  # Lifecycle tests (ForEachAgent pattern)
-├── kiro_test.go      # Protocol-level tests (stdin/stdout subcommands)
-├── harness.go        # AgentRunner for protocol tests
-├── testenv.go        # TestEnv for protocol tests
-└── fixtures.go       # HookInput, KiroTranscript builders
+└── lifecycle_test.go # Shared lifecycle scenarios (ForEachAgent pattern)
 ```
 
 ## Running Tests
 
-### All E2E tests (protocol + lifecycle)
+### All lifecycle tests
 
 ```bash
 make test-e2e
 ```
 
-### Lifecycle tests only
+### Explicit lifecycle target
 
 ```bash
 make test-e2e-lifecycle
@@ -55,7 +54,6 @@ cd e2e && go test -tags=e2e -v -count=1 -run TestLifecycle_SinglePromptManualCom
 | `E2E_ENTIRE_BIN` | Path to `entire` binary. Falls back to `$PATH` lookup. |
 | `E2E_ARTIFACT_DIR` | Override artifact output directory. |
 | `E2E_KEEP_REPOS` | Set to any value to preserve temp repos after tests. |
-| `E2E_REQUIRE_LIFECYCLE` | Set to `1` to fail (not skip) when lifecycle deps are missing. |
 | `E2E_CONCURRENT_TEST_LIMIT` | Override per-agent concurrency limit (default: 2 for kiro). |
 
 ## Adding a New Agent
@@ -63,7 +61,8 @@ cd e2e && go test -tags=e2e -v -count=1 -run TestLifecycle_SinglePromptManualCom
 1. Create `e2e/agents/<name>.go` implementing the `Agent` interface.
 2. In `init()`, conditionally register based on `E2E_AGENT` env var.
 3. Call `RegisterGate("<name>", N)` to set concurrency limit.
-4. If it's an external agent, implement `ExternalAgent` interface.
+4. If it's an external agent, implement the `ExternalAgent` interface so `SetupRepo` can pre-enable external agents in Entire settings.
+5. Keep generic protocol validation out of this directory. Add any reusable black-box protocol coverage to `external-agents-tests` instead.
 
 ## Debugging Failures
 
diff --git a/e2e/fixtures.go b/e2e/fixtures.go
deleted file mode 100644
index 22c6f40..0000000
--- a/e2e/fixtures.go
+++ /dev/null
@@ -1,50 +0,0 @@
-//go:build e2e
-
-package e2e
-
-import (
-	"encoding/json"
-	"testing"
-)
-
-// HookInput builds stdin payloads for hook-related subcommands.
-type HookInput struct {
-	HookType   string                 `json:"hook_type,omitempty"`
-	SessionID  string                 `json:"session_id,omitempty"`
-	SessionRef string                 `json:"session_ref,omitempty"`
-	Timestamp  string                 `json:"timestamp,omitempty"`
-	UserPrompt string                 `json:"user_prompt,omitempty"`
-	ToolName   string                 `json:"tool_name,omitempty"`
-	ToolUseID  string                 `json:"tool_use_id,omitempty"`
-	ToolInput  json.RawMessage        `json:"tool_input,omitempty"`
-	RawData    map[string]interface{} `json:"raw_data,omitempty"`
-}
-
-// JSON returns the JSON-encoded string for use as stdin.
-func (h HookInput) JSON(t *testing.T) string {
-	t.Helper()
-	data, err := json.Marshal(h)
-	if err != nil {
-		t.Fatalf("marshal HookInput: %v", err)
-	}
-	return string(data)
-}
-
-// ParseHookInput builds stdin payloads for the parse-hook subcommand.
-type ParseHookInput struct {
-	HookEventName string          `json:"hook_event_name,omitempty"`
-	CWD           string          `json:"cwd,omitempty"`
-	Prompt        string          `json:"prompt,omitempty"`
-	ToolName      string          `json:"tool_name,omitempty"`
-	ToolInput     json.RawMessage `json:"tool_input,omitempty"`
-}
-
-// JSON returns the JSON-encoded string.
-func (p ParseHookInput) JSON(t *testing.T) string {
-	t.Helper()
-	data, err := json.Marshal(p)
-	if err != nil {
-		t.Fatalf("marshal ParseHookInput: %v", err)
-	}
-	return string(data)
-}
diff --git a/e2e/harness.go b/e2e/harness.go
deleted file mode 100644
index c776bb7..0000000
--- a/e2e/harness.go
+++ /dev/null
@@ -1,88 +0,0 @@
-//go:build e2e
-
-package e2e
-
-import (
-	"bytes"
-	"encoding/json"
-	"errors"
-	"os/exec"
-	"testing"
-)
-
-// CommandResult holds the output of a binary invocation.
-type CommandResult struct {
-	Stdout   []byte
-	Stderr   []byte
-	ExitCode int
-	Err      error
-}
-
-// AgentRunner invokes an agent binary with subcommands.
-type AgentRunner struct {
-	BinaryPath string
-	Env        []string
-}
-
-// Run executes the agent binary with the given subcommand, args, and optional stdin.
-func (r *AgentRunner) Run(stdin string, subcommand string, args ...string) CommandResult {
-	cmdArgs := append([]string{subcommand}, args...)
-	cmd := exec.Command(r.BinaryPath, cmdArgs...)
-	cmd.Stdin = bytes.NewBufferString(stdin)
-	cmd.Env = r.Env
-
-	var stdout, stderr bytes.Buffer
-	cmd.Stdout = &stdout
-	cmd.Stderr = &stderr
-
-	err := cmd.Run()
-	exitCode := 0
-	if err != nil {
-		var exitErr *exec.ExitError
-		if errors.As(err, &exitErr) {
-			exitCode = exitErr.ExitCode()
-		} else {
-			exitCode = -1
-		}
-	}
-
-	return CommandResult{
-		Stdout:   stdout.Bytes(),
-		Stderr:   stderr.Bytes(),
-		ExitCode: exitCode,
-		Err:      err,
-	}
-}
-
-// RunJSON executes the subcommand and JSON-decodes stdout into dest.
-func (r *AgentRunner) RunJSON(t *testing.T, dest any, stdin string, subcommand string, args ...string) CommandResult {
-	t.Helper()
-	result := r.Run(stdin, subcommand, args...)
-	if result.ExitCode != 0 {
-		t.Fatalf("%s %s failed (exit %d): %s", r.BinaryPath, subcommand, result.ExitCode, result.Stderr)
-	}
-	if err := json.Unmarshal(result.Stdout, dest); err != nil {
-		t.Fatalf("failed to decode JSON from %s %s: %v\nstdout: %s", r.BinaryPath, subcommand, err, result.Stdout)
-	}
-	return result
-}
-
-// MustSucceed asserts the subcommand exits with code 0.
-func (r *AgentRunner) MustSucceed(t *testing.T, stdin string, subcommand string, args ...string) CommandResult {
-	t.Helper()
-	result := r.Run(stdin, subcommand, args...)
-	if result.ExitCode != 0 {
-		t.Fatalf("%s %s: expected exit 0, got %d\nstderr: %s", r.BinaryPath, subcommand, result.ExitCode, result.Stderr)
-	}
-	return result
-}
-
-// MustFail asserts the subcommand exits with a non-zero code.
-func (r *AgentRunner) MustFail(t *testing.T, stdin string, subcommand string, args ...string) CommandResult {
-	t.Helper()
-	result := r.Run(stdin, subcommand, args...)
-	if result.ExitCode == 0 {
-		t.Fatalf("%s %s: expected non-zero exit, got 0\nstdout: %s", r.BinaryPath, subcommand, result.Stdout)
-	}
-	return result
-}
diff --git a/e2e/kiro/fixtures_test.go b/e2e/kiro/fixtures_test.go
deleted file mode 100644
index 9b3aa55..0000000
--- a/e2e/kiro/fixtures_test.go
+++ /dev/null
@@ -1,105 +0,0 @@
-//go:build e2e
-
-package kiro
-
-import (
-	"encoding/json"
-	"testing"
-
-	e2e "github.com/entireio/external-agents/e2e"
-)
-
-// KiroTranscript builds Kiro-format transcript files for testing.
-type KiroTranscript struct {
-	ConversationID string             `json:"conversation_id"`
-	History        []kiroHistoryEntry `json:"history"`
-}
-
-type kiroHistoryEntry struct {
-	User      kiroUserMessage `json:"user"`
-	Assistant json.RawMessage `json:"assistant"`
-}
-
-type kiroUserMessage struct {
-	Content   json.RawMessage `json:"content"`
-	Timestamp string          `json:"timestamp,omitempty"`
-}
-
-// NewKiroTranscript creates a new transcript builder.
-func NewKiroTranscript(id string) *KiroTranscript {
-	return &KiroTranscript{ConversationID: id}
-}
-
-func marshalPromptContent(prompt string) json.RawMessage {
-	content, _ := json.Marshal(map[string]interface{}{
-		"Prompt": map[string]string{"prompt": prompt},
-	})
-	return content
-}
-
-// AddPrompt adds a user prompt entry with no assistant response.
-func (kt *KiroTranscript) AddPrompt(prompt string) *KiroTranscript {
-	kt.History = append(kt.History, kiroHistoryEntry{
-		User: kiroUserMessage{Content: marshalPromptContent(prompt)},
-	})
-	return kt
-}
-
-// AddPromptWithFileEdit adds a user prompt paired with an assistant response that contains a file edit tool use.
-func (kt *KiroTranscript) AddPromptWithFileEdit(prompt, filePath string) *KiroTranscript {
-	toolUse := map[string]interface{}{
-		"ToolUse": map[string]interface{}{
-			"message_id": "msg-001",
-			"tool_uses": []map[string]interface{}{
-				{
-					"id":   "tool-001",
-					"name": "fs_write",
-					"args": map[string]string{"path": filePath},
-				},
-			},
-		},
-	}
-	assistantContent, _ := json.Marshal(toolUse)
-
-	kt.History = append(kt.History, kiroHistoryEntry{
-		User:      kiroUserMessage{Content: marshalPromptContent(prompt)},
-		Assistant: assistantContent,
-	})
-	return kt
-}
-
-// AddResponse adds a user prompt paired with an assistant text response.
-func (kt *KiroTranscript) AddResponse(prompt, response string) *KiroTranscript {
-	userContent := marshalPromptContent(prompt)
-
-	responseContent := map[string]interface{}{
-		"Response": map[string]interface{}{
-			"message_id": "msg-resp",
-			"content":    response,
-		},
-	}
-	assistantContent, _ := json.Marshal(responseContent)
-
-	kt.History = append(kt.History, kiroHistoryEntry{
-		User:      kiroUserMessage{Content: userContent},
-		Assistant: assistantContent,
-	})
-	return kt
-}
-
-// JSON returns the JSON-encoded transcript string.
-func (kt *KiroTranscript) JSON(t *testing.T) string {
-	t.Helper()
-	data, err := json.Marshal(kt)
-	if err != nil {
-		t.Fatalf("marshal KiroTranscript: %v", err)
-	}
-	return string(data)
-}
-
-// WriteToFile writes the transcript to a file and returns the absolute path.
-func (kt *KiroTranscript) WriteToFile(t *testing.T, env *e2e.TestEnv, relPath string) string {
-	t.Helper()
-	env.WriteFile(relPath, kt.JSON(t))
-	return env.AbsPath(relPath)
-}
diff --git a/e2e/kiro/kiro_test.go b/e2e/kiro/kiro_test.go
deleted file mode 100644
index d4d3011..0000000
--- a/e2e/kiro/kiro_test.go
+++ /dev/null
@@ -1,613 +0,0 @@
-//go:build e2e
-
-package kiro
-
-import (
-	"encoding/json"
-	"strings"
-	"testing"
-
-	e2e "github.com/entireio/external-agents/e2e"
-)
-
-// --- Identity ---
-
-func TestKiro_Info(t *testing.T) {
-	t.Parallel()
-	env := NewKiroTestEnv(t)
-
-	var resp struct {
-		ProtocolVersion int      `json:"protocol_version"`
-		Name            string   `json:"name"`
-		Type            string   `json:"type"`
-		Description     string   `json:"description"`
-		IsPreview       bool     `json:"is_preview"`
-		ProtectedDirs   []string `json:"protected_dirs"`
-		HookNames       []string `json:"hook_names"`
-		Capabilities    struct {
-			Hooks              bool `json:"hooks"`
-			TranscriptAnalyzer bool `json:"transcript_analyzer"`
-		} `json:"capabilities"`
-	}
-	env.Runner.RunJSON(t, &resp, "", "info")
-
-	if resp.ProtocolVersion != 1 {
-		t.Errorf("protocol_version = %d, want 1", resp.ProtocolVersion)
-	}
-	if resp.Name != "kiro" {
-		t.Errorf("name = %q, want %q", resp.Name, "kiro")
-	}
-	if resp.Type != "Kiro" {
-		t.Errorf("type = %q, want %q", resp.Type, "Kiro")
-	}
-	if !resp.Capabilities.Hooks {
-		t.Error("capabilities.hooks should be true")
-	}
-	if !resp.Capabilities.TranscriptAnalyzer {
-		t.Error("capabilities.transcript_analyzer should be true")
-	}
-	if len(resp.HookNames) != 5 {
-		t.Errorf("hook_names count = %d, want 5", len(resp.HookNames))
-	}
-	if len(resp.ProtectedDirs) != 1 || resp.ProtectedDirs[0] != ".kiro" {
-		t.Errorf("protected_dirs = %v, want [.kiro]", resp.ProtectedDirs)
-	}
-}
-
-func TestKiro_Detect_Present(t *testing.T) {
-	t.Parallel()
-	env := NewKiroTestEnv(t) // has .kiro/
-
-	var resp struct {
-		Present bool `json:"present"`
-	}
-	env.Runner.RunJSON(t, &resp, "", "detect")
-
-	if !resp.Present {
-		t.Error("detect should return present=true when .kiro/ exists")
-	}
-}
-
-func TestKiro_Detect_Absent(t *testing.T) {
-	t.Parallel()
-	env := e2e.NewTestEnvWithBinary(t, kiroBinary) // no .kiro/
-
-	var resp struct {
-		Present bool `json:"present"`
-	}
-	env.Runner.RunJSON(t, &resp, "", "detect")
-
-	if resp.Present {
-		t.Error("detect should return present=false when .kiro/ is absent")
-	}
-}
-
-// --- Session Management ---
-
-func TestKiro_GetSessionID(t *testing.T) {
-	t.Parallel()
-	env := NewKiroTestEnv(t)
-
-	input := e2e.HookInput{SessionID: "test-session-123"}
-
-	var resp struct {
-		SessionID string `json:"session_id"`
-	}
-	env.Runner.RunJSON(t, &resp, input.JSON(t), "get-session-id")
-
-	if resp.SessionID != "test-session-123" {
-		t.Errorf("session_id = %q, want %q", resp.SessionID, "test-session-123")
-	}
-}
-
-func TestKiro_GetSessionID_Generated(t *testing.T) {
-	t.Parallel()
-	env := NewKiroTestEnv(t)
-
-	input := e2e.HookInput{}
-
-	var resp struct {
-		SessionID string `json:"session_id"`
-	}
-	env.Runner.RunJSON(t, &resp, input.JSON(t), "get-session-id")
-
-	if resp.SessionID == "" {
-		t.Error("session_id should not be empty when no ID provided")
-	}
-}
-
-func TestKiro_GetSessionDir(t *testing.T) {
-	t.Parallel()
-	env := NewKiroTestEnv(t)
-
-	var resp struct {
-		SessionDir string `json:"session_dir"`
-	}
-	env.Runner.RunJSON(t, &resp, "", "get-session-dir", "-repo-path", env.Dir)
-
-	want := env.AbsPath(".entire/tmp")
-	if resp.SessionDir != want {
-		t.Errorf("session_dir = %q, want %q", resp.SessionDir, want)
-	}
-}
-
-func TestKiro_ResolveSessionFile(t *testing.T) {
-	t.Parallel()
-	env := NewKiroTestEnv(t)
-
-	sessionDir := env.AbsPath(".entire/tmp")
-
-	var resp struct {
-		SessionFile string `json:"session_file"`
-	}
-	env.Runner.RunJSON(t, &resp, "", "resolve-session-file",
-		"-session-dir", sessionDir,
-		"-session-id", "abc-123")
-
-	want := sessionDir + "/abc-123.json"
-	if resp.SessionFile != want {
-		t.Errorf("session_file = %q, want %q", resp.SessionFile, want)
-	}
-}
-
-func TestKiro_WriteAndReadSession(t *testing.T) {
-	t.Parallel()
-	env := NewKiroTestEnv(t)
-
-	sessionRef := env.AbsPath(".entire/tmp/sess-write-test.json")
-
-	// Write a session
-	writeInput := map[string]interface{}{
-		"session_id":  "sess-write-test",
-		"agent_name":  "kiro",
-		"repo_path":   env.Dir,
-		"session_ref": sessionRef,
-		"start_time":  "2026-01-01T00:00:00Z",
-		"native_data": []byte(`{"hello":"world"}`),
-	}
-	writeJSON, _ := json.Marshal(writeInput)
-	env.Runner.MustSucceed(t, string(writeJSON), "write-session")
-
-	// Verify the file was written
-	if !env.FileExists(".entire/tmp/sess-write-test.json") {
-		t.Fatal("session file was not written")
-	}
-
-	// Read it back
-	readInput := e2e.HookInput{
-		SessionID:  "sess-write-test",
-		SessionRef: sessionRef,
-	}
-	var resp struct {
-		SessionID  string `json:"session_id"`
-		AgentName  string `json:"agent_name"`
-		NativeData []byte `json:"native_data"`
-	}
-	env.Runner.RunJSON(t, &resp, readInput.JSON(t), "read-session")
-
-	if resp.SessionID != "sess-write-test" {
-		t.Errorf("session_id = %q, want %q", resp.SessionID, "sess-write-test")
-	}
-	if resp.AgentName != "kiro" {
-		t.Errorf("agent_name = %q, want %q", resp.AgentName, "kiro")
-	}
-}
-
-// --- Transcript ---
-
-func TestKiro_ReadTranscript(t *testing.T) {
-	t.Parallel()
-	env := NewKiroTestEnv(t)
-
-	transcript := NewKiroTranscript("conv-1").AddPrompt("hello").AddResponse("summarize", "done")
-	transcriptPath := transcript.WriteToFile(t, env, "transcript.json")
-
-	result := env.Runner.MustSucceed(t, "", "read-transcript", "-session-ref", transcriptPath)
-	if len(result.Stdout) == 0 {
-		t.Error("read-transcript returned empty stdout")
-	}
-
-	// Should be valid JSON
-	var parsed map[string]interface{}
-	if err := json.Unmarshal(result.Stdout, &parsed); err != nil {
-		t.Fatalf("read-transcript output is not valid JSON: %v", err)
-	}
-}
-
-func TestKiro_ChunkTranscript(t *testing.T) {
-	t.Parallel()
-	env := NewKiroTestEnv(t)
-
-	content := strings.Repeat("abcdefghij", 10) // 100 bytes
-
-	var resp struct {
-		Chunks [][]byte `json:"chunks"`
-	}
-	env.Runner.RunJSON(t, &resp, content, "chunk-transcript", "-max-size", "30")
-
-	if len(resp.Chunks) < 3 {
-		t.Errorf("expected at least 3 chunks for 100 bytes with max-size 30, got %d", len(resp.Chunks))
-	}
-}
-
-func TestKiro_ReassembleTranscript(t *testing.T) {
-	t.Parallel()
-	env := NewKiroTestEnv(t)
-
-	// First chunk the content
-	content := "hello world this is a test transcript"
-	var chunkResp struct {
-		Chunks [][]byte `json:"chunks"`
-	}
-	env.Runner.RunJSON(t, &chunkResp, content, "chunk-transcript", "-max-size", "10")
-
-	// Now reassemble
-	reassembleInput, _ := json.Marshal(chunkResp)
-	result := env.Runner.MustSucceed(t, string(reassembleInput), "reassemble-transcript")
-
-	if string(result.Stdout) != content {
-		t.Errorf("reassembled = %q, want %q", result.Stdout, content)
-	}
-}
-
-// --- Hooks ---
-
-func TestKiro_ParseHook_Spawn(t *testing.T) {
-	t.Parallel()
-	env := NewKiroTestEnv(t)
-
-	var resp struct {
-		Type      int    `json:"type"`
-		SessionID string `json:"session_id"`
-		Timestamp string `json:"timestamp"`
-	}
-	env.Runner.RunJSON(t, &resp, "{}", "parse-hook", "-hook", "agent-spawn")
-
-	if resp.Type != 1 {
-		t.Errorf("type = %d, want 1", resp.Type)
-	}
-	if resp.SessionID == "" {
-		t.Error("session_id should not be empty")
-	}
-	if resp.Timestamp == "" {
-		t.Error("timestamp should not be empty")
-	}
-}
-
-func TestKiro_ParseHook_PromptSubmit(t *testing.T) {
-	t.Parallel()
-	env := NewKiroTestEnv(t)
-
-	input := e2e.ParseHookInput{Prompt: "do the thing"}
-
-	var resp struct {
-		Type      int    `json:"type"`
-		SessionID string `json:"session_id"`
-		Prompt    string `json:"prompt"`
-	}
-	env.Runner.RunJSON(t, &resp, input.JSON(t), "parse-hook", "-hook", "user-prompt-submit")
-
-	if resp.Type != 2 {
-		t.Errorf("type = %d, want 2", resp.Type)
-	}
-	if resp.Prompt != "do the thing" {
-		t.Errorf("prompt = %q, want %q", resp.Prompt, "do the thing")
-	}
-}
-
-func TestKiro_ParseHook_PreToolUse(t *testing.T) {
-	t.Parallel()
-	env := NewKiroTestEnv(t)
-
-	result := env.Runner.MustSucceed(t, "{}", "parse-hook", "-hook", "pre-tool-use")
-
-	if got := strings.TrimSpace(string(result.Stdout)); got != "null" {
-		t.Errorf("pre-tool-use should return null, got %q", got)
-	}
-}
-
-func TestKiro_ParseHook_Stop(t *testing.T) {
-	t.Parallel()
-	env := NewKiroTestEnv(t)
-
-	input := e2e.ParseHookInput{CWD: env.Dir}
-
-	var resp struct {
-		Type      int    `json:"type"`
-		SessionID string `json:"session_id"`
-	}
-	env.Runner.RunJSON(t, &resp, input.JSON(t), "parse-hook", "-hook", "stop")
-
-	if resp.Type != 3 {
-		t.Errorf("type = %d, want 3", resp.Type)
-	}
-	if resp.SessionID == "" {
-		t.Error("session_id should not be empty")
-	}
-}
-
-func TestKiro_InstallHooks(t *testing.T) {
-	t.Parallel()
-	env := NewKiroTestEnv(t)
-
-	var resp struct {
-		HooksInstalled int `json:"hooks_installed"`
-	}
-	env.Runner.RunJSON(t, &resp, "", "install-hooks")
-
-	if resp.HooksInstalled == 0 {
-		t.Error("hooks_installed should be > 0")
-	}
-
-	// Verify files were created
-	if !env.FileExists(".kiro/agents/entire.json") {
-		t.Error(".kiro/agents/entire.json should exist after install")
-	}
-	if !env.FileExists(".kiro/hooks/entire-stop.kiro.hook") {
-		t.Error(".kiro/hooks/entire-stop.kiro.hook should exist after install")
-	}
-	if !env.FileExists(".kiro/hooks/entire-prompt-submit.kiro.hook") {
-		t.Error(".kiro/hooks/entire-prompt-submit.kiro.hook should exist after install")
-	}
-}
-
-func TestKiro_UninstallHooks(t *testing.T) {
-	t.Parallel()
-	env := NewKiroTestEnv(t)
-
-	// Install first
-	env.Runner.MustSucceed(t, "", "install-hooks")
-
-	// Verify installed
-	if !env.FileExists(".kiro/agents/entire.json") {
-		t.Fatal("hooks should be installed before uninstall test")
-	}
-
-	// Uninstall
-	env.Runner.MustSucceed(t, "", "uninstall-hooks")
-
-	if env.FileExists(".kiro/agents/entire.json") {
-		t.Error(".kiro/agents/entire.json should be removed after uninstall")
-	}
-	if env.FileExists(".kiro/hooks/entire-stop.kiro.hook") {
-		t.Error(".kiro/hooks/entire-stop.kiro.hook should be removed after uninstall")
-	}
-}
-
-func TestKiro_AreHooksInstalled_No(t *testing.T) {
-	t.Parallel()
-	env := NewKiroTestEnv(t)
-
-	var resp struct {
-		Installed bool `json:"installed"`
-	}
-	env.Runner.RunJSON(t, &resp, "", "are-hooks-installed")
-
-	if resp.Installed {
-		t.Error("hooks should not be installed in fresh env")
-	}
-}
-
-func TestKiro_AreHooksInstalled_Yes(t *testing.T) {
-	t.Parallel()
-	env := NewKiroTestEnv(t)
-
-	env.Runner.MustSucceed(t, "", "install-hooks")
-
-	var resp struct {
-		Installed bool `json:"installed"`
-	}
-	env.Runner.RunJSON(t, &resp, "", "are-hooks-installed")
-
-	if !resp.Installed {
-		t.Error("hooks should be installed after install-hooks")
-	}
-}
-
-// --- Transcript Analysis ---
-
-func TestKiro_GetTranscriptPosition(t *testing.T) {
-	t.Parallel()
-	env := NewKiroTestEnv(t)
-
-	transcript := NewKiroTranscript("conv-pos").
-		AddPrompt("first").
-		AddPrompt("second").
-		AddResponse("third", "response")
-	path := transcript.WriteToFile(t, env, "pos-transcript.json")
-
-	var resp struct {
-		Position int `json:"position"`
-	}
-	env.Runner.RunJSON(t, &resp, "", "get-transcript-position", "-path", path)
-
-	if resp.Position != 3 {
-		t.Errorf("position = %d, want 3", resp.Position)
-	}
-}
-
-func TestKiro_GetTranscriptPosition_Missing(t *testing.T) {
-	t.Parallel()
-	env := NewKiroTestEnv(t)
-
-	var resp struct {
-		Position int `json:"position"`
-	}
-	env.Runner.RunJSON(t, &resp, "", "get-transcript-position", "-path", env.AbsPath("nonexistent.json"))
-
-	if resp.Position != 0 {
-		t.Errorf("position for missing file = %d, want 0", resp.Position)
-	}
-}
-
-func TestKiro_ExtractModifiedFiles(t *testing.T) {
-	t.Parallel()
-	env := NewKiroTestEnv(t)
-
-	transcript := NewKiroTranscript("conv-files").
-		AddPromptWithFileEdit("create file", "/tmp/foo.go").
-		AddPromptWithFileEdit("edit file", "/tmp/bar.go").
-		AddPrompt("no edits here")
-	path := transcript.WriteToFile(t, env, "files-transcript.json")
-
-	var resp struct {
-		Files           []string `json:"files"`
-		CurrentPosition int      `json:"current_position"`
-	}
-	env.Runner.RunJSON(t, &resp, "", "extract-modified-files", "-path", path, "-offset", "0")
-
-	if len(resp.Files) != 2 {
-		t.Errorf("files count = %d, want 2: %v", len(resp.Files), resp.Files)
-	}
-	if resp.CurrentPosition != 3 {
-		t.Errorf("current_position = %d, want 3", resp.CurrentPosition)
-	}
-}
-
-func TestKiro_ExtractPrompts(t *testing.T) {
-	t.Parallel()
-	env := NewKiroTestEnv(t)
-
-	transcript := NewKiroTranscript("conv-prompts").
-		AddPrompt("first prompt").
-		AddResponse("second prompt", "some response").
-		AddPrompt("third prompt")
-	path := transcript.WriteToFile(t, env, "prompts-transcript.json")
-
-	var resp struct {
-		Prompts []string `json:"prompts"`
-	}
-	env.Runner.RunJSON(t, &resp, "", "extract-prompts", "-session-ref", path, "-offset", "0")
-
-	if len(resp.Prompts) != 3 {
-		t.Errorf("prompts count = %d, want 3: %v", len(resp.Prompts), resp.Prompts)
-	}
-}
-
-func TestKiro_ExtractPrompts_WithOffset(t *testing.T) {
-	t.Parallel()
-	env := NewKiroTestEnv(t)
-
-	transcript := NewKiroTranscript("conv-prompts-offset").
-		AddPrompt("first").
-		AddPrompt("second").
-		AddPrompt("third")
-	path := transcript.WriteToFile(t, env, "prompts-offset.json")
-
-	var resp struct {
-		Prompts []string `json:"prompts"`
-	}
-	env.Runner.RunJSON(t, &resp, "", "extract-prompts", "-session-ref", path, "-offset", "2")
-
-	if len(resp.Prompts) != 1 {
-		t.Errorf("prompts count = %d, want 1: %v", len(resp.Prompts), resp.Prompts)
-	}
-}
-
-func TestKiro_ExtractSummary(t *testing.T) {
-	t.Parallel()
-	env := NewKiroTestEnv(t)
-
-	transcript := NewKiroTranscript("conv-summary").
-		AddResponse("do the thing", "I completed the task successfully")
-	path := transcript.WriteToFile(t, env, "summary-transcript.json")
-
-	var resp struct {
-		Summary    string `json:"summary"`
-		HasSummary bool   `json:"has_summary"`
-	}
-	env.Runner.RunJSON(t, &resp, "", "extract-summary", "-session-ref", path)
-
-	if !resp.HasSummary {
-		t.Error("has_summary should be true")
-	}
-	if resp.Summary != "I completed the task successfully" {
-		t.Errorf("summary = %q, want %q", resp.Summary, "I completed the task successfully")
-	}
-}
-
-func TestKiro_ExtractSummary_Empty(t *testing.T) {
-	t.Parallel()
-	env := NewKiroTestEnv(t)
-
-	transcript := NewKiroTranscript("conv-no-summary").AddPrompt("hello")
-	path := transcript.WriteToFile(t, env, "no-summary.json")
-
-	var resp struct {
-		Summary    string `json:"summary"`
-		HasSummary bool   `json:"has_summary"`
-	}
-	env.Runner.RunJSON(t, &resp, "", "extract-summary", "-session-ref", path)
-
-	if resp.HasSummary {
-		t.Error("has_summary should be false for prompt-only transcript")
-	}
-}
-
-// --- Other ---
-
-func TestKiro_FormatResumeCommand(t *testing.T) {
-	t.Parallel()
-	env := NewKiroTestEnv(t)
-
-	var resp struct {
-		Command string `json:"command"`
-	}
-	env.Runner.RunJSON(t, &resp, "", "format-resume-command", "-session-id", "session-xyz")
-
-	if resp.Command == "" {
-		t.Error("command should not be empty")
-	}
-	if !strings.Contains(resp.Command, "resume") {
-		t.Errorf("command %q should contain 'resume'", resp.Command)
-	}
-}
-
-func TestKiro_UnknownSubcommand(t *testing.T) {
-	t.Parallel()
-	env := NewKiroTestEnv(t)
-
-	result := env.Runner.MustFail(t, "", "nonexistent-command")
-	if !strings.Contains(string(result.Stderr), "unknown subcommand") {
-		t.Errorf("stderr should mention 'unknown subcommand', got: %s", result.Stderr)
-	}
-}
-
-func TestKiro_NoSubcommand(t *testing.T) {
-	t.Parallel()
-	env := NewKiroTestEnv(t)
-
-	// Run with no args — the binary expects at least a subcommand
-	result := env.Runner.Run("", "")
-	// The binary wraps os.Args[1] so passing empty string gives "unknown subcommand: "
-	// which exits non-zero. Either way it should not succeed cleanly.
-	// Actually passing "" as subcommand will invoke the binary with "" as the arg.
-	if result.ExitCode == 0 {
-		t.Error("expected non-zero exit for empty subcommand")
-	}
-}
-
-// --- Install + Idempotency ---
-
-func TestKiro_InstallHooks_Idempotent(t *testing.T) {
-	t.Parallel()
-	env := NewKiroTestEnv(t)
-
-	// First install
-	var resp1 struct {
-		HooksInstalled int `json:"hooks_installed"`
-	}
-	env.Runner.RunJSON(t, &resp1, "", "install-hooks")
-	if resp1.HooksInstalled == 0 {
-		t.Fatal("first install should install hooks")
-	}
-
-	// Second install should be a no-op (returns 0 installed)
-	var resp2 struct {
-		HooksInstalled int `json:"hooks_installed"`
-	}
-	env.Runner.RunJSON(t, &resp2, "", "install-hooks")
-	if resp2.HooksInstalled != 0 {
-		t.Errorf("second install should be idempotent (0 hooks), got %d", resp2.HooksInstalled)
-	}
-}
diff --git a/e2e/kiro/setup_test.go b/e2e/kiro/setup_test.go
deleted file mode 100644
index 84e10c6..0000000
--- a/e2e/kiro/setup_test.go
+++ /dev/null
@@ -1,37 +0,0 @@
-//go:build e2e
-
-package kiro
-
-import (
-	"fmt"
-	"os"
-	"testing"
-
-	e2e "github.com/entireio/external-agents/e2e"
-)
-
-// kiroBinary holds the path to the built entire-agent-kiro binary.
-var kiroBinary string
-
-func TestMain(m *testing.M) {
-	tmpDir, err := os.MkdirTemp("", "e2e-kiro-*")
-	if err != nil {
-		fmt.Fprintf(os.Stderr, "failed to create temp dir: %v\n", err)
-		os.Exit(1)
-	}
-	defer os.RemoveAll(tmpDir)
-
-	fmt.Println("Building entire-agent-kiro...")
-	binPath, err := e2e.BuildAgent("entire-agent-kiro", tmpDir)
-	if err != nil {
-		fmt.Fprintf(os.Stderr, "failed to build entire-agent-kiro: %v\n", err)
-		os.Exit(1)
-	}
-	kiroBinary = binPath
-	fmt.Printf("Built entire-agent-kiro -> %s\n", binPath)
-
-	// Isolate git config to prevent user's ~/.gitconfig from interfering.
-	os.Setenv("GIT_CONFIG_GLOBAL", "/dev/null")
-
-	os.Exit(m.Run())
-}
diff --git a/e2e/kiro/testenv_test.go b/e2e/kiro/testenv_test.go
deleted file mode 100644
index 5e60330..0000000
--- a/e2e/kiro/testenv_test.go
+++ /dev/null
@@ -1,26 +0,0 @@
-//go:build e2e
-
-package kiro
-
-import (
-	"testing"
-
-	e2e "github.com/entireio/external-agents/e2e"
-)
-
-// NewKiroTestEnv creates a test environment with .kiro/ and .entire/tmp/ directories.
-func NewKiroTestEnv(t *testing.T) *e2e.TestEnv {
-	t.Helper()
-	te := e2e.NewTestEnvWithBinary(t, kiroBinary)
-	te.MkdirAll(".kiro")
-	te.MkdirAll(".entire/tmp")
-	return te
-}
-
-// NewKiroGitEnv creates a Kiro test environment with git init.
-func NewKiroGitEnv(t *testing.T) *e2e.TestEnv {
-	t.Helper()
-	te := NewKiroTestEnv(t)
-	te.GitInit()
-	return te
-}
diff --git a/e2e/lifecycle_test.go b/e2e/lifecycle_test.go
index 2358518..63a0e43 100644
--- a/e2e/lifecycle_test.go
+++ b/e2e/lifecycle_test.go
@@ -4,7 +4,9 @@ package e2e
 
 import (
 	"context"
+	"encoding/json"
 	"os"
+	"os/exec"
 	"path/filepath"
 	"testing"
 	"time"
@@ -80,22 +82,7 @@ func TestLifecycle_HooksInstalledAfterEnable(t *testing.T) {
 			t.Skipf("%s binary not built", agentBinName)
 		}
 
-		runner := &AgentRunner{
-			BinaryPath: binPath,
-			Env: []string{
-				"ENTIRE_REPO_ROOT=" + s.Dir,
-				"HOME=" + os.Getenv("HOME"),
-				"PATH=" + os.Getenv("PATH"),
-				"LANG=en_US.UTF-8",
-			},
-		}
-
-		var resp struct {
-			Installed bool `json:"installed"`
-		}
-		runner.RunJSON(t, &resp, "", "are-hooks-installed")
-
-		assert.True(t, resp.Installed, "hooks should be installed after entire enable")
+		assert.True(t, hooksInstalled(t, binPath, s.Dir), "hooks should be installed after entire enable")
 	})
 }
 
@@ -222,3 +209,27 @@ func TestLifecycle_InteractiveSession(t *testing.T) {
 		testutil.WaitForCheckpoint(t, s, 30*time.Second)
 	})
 }
+
+func hooksInstalled(t *testing.T, binPath, repoRoot string) bool {
+	t.Helper()
+
+	cmd := exec.Command(binPath, "are-hooks-installed")
+	cmd.Env = append(os.Environ(),
+		"ENTIRE_REPO_ROOT="+repoRoot,
+		"LANG=en_US.UTF-8",
+	)
+
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		t.Fatalf("%s are-hooks-installed failed: %v\n%s", binPath, err, out)
+	}
+
+	var resp struct {
+		Installed bool `json:"installed"`
+	}
+	if err := json.Unmarshal(out, &resp); err != nil {
+		t.Fatalf("parse are-hooks-installed response: %v\nraw output: %s", err, out)
+	}
+
+	return resp.Installed
+}
diff --git a/e2e/testenv.go b/e2e/testenv.go
deleted file mode 100644
index db52ebf..0000000
--- a/e2e/testenv.go
+++ /dev/null
@@ -1,145 +0,0 @@
-//go:build e2e
-
-package e2e
-
-import (
-	"encoding/json"
-	"fmt"
-	"os"
-	"os/exec"
-	"path/filepath"
-	"testing"
-)
-
-// TestEnv provides an isolated filesystem environment for E2E tests.
-type TestEnv struct {
-	t       *testing.T
-	Dir     string
-	HomeDir string
-	Runner  *AgentRunner
-}
-
-// NewTestEnv creates a bare test environment with ENTIRE_REPO_ROOT and isolated HOME.
-func NewTestEnv(t *testing.T, agentName string) *TestEnv {
-	t.Helper()
-
-	binPath, ok := AgentBinaries[agentName]
-	if !ok {
-		t.Fatalf("agent binary not found: %s (available: %v)", agentName, agentBinaryNames())
-	}
-
-	dir := t.TempDir()
-	homeDir := t.TempDir()
-
-	env := baseEnv(dir, homeDir)
-
-	return &TestEnv{
-		t:       t,
-		Dir:     dir,
-		HomeDir: homeDir,
-		Runner: &AgentRunner{
-			BinaryPath: binPath,
-			Env:        env,
-		},
-	}
-}
-
-// NewTestEnvWithBinary creates a test environment using an explicit binary path.
-// Use this from subpackages that build their own agent binary in TestMain.
-func NewTestEnvWithBinary(t *testing.T, binPath string) *TestEnv {
-	t.Helper()
-
-	dir := t.TempDir()
-	homeDir := t.TempDir()
-
-	env := baseEnv(dir, homeDir)
-
-	return &TestEnv{
-		t:       t,
-		Dir:     dir,
-		HomeDir: homeDir,
-		Runner: &AgentRunner{
-			BinaryPath: binPath,
-			Env:        env,
-		},
-	}
-}
-
-// WriteFile writes content to a path relative to the test environment root.
-func (e *TestEnv) WriteFile(relPath, content string) {
-	e.t.Helper()
-	abs := filepath.Join(e.Dir, relPath)
-	if err := os.MkdirAll(filepath.Dir(abs), 0o750); err != nil {
-		e.t.Fatalf("mkdir for %s: %v", relPath, err)
-	}
-	if err := os.WriteFile(abs, []byte(content), 0o600); err != nil {
-		e.t.Fatalf("write %s: %v", relPath, err)
-	}
-}
-
-// WriteJSON writes a JSON-encoded value to a path relative to the test root.
-func (e *TestEnv) WriteJSON(relPath string, v any) {
-	e.t.Helper()
-	data, err := json.Marshal(v)
-	if err != nil {
-		e.t.Fatalf("marshal JSON for %s: %v", relPath, err)
-	}
-	e.WriteFile(relPath, string(data))
-}
-
-// ReadFile reads a file relative to the test environment root.
-func (e *TestEnv) ReadFile(relPath string) string {
-	e.t.Helper()
-	data, err := os.ReadFile(filepath.Join(e.Dir, relPath))
-	if err != nil {
-		e.t.Fatalf("read %s: %v", relPath, err)
-	}
-	return string(data)
-}
-
-// FileExists checks whether a relative path exists in the test environment.
-func (e *TestEnv) FileExists(relPath string) bool {
-	_, err := os.Stat(filepath.Join(e.Dir, relPath))
-	return err == nil
-}
-
-// MkdirAll creates a directory (and parents) relative to the test root.
-func (e *TestEnv) MkdirAll(relPath string) {
-	e.t.Helper()
-	if err := os.MkdirAll(filepath.Join(e.Dir, relPath), 0o750); err != nil {
-		e.t.Fatalf("mkdir %s: %v", relPath, err)
-	}
-}
-
-// GitInit initializes a git repo in the test environment root.
-func (e *TestEnv) GitInit() {
-	e.t.Helper()
-	cmd := exec.Command("git", "init")
-	cmd.Dir = e.Dir
-	cmd.Env = e.Runner.Env
-	if out, err := cmd.CombinedOutput(); err != nil {
-		e.t.Fatalf("git init failed: %v\n%s", err, out)
-	}
-}
-
-// AbsPath returns the absolute path for a relative path in the test environment.
-func (e *TestEnv) AbsPath(relPath string) string {
-	return filepath.Join(e.Dir, relPath)
-}
-
-func baseEnv(repoRoot, homeDir string) []string {
-	return []string{
-		fmt.Sprintf("ENTIRE_REPO_ROOT=%s", repoRoot),
-		fmt.Sprintf("HOME=%s", homeDir),
-		fmt.Sprintf("PATH=%s", os.Getenv("PATH")),
-		"LANG=en_US.UTF-8",
-	}
-}
-
-func agentBinaryNames() []string {
-	names := make([]string, 0, len(AgentBinaries))
-	for name := range AgentBinaries {
-		names = append(names, name)
-	}
-	return names
-}

From 65c422ab9ad18208c131707809291b122c32696f Mon Sep 17 00:00:00 2001
From: Andrea Nodari <andrea@entire.io>
Date: Tue, 24 Mar 2026 16:59:44 +0100
Subject: [PATCH 2/2] Address bugbot comment

---
 e2e/lifecycle_test.go | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/e2e/lifecycle_test.go b/e2e/lifecycle_test.go
index 63a0e43..eb127bf 100644
--- a/e2e/lifecycle_test.go
+++ b/e2e/lifecycle_test.go
@@ -219,9 +219,12 @@ func hooksInstalled(t *testing.T, binPath, repoRoot string) bool {
 		"LANG=en_US.UTF-8",
 	)
 
-	out, err := cmd.CombinedOutput()
+	out, err := cmd.Output()
 	if err != nil {
-		t.Fatalf("%s are-hooks-installed failed: %v\n%s", binPath, err, out)
+		if exitErr, ok := err.(*exec.ExitError); ok {
+			t.Fatalf("%s are-hooks-installed failed: %v\nstdout: %s\nstderr: %s", binPath, err, out, exitErr.Stderr)
+		}
+		t.Fatalf("%s are-hooks-installed failed: %v\nstdout: %s", binPath, err, out)
 	}
 
 	var resp struct {