From 1fdb39c822ec85ae216f957bd3926b66796af34d Mon Sep 17 00:00:00 2001 From: Andrea Nodari Date: Tue, 24 Mar 2026 15:48:43 +0100 Subject: [PATCH 1/2] Add external-agents-tests --- .claude/skills/entire-external-agent/SKILL.md | 126 ++-- .../skills/entire-external-agent/implement.md | 299 +++------ .../entire-external-agent/write-tests.md | 325 +++------- .github/workflows/protocol-compliance.yml | 65 ++ AGENTS.md | 11 +- Makefile | 26 +- README.md | 52 +- agents/entire-agent-kiro/README.md | 30 +- .../kiro/.entire/tmp/kiro-tool-calls.jsonl | 6 + e2e/README.md | 21 +- e2e/fixtures.go | 50 -- e2e/harness.go | 88 --- e2e/kiro/fixtures_test.go | 105 --- e2e/kiro/kiro_test.go | 613 ------------------ e2e/kiro/setup_test.go | 37 -- e2e/kiro/testenv_test.go | 26 - e2e/lifecycle_test.go | 43 +- e2e/testenv.go | 145 ----- 18 files changed, 398 insertions(+), 1670 deletions(-) create mode 100644 .github/workflows/protocol-compliance.yml create mode 100644 agents/entire-agent-kiro/internal/kiro/.entire/tmp/kiro-tool-calls.jsonl delete mode 100644 e2e/fixtures.go delete mode 100644 e2e/harness.go delete mode 100644 e2e/kiro/fixtures_test.go delete mode 100644 e2e/kiro/kiro_test.go delete mode 100644 e2e/kiro/setup_test.go delete mode 100644 e2e/kiro/testenv_test.go delete mode 100644 e2e/testenv.go diff --git a/.claude/skills/entire-external-agent/SKILL.md b/.claude/skills/entire-external-agent/SKILL.md index 7bd84ea..883c26a 100644 --- a/.claude/skills/entire-external-agent/SKILL.md +++ b/.claude/skills/entire-external-agent/SKILL.md @@ -2,101 +2,111 @@ name: entire-external-agent description: > Run all three external agent binary phases sequentially: research, write-tests, - and implement using E2E-first TDD (unit tests written last). - Accepts an optional argument to run a single phase: research, write-tests, or implement. - Usage: /entire-external-agent [phase] — omit phase to run full pipeline. 
- Use when the user says "build external agent", "create agent binary", - "external agent plugin", or wants to run the full pipeline end-to-end. + and implement using black-box-first TDD across protocol compliance, lifecycle + integration, and unit tests. Accepts an optional argument to run a single phase: + research, write-tests, or implement. --- # External Agent Binary — Full Pipeline -Build a standalone external agent binary that implements the Entire CLI's external agent protocol using E2E-first TDD. Parameters are collected once and reused across all phases. +Build a standalone external agent binary that implements the Entire CLI external agent protocol. + +The current test split is: + +1. **Protocol compliance** lives in `external-agents-tests`. +2. **Lifecycle integration** lives in this repo's `e2e/` harness. +3. **Unit tests** live in each agent module. + +Do not add new generic protocol tests under this repo's `e2e/` directory. ## Parameters -Collect these before starting (ask the user if not provided): +Collect these before starting if the user did not provide them: -| Parameter | Description | How to derive | -|-----------|-------------|---------------| -| `AGENT_NAME` | Human-readable name (e.g., "Windsurf") | User provides | -| `AGENT_SLUG` | Binary suffix: `entire-agent-` (kebab-case) | Kebab-case of agent name | -| `LANGUAGE` | Implementation language (Go, Python, TypeScript, Rust) | User provides; default Go | -| `PROJECT_DIR` | Where to create the project | Default: `./entire-agent-` | -| `CAPABILITIES` | Which optional capabilities to implement | Derived from research phase | -| `ENTIRE_BIN` | Path to the Entire CLI binary | Default: `entire` from PATH, or `E2E_ENTIRE_BIN` env | +| Parameter | Description | Default | +|-----------|-------------|---------| +| `AGENT_NAME` | Human-readable name (for example, `Windsurf`) | User-provided | +| `AGENT_SLUG` | Binary suffix for `entire-agent-` | Kebab-case of `AGENT_NAME` | +| `LANGUAGE` | 
Implementation language | `Go` | +| `PROJECT_DIR` | Agent directory to create or edit | `./agents/entire-agent-` | +| `ENTIRE_BIN` | Path to the Entire CLI binary for lifecycle testing | `entire` from `PATH` or `E2E_ENTIRE_BIN` | ## Phase Selection -This skill accepts an optional argument to run a single phase: +- `/entire-external-agent research` runs only Phase 1. +- `/entire-external-agent write-tests` runs only Phase 2. +- `/entire-external-agent implement` runs only Phase 3. +- `/entire-external-agent` runs all three phases in order. -- `/entire-external-agent research` — Run only Phase 1 (research) -- `/entire-external-agent write-tests` — Run only Phase 2 (scaffold + E2E tests) -- `/entire-external-agent implement` — Run only Phase 3 (E2E-first TDD implementation) -- `/entire-external-agent` (no argument) — Run all three phases sequentially - -If an argument is provided, skip directly to that phase's procedure. Parameters and prerequisites still apply — collect them before starting. +If a single phase is requested, still collect the shared parameters first. ## Protocol Spec Use the protocol specification at: `https://github.com/entireio/cli/blob/main/docs/architecture/external-agent-protocol.md` -If a user provides a different protocol spec location explicitly, use that instead and pass it to each phase as `PROTOCOL_SPEC_LOCATION`. - -## Core Rule: E2E-First TDD +If the user gives a different spec location explicitly, use that instead. -This skill enforces strict E2E-first test-driven development. The rules: +## Core Rule: Black-Box-First TDD -1. **E2E tests are the spec.** The `e2e/` test harness defines what "working" means. The agent binary must pass all E2E tests to be considered complete. -2. **Run E2E tests at every step.** Each implementation tier starts by running the E2E test and watching it fail. You implement until it passes. No exceptions. -3. 
**Unit tests are written last.** After all E2E tiers pass, you write unit tests using real data collected from E2E runs as golden fixtures. -4. **If you didn't watch it fail, you don't know if it tests the right thing.** Never write a test you haven't seen fail first. -5. **Minimum viable fix.** At each E2E failure, implement only the code needed to fix that failure. Don't anticipate future tiers. +1. **Protocol compliance is the contract.** The binary must pass the shared `external-agents-tests` suite. +2. **Lifecycle tests prove real integration.** The repo-local `e2e/` harness covers the Entire + real-agent workflow and stays separate from generic protocol checks. +3. **Unit tests are written last.** After protocol and lifecycle behavior are working, add unit tests to lock down parsing, hooks, and file handling. +4. **Watch failures before fixing them.** Run the failing test first so you know what behavior the code must satisfy. +5. **Keep the fix scoped.** Implement only the behavior needed for the current failure, then rerun. ## Pipeline -Run these three phases in order. Each phase builds on the previous phase's output. - ### Phase 1: Research -Discover the target agent's hook mechanism, transcript format, session management, and configuration. Map native concepts to protocol subcommands. Produces `/AGENT.md` with protocol mapping and E2E prerequisites. - -Use the Read tool to read the file `.claude/skills/entire-external-agent/research.md` and follow the procedure it contains. +Discover the target agent's hook mechanism, transcript format, session layout, CLI entrypoints, and lifecycle prerequisites. Produce `/AGENT.md` with the protocol mapping and any real-CLI requirements needed for lifecycle tests. -**Expected output:** `/AGENT.md` — agent research one-pager with protocol mapping and E2E test prerequisites. +Use `.claude/skills/entire-external-agent/research.md`. 
-**Commit gate:** After the research phase completes, create a git commit for the resulting files. +Expected output: +- `/AGENT.md` -**Gate:** If the agent lacks any mechanism for lifecycle hooks or session management, discuss with the user before proceeding. Some agents may only support a subset of the protocol. +### Phase 2: Write Tests -### Phase 2: Write-Tests +Scaffold the binary and the test surfaces you will need: -Scaffold the binary with compilable stubs and create a self-contained `e2e/` test harness in the project directory. The harness exercises the full human workflow: `entire enable`, real agent invocation, hook firing, checkpoint validation. Tests are expected to fail at this stage — they define the spec. +- agent module structure under `` +- protocol compliance expectations compatible with `external-agents-tests` +- lifecycle adapter wiring in this repo's `e2e/` harness +- optional compliance fixtures if the agent benefits from stronger black-box detect or transcript assertions -Use the Read tool to read the file `.claude/skills/entire-external-agent/write-tests.md` and follow the procedure it contains. +Use `.claude/skills/entire-external-agent/write-tests.md`. -**Expected output:** Complete project directory at `` with compiled binary stubs and `e2e/` test harness that compiles but fails. +Expected output: +- compiling binary scaffold +- any needed lifecycle adapter files under `e2e/agents/` +- optional fixture file paths documented in `/AGENT.md` or `README.md` -**Commit gate:** After the scaffold compiles and the e2e harness compiles (`cd e2e && go test -c -tags=e2e`), create a git commit. +### Phase 3: Implement -### Phase 3: Implement (E2E-First, Unit Tests Last) +Implement until: -Build the real agent binary using strict E2E-first TDD. E2E tests drive development at every step — run each tier, watch it fail, implement the minimum fix, repeat. 
Unit tests are written only after all E2E tiers pass, using real data from E2E runs as golden fixtures. +- the binary passes protocol compliance +- lifecycle tests pass when the required CLIs are available +- unit tests cover the important internal behaviors -Use the Read tool to read the file `.claude/skills/entire-external-agent/implement.md` and follow the procedure it contains. +Use `.claude/skills/entire-external-agent/implement.md`. -**Expected output:** Fully implemented binary where all E2E tests pass and unit tests lock in behavior. - -**Note:** `AGENT.md` is a living document — Phases 2 and 3 update it when they discover new information during testing or implementation. +Expected output: +- fully working binary +- passing unit tests +- passing protocol compliance +- passing lifecycle integration where dependencies are available ## Final Summary -After all three phases, summarize: -- Agent name and binary name -- Language used -- Capabilities declared -- E2E test results (all tiers passing) -- Unit test coverage -- Installation instructions (`go install`, `pip install`, etc.) -- Any remaining gaps or TODOs +At the end, summarize: + +- agent name and binary name +- implementation language +- declared capabilities +- protocol compliance status +- lifecycle test status +- unit test coverage +- installation instructions +- any remaining gaps diff --git a/.claude/skills/entire-external-agent/implement.md b/.claude/skills/entire-external-agent/implement.md index 77762f1..dd8d412 100644 --- a/.claude/skills/entire-external-agent/implement.md +++ b/.claude/skills/entire-external-agent/implement.md @@ -1,270 +1,137 @@ --- name: implement description: > - Phase 3: Build the external agent binary using strict E2E-first TDD. - Use /entire-external-agent implement or /entire-external-agent:implement - when you only need the implementation phase. 
+ Phase 3: Implement the external agent binary using protocol compliance first, + lifecycle integration second, and unit tests last. --- # Implement Procedure -Build the external agent binary using strict E2E-first TDD. E2E tests drive development at every step — run each tier, watch it fail, implement the minimum fix, repeat. Unit tests are written only after all E2E tiers pass, using real data from E2E runs as golden fixtures. +Implement the agent with black-box-first TDD. -> **Warning:** This phase involves iterative E2E test cycles with real agent invocations. Expect this to take 2-4 hours depending on agent complexity and API response times. +The order is: + +1. protocol compliance against `external-agents-tests` +2. lifecycle integration in this repo's `e2e/` harness +3. unit tests in the agent module ## Prerequisites Ensure the following are available: -- `AGENT_NAME`, `AGENT_SLUG`, `LANGUAGE`, `PROJECT_DIR` — from orchestrator or user -- `/AGENT.md` — research one-pager with E2E test prerequisites -- Scaffolded project that compiles and responds to `info` -- E2E test harness at `/e2e/` that compiles - -## Core Principle: E2E-First TDD - -1. **E2E tests are the spec.** The `e2e/` test harness defines what "working" means. You implement until tests pass. -2. **Watch it fail first.** Every E2E tier starts by running the test and observing the failure. If you haven't seen the failure, you don't understand what needs fixing. -3. **Minimum viable fix.** At each failure, implement only the code needed to make that specific assertion pass. Don't anticipate future tiers. -4. **No unit tests during Steps 3-9.** Unit tests are written in Step 11 after all E2E tiers pass, using real data from E2E runs as golden fixtures. -5. **Format and lint, don't unit test.** Between E2E tiers, run format/lint to keep code clean. No unit tests between tiers. -6. 
**If you didn't watch it fail, you don't know if it tests the right thing.** -**Do NOT write unit tests during Steps 3-9.** All unit test writing is consolidated in Step 11. +- `AGENT_NAME` +- `AGENT_SLUG` +- `PROJECT_DIR` +- `/AGENT.md` +- compiling scaffold from the write-tests phase -## Procedure +## Step 1: Read Before Coding -### Step 1: Read Protocol Spec + AGENT.md +Read: -Read these files before writing any code: +1. the protocol spec +2. the current agent code +3. `/AGENT.md` +4. the lifecycle adapter in `e2e/agents/.go` if it already exists -1. Read `https://github.com/entireio/cli/blob/main/docs/architecture/external-agent-protocol.md` — full protocol spec -2. Read `https://github.com/entireio/cli/blob/main/cmd/entire/cli/agent/external/types.go` — JSON response types -3. Read `https://github.com/entireio/cli/blob/main/cmd/entire/cli/agent/external/external.go` — how the CLI calls each subcommand -4. Read `/AGENT.md` — agent-specific hook mechanism, transcript format, config structure, E2E prerequisites +## Step 2: Establish the First Failing Compliance Run -### Step 2: Verify Baseline +Build the binary and run the shared compliance suite first. -Build the binary and run the first E2E test to confirm it fails for the right reason (agent behavior, not harness bug). +If `external-agents-tests` is checked out beside this repo: ```bash -make build && make install -make test:e2e:run TEST=TestHookInstallAndDetect -``` - -**Expected:** Test fails because the agent binary returns stub data. If the test fails for a different reason (harness compilation error, missing binary, broken assertion), fix the harness first. - -### Step 3: E2E Tier 1 — `TestHookInstallAndDetect` - -**What it exercises:** -- `detect` — agent binary detection -- `install-hooks` — hook installation via `entire enable` -- `are-hooks-installed` — hook presence detection -- Basic binary invocation and JSON response format - -**Cycle:** - -1. 
Run: `make test:e2e:run TEST=TestHookInstallAndDetect` -2. **Watch it FAIL** — read the failure output carefully -3. Read the failure — what subcommand/behavior is missing? -4. Implement the MINIMUM code to fix the failure -5. Re-run until PASS -6. `make build` -7. Commit - -### Step 4: E2E Tier 2 — `TestSingleSessionManualCommit` - -The foundational test. Exercises the full agent lifecycle: start session → agent prompt → agent produces files → user commits → checkpoint created. - -**What it exercises:** -- `parse-hook` for all event types (session start, turn start, turn end, session end) -- `get-session-id` — session ID extraction from hook input -- `get-session-dir` / `resolve-session-file` — finding session/transcript files -- `read-session` / `write-session` — session data management -- `read-transcript` / `chunk-transcript` / `reassemble-transcript` — transcript handling - -**Cycle:** - -1. Run: `make test:e2e:run TEST=TestSingleSessionManualCommit` -2. **Watch it FAIL** — read the failure output carefully -3. Read the failure — which subcommand returns wrong data or errors? -4. Implement the MINIMUM code to fix the failure -5. Re-run until PASS -6. `make build` -7. Commit - -### Step 5: E2E Tier 3 — `TestCheckpointDeepValidation` - -Validates transcript quality: JSONL validity, content hash correctness, prompt extraction accuracy. - -**What it exercises:** -- `get-transcript-position` — transcript file size/position -- `extract-modified-files` — parsing transcript for file operations -- `extract-prompts` — parsing transcript for user messages -- `extract-summary` — parsing transcript for AI summaries - -**Cycle:** - -1. Run: `make test:e2e:run TEST=TestCheckpointDeepValidation` -2. **Watch it FAIL** — this test often exposes subtle transcript formatting bugs -3. Implement the MINIMUM fix -4. Re-run until PASS -5. `make build` -6. Commit - -### Step 6: E2E Tier 4 — `TestMultipleTurnsManualCommit` - -Multi-turn session management. 
Two sequential prompts, one commit. - -**What it exercises:** -- Session persistence across multiple prompts -- Transcript accumulation across turns -- Checkpoint capturing both turns - -**Cycle:** - -1. Run: `make test:e2e:run TEST=TestMultipleTurnsManualCommit` -2. **Watch it FAIL** -3. Implement the MINIMUM fix -4. Re-run until PASS -5. `make build` -6. Commit +cd +make build -### Step 7: E2E Tier 5 — `TestSessionMetadata` - -Agent identification in checkpoint metadata. - -**What it exercises:** -- Session metadata has correct agent name -- Session ID is properly stored -- Agent type field is populated - -**Cycle:** - -1. Run: `make test:e2e:run TEST=TestSessionMetadata` -2. **Watch it FAIL** -3. Implement the MINIMUM fix -4. Re-run until PASS -5. `make build` -6. Commit - -### Step 8: E2E Tier 6 — `TestInteractiveSession` - -Tmux-based interactive mode. **Skip if the agent doesn't support interactive mode** (check AGENT.md's E2E prerequisites). - -**What it exercises:** -- Interactive session launch -- Multi-step prompting within a session -- Session end on exit +cd ../../external-agents-tests +AGENT_BINARY=/abs/path/to/entire-agent- go test -v -count=1 ./... +``` -**Cycle:** +If that sibling repo is not available locally, use the GitHub Action in CI as the compliance source of truth and keep local validation focused on the binary plus unit tests. -1. Check AGENT.md — if interactive mode is not supported, skip this tier -2. Run: `make test:e2e:run TEST=TestInteractiveSession` -3. **Watch it FAIL** -4. Implement the MINIMUM fix -5. Re-run until PASS -6. `make build` -7. Commit +Do not start by adding new protocol tests to this repo. -### Step 9: E2E Tier 7 — `TestRewind` +## Step 3: Fix Compliance Failures Incrementally -Rewind functionality after a checkpoint. +For each failing compliance assertion: -**What it exercises:** -- Rewind command works on checkpoints created by this agent -- State is properly restored after rewind +1. rerun the failing test +2. 
inspect the exact subcommand behavior +3. implement the minimum fix +4. rerun until it passes -**Cycle:** +Areas the compliance suite typically drives: -1. Run: `make test:e2e:run TEST=TestRewind` -2. **Watch it FAIL** -3. Implement the MINIMUM fix -4. Re-run until PASS -5. `make build` -6. Commit +- `info` and capability declarations +- `detect` +- session helpers +- transcript chunking and reassembly +- session read/write behavior +- hooks capability +- transcript analysis capability -### Step 10: Full E2E Suite Pass +## Step 4: Run Lifecycle Tests -Run the complete E2E suite to catch any regressions: +Once protocol compliance is in good shape, validate the real integration path: ```bash -make test:e2e +cd /path/to/repo +make test-e2e AGENT= ``` -This runs every test, not just the ones targeted in Steps 3-9. - -**Important:** If some tests fail when running the full suite but pass individually, it may be a timing issue. Re-run each failing test individually before investigating: - -```bash -make test:e2e:run TEST=TestFailingTestName -``` +These tests require: -Fix any real failures before proceeding. The same cycle applies: read the failure, implement the minimum fix, re-run. +- the Entire CLI +- the real agent CLI on `PATH` +- `tmux` for interactive scenarios -All E2E tests must pass before writing unit tests. +If those dependencies are not available, note the gap explicitly and continue with the protocol and unit-test work. -### Step 11: Write Unit Tests +## Step 5: Fix Lifecycle Failures Incrementally -Now that all E2E tiers pass, write unit tests to lock in behavior. Use real data from E2E runs (captured JSON payloads, transcript snippets, config file contents) as golden fixtures. +Use lifecycle failures to refine: -**Test files to create:** +- the agent CLI adapter in `e2e/agents/.go` +- prompt execution details +- hook installation behavior after `entire enable` +- rewind and checkpoint interactions +- interactive session handling -1. 
**`cmd/hooks_test.go`** (or language equivalent) — Test `install-hooks` (creates config, idempotent), `uninstall-hooks` (removes hooks), `are-hooks-installed` (detects presence). Use a temp directory to avoid touching real config. +Keep protocol fixes in the agent binary itself. Keep real-CLI orchestration fixes in the lifecycle adapter. -2. **`cmd/lifecycle_test.go`** — Test `parse-hook` for all event types. Use actual JSON payloads from E2E runs or AGENT.md examples. Test every event type mapping, null returns for unknown hook names, empty input, and malformed JSON. +## Step 6: Add Unit Tests Last -3. **`cmd/session_test.go`** — Test session subcommands (`get-session-id`, `read-session`, `write-session`) with actual JSON payloads. +After the behavior is working end to end, add unit tests in the agent module for: -4. **`cmd/transcript_test.go`** — Test `read-transcript`, `chunk-transcript`, `reassemble-transcript` with sample data. Test transcript analyzer methods if implemented. Use transcript snippets from E2E runs as golden test data. +- hook parsing +- transcript parsing +- config file read-modify-write behavior +- session file handling +- protocol handlers -5. **`cmd/info_test.go`** — Test `info` returns valid JSON with correct fields and `detect` returns expected results. +Prefer using real payloads or fixtures captured during compliance and lifecycle runs. -**Where to find golden test data:** +## Step 7: Final Validation -- E2E artifact directories contain captured transcripts, hook payloads, and config files -- `AGENT.md` has example JSON payloads in the "Hook input" sections -- The agent's actual config file format from E2E test repos - -Run: format + lint + test - -**Commit:** Create a git commit for the unit tests. 
- -### Step 12: Final Validation - -Run the complete validation: +Run: ```bash -make build # Build -make test # Unit tests -make test:e2e # E2E tests -``` +cd +make test -Summarize: -- All E2E tiers passing (list which tests pass) -- Unit test coverage (number of test functions, what they cover) -- Any gaps or TODOs remaining -- Commands to build and install the binary - -## Standing Instructions - -- **Check AGENT.md first** for agent-specific information. If AGENT.md doesn't cover what you need, search external docs — but always update AGENT.md with anything new you discover. -- **Preserve unknown config keys** when modifying agent configuration files (read-modify-write pattern). -- **Validate JSON output** after each implementation — malformed JSON will cause the CLI to skip the agent. -- **Handle missing files gracefully** — return appropriate error messages to stderr rather than panicking. - -## E2E Debugging Protocol +cd /path/to/repo +make test-e2e AGENT= +``` -At every E2E failure, follow this protocol: +If the local `external-agents-tests` checkout is available, rerun the full compliance suite as the final black-box pass. -1. **Read the test output** — the assertion message often tells you exactly what's wrong -2. **Check the agent binary output** — run the failing subcommand manually with the same args/stdin -3. **Check Entire CLI logs** — look in the test repo's `.entire/logs/` directory -4. **Implement the minimum fix** — don't over-engineer; fix only what the test demands -5. **Re-run the failing test** — not the whole suite, just the one test +## Output Checklist -## Commit Strategy +Summarize: -After completing each tier: -1. Build and verify the binary -2. Run format and lint -3. 
Create a git commit describing which tier was completed +- compliance status +- lifecycle status +- unit-test status +- any dependencies you could not satisfy locally +- remaining gaps or TODOs diff --git a/.claude/skills/entire-external-agent/write-tests.md b/.claude/skills/entire-external-agent/write-tests.md index 8d985cb..8332db1 100644 --- a/.claude/skills/entire-external-agent/write-tests.md +++ b/.claude/skills/entire-external-agent/write-tests.md @@ -1,291 +1,132 @@ --- name: write-tests description: > - Phase 2: Scaffold the external agent binary and add E2E tests to the shared - harness. Tests define the spec for the implement phase. - Use /entire-external-agent write-tests or /entire-external-agent:write-tests - when you only need test scaffolding. + Phase 2: Scaffold the external agent binary and add the correct testing hooks: + protocol compliance through external-agents-tests and lifecycle integration + through this repo's e2e harness. --- # Write-Tests Procedure -Scaffold the external agent binary and add E2E tests to the shared repo-root `e2e/` harness. The harness auto-discovers all agents and exercises each one via protocol subcommands and full lifecycle integration (entire enable, agent invocation, checkpoint validation). Tests are expected to fail — they define the spec for the implement phase. +Scaffold the external agent binary and wire it into the current testing split. + +Do not add new generic protocol tests under this repo's `e2e/` directory. ## Prerequisites -Ensure the following are available: -- `AGENT_NAME`, `AGENT_SLUG`, `LANGUAGE`, `PROJECT_DIR` — from orchestrator or user -- `/AGENT.md` — research one-pager with protocol mapping and E2E test prerequisites +Ensure these are available: -## Step 1: Scaffold the Binary +- `AGENT_NAME` +- `AGENT_SLUG` +- `LANGUAGE` +- `PROJECT_DIR` +- `/AGENT.md` -Generate the project structure with compilable stubs. 
This is a condensed version of scaffolding — enough to get a binary that compiles and returns valid `info` JSON. +## Step 1: Scaffold the Binary -### Read source material at runtime +Create a compilable binary that already exposes the protocol subcommands with valid JSON shapes. -**Do not use static templates.** Read the following files at runtime to generate code that matches the current protocol version: +Read at runtime: -1. Read `https://github.com/entireio/cli/blob/main/docs/architecture/external-agent-protocol.md` — subcommand specs, JSON schemas, capabilities -2. Read `https://github.com/entireio/cli/blob/main/cmd/entire/cli/agent/external/types.go` — JSON response struct definitions -3. Read `https://github.com/entireio/cli/blob/main/cmd/entire/cli/agent/external/external.go` — how the CLI calls each subcommand -4. Read `<PROJECT_DIR>/AGENT.md` — agent-specific decisions (capabilities, hook format, transcript location) +1. `https://github.com/entireio/cli/blob/main/docs/architecture/external-agent-protocol.md` +2. `https://github.com/entireio/cli/blob/main/cmd/entire/cli/agent/external/types.go` +3. `https://github.com/entireio/cli/blob/main/cmd/entire/cli/agent/external/external.go` +4. 
`/AGENT.md` -### Generate project structure (Go) +For Go agents, prefer this shape: -``` +```text / - go.mod # Module: github.com//entire-agent- - main.go # Subcommand dispatch switch + go.mod + Makefile + README.md + AGENT.md cmd/ - info.go # Required: info subcommand - detect.go # Required: detect subcommand - session.go # Required: session subcommands - transcript.go # Required: transcript subcommands - resume.go # Required: format-resume-command - hooks.go # Capability: hooks (if declared) - analyzer.go # Capability: transcript_analyzer (if declared) - [other capabilities] + entire-agent-/ + main.go internal/ - types.go # Response types from external/types.go - protocol.go # Env var helpers, constants - AGENT.md # Research one-pager (already exists) - README.md # Usage, installation, development - Makefile # build, install, test, test:e2e -``` - -**Only create capability files for capabilities declared in AGENT.md.** - -Each subcommand handler should: -1. Parse arguments from `os.Args` or the language's arg parser -2. Read stdin if required -3. Return valid JSON matching the exact schema from `types.go` -4. Use placeholder values (realistic but clearly fake, e.g., `session_id: "stub-session-000"`) - -### Verify the scaffold - -1. **Compiles without errors:** `make build` -2. **`info` returns valid JSON:** `./entire-agent- info | python3 -c "import json,sys; print(json.dumps(json.load(sys.stdin), indent=2))"` -3. **Unknown subcommand exits non-zero:** `./entire-agent- bogus; echo "exit: $?"` - -### Commit the scaffold - -Create a git commit for the scaffolded project. - -## Step 2: Read the Shared E2E Harness - -This repo already has a shared E2E harness at the repo root `e2e/` directory. Read these files to understand the patterns you must follow: - -1. `e2e/setup_test.go` — `TestMain` entry point: auto-discovers agents in `agents/`, builds binaries, adds them to PATH -2. 
`e2e/testenv.go` — `TestEnv`: isolated filesystem environment with `AgentRunner`, `WriteFile`, `ReadFile`, `GitInit` helpers -3. `e2e/harness.go` — `AgentRunner`: executes agent subcommands via `Run`, `RunJSON`, `MustSucceed`, `MustFail` -4. `e2e/fixtures.go` — Test input builders: `HookInput`, `ParseHookInput`, `KiroTranscript` (with `AddPrompt`, `AddResponse`, `AddPromptWithFileEdit`) -5. `e2e/entire.go` — CLI wrappers: `EntireEnable`, `EntireDisable`, `EntireRewindList`, `EntireRewind`, `EntireRunErr` -6. `e2e/lifecycle.go` — `LifecycleEnv`: full lifecycle environment (git repo + `entire enable` + `WaitForCheckpoint` + `GetCheckpointTrailer`) -7. `e2e/kiro_test.go` — Example subcommand tests (identity, sessions, hooks, transcript analysis) -8. `e2e/kiro_lifecycle_test.go` — Example lifecycle tests (single/multi prompt, detect+enable, rewind, session persistence) - -**Key patterns to follow:** -- All E2E files use `//go:build e2e` build tag and `package e2e` -- `TestMain` auto-discovers agents by scanning `agents/entire-agent-*` directories for `cmd//main.go` -- `NewTestEnv(t, "entire-agent-")` creates an isolated env with the built agent binary -- `NewLifecycleEnv(t, "")` creates a full git repo with `entire enable` already run -- Subcommand tests use `t.Parallel()` and `AgentRunner.RunJSON` for structured assertions -- Lifecycle tests call `requireEntire(t)` and `requireKiroCLI(t)` (or equivalent) to skip/fail gracefully -- `WaitForCheckpoint` polls until the `entire/checkpoints/v1` branch appears -- Fixture builders (e.g. `KiroTranscript`) use the fluent pattern for easy test data construction - -## Step 3: Add Tests to the Shared E2E Harness - -Tests go in the existing `e2e/` directory at the repo root. The harness already provides all infrastructure — you only need to add test files and (optionally) agent-specific fixture builders. 
- -### How auto-discovery works - -`TestMain` in `e2e/setup_test.go` scans `agents/entire-agent-*` for directories with `cmd//main.go`, builds each binary, and stores them in `agentBinaries`. Your new agent is discovered automatically once the scaffold from Step 1 compiles. - -### Create `e2e/_test.go` — Subcommand Tests - -These exercise each protocol subcommand directly. Follow the pattern in `e2e/kiro_test.go`: - -```go -//go:build e2e - -package e2e - -import "testing" - -// --- Identity --- - -func Test_Info(t *testing.T) { - t.Parallel() - env := NewTestEnv(t, "entire-agent-") - // Use env.Runner.RunJSON to decode the info response - // Assert protocol_version, name, capabilities, etc. -} - -func Test_Detect_Present(t *testing.T) { - t.Parallel() - env := NewTestEnv(t, "entire-agent-") - // Create the agent's marker directory (e.g. ./) - // Assert detect returns present: true -} - -func Test_Detect_Absent(t *testing.T) { - t.Parallel() - env := NewTestEnv(t, "entire-agent-") - // Assert detect returns present: false (no marker directory) -} - -// --- Sessions --- -// Test get-session-id, get-session-dir, resolve-session-file, write+read-session - -// --- Hooks --- -// Test parse-hook for each hook type, install-hooks, uninstall-hooks, are-hooks-installed - -// --- Transcript --- -// Test read-transcript, chunk+reassemble-transcript round-trip - -// --- Transcript Analysis (if capability declared) --- -// Test get-transcript-position, extract-modified-files, extract-prompts, extract-summary + protocol/ + / ``` -**Key patterns:** -- Use `NewTestEnv(t, "entire-agent-")` for isolated environments -- Create convenience constructors like `NewKiroTestEnv` if multiple tests share setup -- Use `env.Runner.RunJSON` for structured output, `MustSucceed`/`MustFail` for exit code checks -- All subcommand tests use `t.Parallel()` for speed - -### Create `e2e/_lifecycle_test.go` — Lifecycle Tests - -These exercise the full integration. 
Follow the pattern in `e2e/kiro_lifecycle_test.go`: +The scaffold must: -```go -//go:build e2e +- build successfully +- return valid `info` JSON +- exit non-zero for an unknown subcommand -package e2e +## Step 2: Prepare Protocol Compliance -import "testing" +The generic protocol suite lives in `external-agents-tests`. -func TestLifecycle__SinglePromptManualCommit(t *testing.T) { - requireEntire(t) - // requireCLI(t) — add a similar helper for your agent's CLI - t.Parallel() +Your job in this repo is to make the agent easy to validate there: - env := NewLifecycleEnv(t, "") - - // Run a prompt that creates a file - // Assert the file exists - // git add + commit - // WaitForCheckpoint - // Verify checkpoint trailer -} -``` - -**Key patterns:** -- Call `requireEntire(t)` (and a `requireCLI(t)` helper) at the top — these skip the test gracefully when dependencies are missing -- Use `NewLifecycleEnv(t, "")` which handles git init, seed commit, `.entire/settings.json`, and `entire enable` -- Add a `RunPrompt` method on `LifecycleEnv` using the command from AGENT.md's E2E prerequisites - -### Add agent-specific fixture builders (if needed) - -If the agent has a custom transcript format, add a builder to `e2e/fixtures.go` following the `KiroTranscript` pattern: - -```go -type Transcript struct { /* ... */ } -func NewTranscript(id string) *Transcript { /* ... */ } -func (t *Transcript) AddPrompt(prompt string) *Transcript { /* ... */ } -func (t *Transcript) JSON(t *testing.T) string { /* ... */ } -``` +1. Keep the binary layout compatible with a simple build command: + `go build -o entire-agent- ./cmd/entire-agent-` +2. If stronger black-box assertions are useful, add an optional fixture file under the agent module, for example: + `/testdata/compliance.json` +3. 
Document any required fixture paths in `/README.md` and `/AGENT.md` -### Add agent-specific environment helpers (if needed) +If a sibling checkout exists at `../external-agents-tests`, you can validate the scaffold locally with: -If the agent needs custom setup (e.g., Kiro needs `.kiro/` and `.entire/tmp/`), add a convenience constructor to `e2e/testenv.go`: - -```go -func NewTestEnv(t *testing.T) *TestEnv { - t.Helper() - te := NewTestEnv(t, "entire-agent-") - te.MkdirAll(".") - te.MkdirAll(".entire/tmp") - return te -} +```bash +cd ../external-agents-tests +AGENT_BINARY=/abs/path/to/entire-agent- go test -v -count=1 ./... ``` -### Key conventions for test scenarios - -- **Build tag**: All E2E files must have `//go:build e2e` as the first line -- **Package**: All files in `e2e/` use `package e2e` -- **Naming**: Subcommand tests: `Test_`. Lifecycle tests: `TestLifecycle__` -- **Timeouts**: Lifecycle tests use `WaitForCheckpoint(t, env, 30*time.Second)` for checkpoint polling -- **Prompts**: Write prompts inline — include "Do not ask for confirmation" for agents that stall -- **Assertions**: Use harness helpers (`AssertFileExists`, `GetCheckpointTrailer`), not raw git commands -- **CLI operations**: Use `EntireEnable`, `EntireRewindList`, `EntireRewind` — never raw `exec.Command` -- **Parallelism**: Subcommand tests use `t.Parallel()`. Lifecycle tests use `t.Parallel()` per test (each gets its own temp repo) -- **Graceful skipping**: Lifecycle tests call `requireEntire(t)` to skip when the entire CLI isn't available - -## Step 4: Add Makefile Targets +At this stage the tests are expected to fail. The goal is just to confirm the harness reaches the binary. -### Agent-level Makefile (`/Makefile`) +## Step 3: Wire the Agent into Lifecycle Tests -Add `build`, `test`, and `clean` targets for the agent binary: +Lifecycle integration remains in this repo. -```makefile -BINARY := entire-agent- +Read these files before editing: -.PHONY: build test clean +1. 
`e2e/setup_test.go` +2. `e2e/build.go` +3. `e2e/lifecycle_test.go` +4. `e2e/agents/agent.go` +5. `e2e/agents/kiro.go` +6. `e2e/testutil/repo.go` +7. `e2e/entire/entire.go` -build: - go build -o $(BINARY) ./cmd/entire-agent- - -test: - go test ./... - -clean: - rm -f $(BINARY) -``` +Then: -### Repo-root Makefile +1. Add `e2e/agents/.go` implementing the `Agent` interface. +2. Register the agent in `init()` and set a concurrency gate. +3. Implement `RunPrompt`, `StartSession`, `PromptPattern`, timeout multiplier, and any external-agent marker behavior needed by `SetupRepo`. +4. Reuse the shared lifecycle scenarios in `e2e/lifecycle_test.go`. Add new lifecycle tests only if the new agent needs behavior that is not already covered. -The repo-root `Makefile` already handles E2E test execution. Verify it includes: +## Step 4: Verify the Scaffolding -```makefile -test-e2e: - cd e2e && go test -tags=e2e -v -count=1 ./... +Run these checks: -test-e2e-lifecycle: - cd e2e && E2E_REQUIRE_LIFECYCLE=1 go test -tags=e2e -v -count=1 -run TestLifecycle ./... - -test-unit: - @for dir in agents/entire-agent-*/; do \ - echo "Testing $$dir..."; \ - cd $$dir && go test ./... && cd ../..; \ - done +```bash +cd +make build +./entire-agent- info +go test ./... -test-all: test-unit test-e2e +cd /path/to/repo/e2e +go test -c -tags=e2e ``` -The `test-e2e` target builds all agents automatically via `TestMain` — no need to build/install first. - -## Step 5: Verify Tests Compile - -Run from the repo root: -```bash -cd e2e && go test -c -tags=e2e -``` +If `../external-agents-tests` exists, also run one failing compliance pass against the built binary. -This must succeed (compiles the test binary including the new agent's tests). Tests are expected to fail when executed — they define the spec for the implement phase. +## Step 5: Commit Gate -If the harness doesn't compile, fix issues before proceeding. 
+Create a commit once: -## Step 6: Commit +- the binary compiles +- `info` returns valid JSON +- lifecycle harness compiles +- the compliance suite can invoke the binary, even if assertions still fail -Create a git commit for the new E2E tests and the scaffolded binary. +## Output Checklist -## Output +Summarize: -Summarize what was created: -- Project structure (files created, capabilities declared) -- E2E tests added (number of subcommand tests and lifecycle tests, what they exercise) -- Confirmation that binary compiles and `info` returns valid JSON -- Confirmation that E2E harness compiles with new tests (`go test -c -tags=e2e`) -- Note that all E2E tests are expected to fail — the implement phase will make them pass -- Commands to run: `make test-e2e` (all tests) or `cd e2e && go test -tags=e2e -v -run Test_Info ./...` (single test) +- files created under `` +- lifecycle adapter files added or updated under `e2e/` +- optional compliance fixture paths +- commands run and their status diff --git a/.github/workflows/protocol-compliance.yml b/.github/workflows/protocol-compliance.yml new file mode 100644 index 0000000..85357a2 --- /dev/null +++ b/.github/workflows/protocol-compliance.yml @@ -0,0 +1,65 @@ +name: Protocol Compliance + +on: + pull_request: + push: + branches: + - main + +permissions: + contents: read + +jobs: + discover-agents: + runs-on: ubuntu-latest + outputs: + count: ${{ steps.discover.outputs.count }} + matrix: ${{ steps.discover.outputs.matrix }} + steps: + - uses: actions/checkout@v4 + + - id: discover + shell: bash + run: | + count=0 + entries="" + + while IFS= read -r dir; do + name="$(basename "$dir")" + if [[ -n "$entries" ]]; then + entries="${entries}," + fi + entries="${entries}{\"name\":\"${name}\",\"dir\":\"${dir}\",\"binary\":\"${dir}/${name}\"}" + count=$((count + 1)) + done < <(find agents -mindepth 1 -maxdepth 1 -type d -name 'entire-agent-*' | sort) + + echo "count=${count}" >> "$GITHUB_OUTPUT" + echo 
"matrix={\"include\":[${entries}]}" >> "$GITHUB_OUTPUT" + + test-agents: + needs: discover-agents + if: ${{ needs.discover-agents.outputs.count != '0' }} + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: ${{ fromJson(needs.discover-agents.outputs.matrix) }} + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-go@v5 + with: + go-version-file: ${{ matrix.dir }}/go.mod + cache-dependency-path: ${{ matrix.dir }}/go.sum + + - name: Run unit tests + working-directory: ${{ matrix.dir }} + run: go test ./... + + - name: Build agent binary + working-directory: ${{ matrix.dir }} + run: go build -o "${{ matrix.name }}" "./cmd/${{ matrix.name }}" + + - name: Run protocol compliance suite + uses: entireio/external-agents-tests@main + with: + binary-path: ${{ matrix.binary }} diff --git a/AGENTS.md b/AGENTS.md index a694aad..bbdf8d8 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,6 +1,10 @@ # External Agent Builder -This repository includes a skill that guides you through building standalone external agent binaries for the [Entire CLI](https://github.com/entireio/cli). The skill uses E2E-first TDD and runs in three phases. +This repository includes a skill that guides you through building standalone external agent binaries for the [Entire CLI](https://github.com/entireio/cli). 
The current testing split is: + +- Protocol compliance in `external-agents-tests` +- Lifecycle integration in this repo's `e2e/` harness +- Agent-specific unit tests in each `agents/entire-agent-*` module ## Available Commands @@ -8,8 +12,8 @@ This repository includes a skill that guides you through building standalone ext |---------|-----------|-------------| | Full pipeline | `.claude/skills/entire-external-agent/SKILL.md` | Run all three phases sequentially | | Research | `.claude/skills/entire-external-agent/research.md` | Analyze the target agent's capabilities and map to the protocol | -| Write tests | `.claude/skills/entire-external-agent/write-tests.md` | Scaffold the binary and create E2E test harness | -| Implement | `.claude/skills/entire-external-agent/implement.md` | Build the binary using E2E-first TDD (unit tests last) | +| Write tests | `.claude/skills/entire-external-agent/write-tests.md` | Scaffold the binary and wire protocol compliance plus lifecycle coverage | +| Implement | `.claude/skills/entire-external-agent/implement.md` | Build the binary using protocol compliance first, lifecycle second, unit tests last | ## How to Use @@ -18,6 +22,7 @@ When the user asks to "build an external agent", "create an agent binary", or "e 1. Read `.claude/skills/entire-external-agent/SKILL.md` for the full pipeline overview 2. Follow the three phases in order: research, write-tests, implement 3. Each phase has a dedicated skill file with detailed instructions +4. Keep reusable protocol checks out of this repo's `e2e/` directory. Add them to `external-agents-tests` instead. 
## Tool Mapping (Codex) diff --git a/Makefile b/Makefile index 6e97767..17043c3 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,13 @@ -.PHONY: test-e2e test-unit test-all test-e2e-lifecycle test-e2e-binary +.PHONY: test-e2e test-unit test-all test-e2e-lifecycle test-e2e: + @$(MAKE) test-e2e-lifecycle AGENT="$(AGENT)" + +test-e2e-lifecycle: ifdef AGENT - cd e2e && E2E_AGENT=$(AGENT) go test -tags=e2e -v -count=1 ./... + cd e2e && E2E_AGENT=$(AGENT) go test -tags=e2e -v -count=1 -run TestLifecycle ./... else - cd e2e && go test -tags=e2e -v -count=1 ./... + cd e2e && go test -tags=e2e -v -count=1 -run TestLifecycle ./... endif test-unit: @@ -13,19 +16,4 @@ test-unit: cd $$dir && go test ./... && cd ../..; \ done -test-e2e-lifecycle: -ifdef AGENT - cd e2e && E2E_AGENT=$(AGENT) E2E_REQUIRE_LIFECYCLE=1 go test -tags=e2e -v -count=1 -run TestLifecycle ./... -else - cd e2e && E2E_REQUIRE_LIFECYCLE=1 go test -tags=e2e -v -count=1 -run TestLifecycle ./... -endif - -test-e2e-binary: -ifdef AGENT - cd e2e && go test -tags=e2e -v -count=1 ./$(AGENT)/ -else - @echo "Usage: make test-e2e-binary AGENT=kiro" - @exit 1 -endif - -test-all: test-unit test-e2e +test-all: test-unit test-e2e-lifecycle diff --git a/README.md b/README.md index 69365d6..8c08ab0 100644 --- a/README.md +++ b/README.md @@ -22,11 +22,11 @@ See each agent's own README for setup and usage instructions. ## Building a New External Agent -This repo includes a skill that guides you through building a new external agent using an E2E-first TDD pipeline. The skill runs in three phases: +This repo includes a skill that guides you through building a new external agent against two test layers, followed by an implementation step: -1. **Research** — analyzes the target AI agent's file formats, session layout, and hook mechanisms -2. **Write tests** — generates E2E and unit tests against the external agent protocol -3. **Implement** — builds the Go binary to pass all tests +1. 
**Protocol compliance** — generic subcommand coverage from [`entireio/external-agents-tests`](https://github.com/entireio/external-agents-tests) +2. **Lifecycle integration** — repo-local `e2e/` tests that exercise `entire enable`, prompt execution, checkpoints, and rewind +3. **Implementation** — build the binary until both layers pass, then add unit tests ### Getting Started — Zero Setup @@ -41,55 +41,61 @@ Clone the repo and open it in your AI coding tool. Each tool auto-discovers the The skill files live in `.claude/skills/entire-external-agent/` if you want to read the details. -## E2E Tests +## Testing -The `e2e/` directory contains a shared test harness that exercises all external agents. Tests are split into two tiers: +Testing is intentionally split: -- **Subcommand tests** (`kiro_test.go`) — exercise each protocol subcommand directly against the agent binary (identity, sessions, transcript, hooks, transcript analysis). These run without any external dependencies beyond the agent binary itself. -- **Lifecycle tests** (`kiro_lifecycle_test.go`) — exercise the full integration flow: `entire enable`, agent prompt execution, git commit, checkpoint creation, and rewind. These require the `entire` CLI and the agent's own CLI (e.g. `kiro-cli-chat`) to be available. +- **Generic protocol checks** run in GitHub Actions via [`entireio/external-agents-tests`](https://github.com/entireio/external-agents-tests). The workflow builds each `entire-agent-*` binary in this repo and runs the shared compliance suite against it. +- **Lifecycle tests** stay in this repo's [`e2e/`](e2e/) harness. These verify the parts that depend on Entire itself and on the real agent CLI: prompt execution, hook installation after `entire enable`, checkpoint creation, rewind behavior, and interactive sessions. +- **Unit tests** live with each agent implementation under [`agents/`](agents/). 
### Running Tests ```bash -# Run all E2E tests (subcommand-level only; lifecycle tests skip if deps missing) +# Run unit tests for all agents +make test-unit + +# Run lifecycle integration tests from this repo make test-e2e -# Run lifecycle tests (fails instead of skipping if entire/kiro-cli-chat are missing) +# Same as test-e2e, kept as the explicit name make test-e2e-lifecycle -# Run unit tests for all agents -make test-unit - -# Run everything +# Run unit + lifecycle tests locally make test-all ``` -### Test Harness Architecture +Protocol compliance runs in CI through [`.github/workflows/protocol-compliance.yml`](.github/workflows/protocol-compliance.yml). + +### Lifecycle Harness Architecture -The shared harness auto-discovers and builds all agents in `agents/` via `TestMain`: +The lifecycle harness auto-discovers and builds all agents in `agents/` via `TestMain`: | File | Purpose | |------|---------| | `e2e/setup_test.go` | `TestMain` entry point — discovers agents, builds binaries, configures PATH | -| `e2e/testenv.go` | `TestEnv` — isolated filesystem environment with agent binary runner | -| `e2e/harness.go` | `AgentRunner` — executes agent subcommands, captures stdout/stderr/exit code | -| `e2e/fixtures.go` | Test input builders: `HookInput`, `ParseHookInput`, `KiroTranscript` | -| `e2e/entire.go` | CLI wrappers: `EntireEnable`, `EntireDisable`, `EntireRewindList`, `EntireRewind` | -| `e2e/lifecycle.go` | `LifecycleEnv` — full lifecycle environment (git repo + `entire enable` + checkpoint helpers) | +| `e2e/lifecycle_test.go` | Shared lifecycle scenarios run against every registered agent | +| `e2e/agents/` | Agent adapters for the real CLIs used during lifecycle tests | +| `e2e/entire/` | Entire CLI wrappers used by lifecycle assertions | +| `e2e/testutil/` | Repo setup, artifact capture, git helpers, and checkpoint assertions | ### Environment Variables | Variable | Description | |----------|-------------| | `E2E_ENTIRE_BIN` | Path to the `entire` binary 
(defaults to `entire` from PATH) | -| `E2E_REQUIRE_LIFECYCLE` | Set to `1` to fail (instead of skip) when lifecycle dependencies are missing | +| `E2E_AGENT` | Filter lifecycle runs to a single registered agent | +| `E2E_ARTIFACT_DIR` | Override lifecycle artifact output directory | +| `E2E_KEEP_REPOS` | Preserve temp repos for debugging | +| `E2E_CONCURRENT_TEST_LIMIT` | Override the per-agent lifecycle concurrency limit | ## Repository Layout ``` agents/ # Standalone external agent projects entire-agent-kiro/ # Kiro agent (Go binary) -e2e/ # Shared E2E test harness for all agents +e2e/ # Lifecycle integration harness +.github/workflows/ # CI, including protocol compliance via external-agents-tests .claude/skills/entire-external-agent/ # Skill files (research, test-writer, implementer) AGENTS.md # Codex auto-discovery .cursor/rules/ # Cursor auto-discovery diff --git a/agents/entire-agent-kiro/README.md b/agents/entire-agent-kiro/README.md index a18522b..0ae54e8 100644 --- a/agents/entire-agent-kiro/README.md +++ b/agents/entire-agent-kiro/README.md @@ -103,24 +103,15 @@ make clean # Remove built binary go run ./cmd/entire-agent-kiro info ``` -## E2E Tests +## Testing -E2E tests live in the shared `e2e/` directory at the repo root (not inside this agent's directory). The harness auto-discovers and builds all agents, then runs tests against each. +Kiro is validated in three places: -### Subcommand tests (`e2e/kiro_test.go`) +- **Unit tests** live in this module and cover the Kiro-specific implementation details. +- **Protocol compliance** runs in GitHub Actions through [`entireio/external-agents-tests`](https://github.com/entireio/external-agents-tests) against the built `entire-agent-kiro` binary. +- **Lifecycle tests** live in the shared repo-root [`e2e/`](../../e2e/) harness and require `entire` plus `kiro-cli-chat`. 
-Exercise each protocol subcommand directly — no external dependencies needed: - -- **Identity**: `info`, `detect` (present/absent) -- **Sessions**: `get-session-id`, `get-session-dir`, `resolve-session-file`, `write-session`/`read-session` round-trip -- **Transcript**: `read-transcript`, `chunk-transcript`/`reassemble-transcript` round-trip -- **Hooks**: `parse-hook` (spawn, prompt-submit, pre-tool-use, stop), `install-hooks`/`uninstall-hooks`/`are-hooks-installed`, idempotent install -- **Transcript analysis**: `get-transcript-position`, `extract-modified-files`, `extract-prompts`, `extract-summary` -- **Other**: `format-resume-command`, unknown subcommand handling - -### Lifecycle tests (`e2e/kiro_lifecycle_test.go`) - -Full integration tests requiring `entire` CLI and `kiro-cli-chat`: +The lifecycle suite covers: - **SinglePromptManualCommit** — agent creates file → commit → checkpoint with trailer - **MultiplePromptsManualCommit** — two prompts → single commit → checkpoint covers both @@ -133,12 +124,15 @@ Full integration tests requiring `entire` CLI and `kiro-cli-chat`: ### Running ```bash +# From this module: +make test # Unit tests + # From the repo root: -make test-e2e # All E2E tests (lifecycle tests skip if deps missing) -make test-e2e-lifecycle # Lifecycle tests only (fails if deps missing) +make test-e2e # Lifecycle tests +make test-e2e-lifecycle # Explicit lifecycle target # Run a specific test: -cd e2e && go test -tags=e2e -v -count=1 -run TestKiro_Info ./... +cd e2e && go test -tags=e2e -v -count=1 -run TestLifecycle_SinglePromptManualCommit ./... 
``` ## Troubleshooting diff --git a/agents/entire-agent-kiro/internal/kiro/.entire/tmp/kiro-tool-calls.jsonl b/agents/entire-agent-kiro/internal/kiro/.entire/tmp/kiro-tool-calls.jsonl new file mode 100644 index 0000000..c22140f --- /dev/null +++ b/agents/entire-agent-kiro/internal/kiro/.entire/tmp/kiro-tool-calls.jsonl @@ -0,0 +1,6 @@ +{"id":"","name":"read","args":null} +{"id":"","name":"write","args":{"file_path":"/tmp/main.go","content":"package main"}} +{"id":"","name":"read","args":null} +{"id":"","name":"write","args":{"file_path":"/tmp/main.go","content":"package main"}} +{"id":"","name":"read","args":null} +{"id":"","name":"write","args":{"file_path":"/tmp/main.go","content":"package main"}} diff --git a/e2e/README.md b/e2e/README.md index 0651f8b..a670098 100644 --- a/e2e/README.md +++ b/e2e/README.md @@ -1,6 +1,8 @@ -# E2E Tests +# Lifecycle Tests -End-to-end tests for external agents, exercising the full lifecycle: agent prompts, git hooks, checkpoints, and rewind. +End-to-end lifecycle tests for external agents. This harness covers the behaviors that only make sense against the real Entire CLI and the real agent CLI: `entire enable`, prompt execution, hook installation, checkpoint creation, rewind, and interactive sessions. + +Generic protocol compliance is no longer in this directory. Those checks run from [`entireio/external-agents-tests`](https://github.com/entireio/external-agents-tests) and are wired into this repo through GitHub Actions. 
## Structure @@ -19,23 +21,20 @@ e2e/ │ └── assertions.go # Test assertions (testify-based) ├── bootstrap/ # Pre-test agent bootstrap (CI auth setup) │ └── main.go # go run ./e2e/bootstrap +├── build.go # Agent discovery + binary builds for lifecycle runs ├── setup_test.go # TestMain: build agents, artifact dir, preflight -├── kiro_lifecycle_test.go # Lifecycle tests (ForEachAgent pattern) -├── kiro_test.go # Protocol-level tests (stdin/stdout subcommands) -├── harness.go # AgentRunner for protocol tests -├── testenv.go # TestEnv for protocol tests -└── fixtures.go # HookInput, KiroTranscript builders +└── lifecycle_test.go # Shared lifecycle scenarios (ForEachAgent pattern) ``` ## Running Tests -### All E2E tests (protocol + lifecycle) +### All lifecycle tests ```bash make test-e2e ``` -### Lifecycle tests only +### Explicit lifecycle target ```bash make test-e2e-lifecycle @@ -55,7 +54,6 @@ cd e2e && go test -tags=e2e -v -count=1 -run TestLifecycle_SinglePromptManualCom | `E2E_ENTIRE_BIN` | Path to `entire` binary. Falls back to `$PATH` lookup. | | `E2E_ARTIFACT_DIR` | Override artifact output directory. | | `E2E_KEEP_REPOS` | Set to any value to preserve temp repos after tests. | -| `E2E_REQUIRE_LIFECYCLE` | Set to `1` to fail (not skip) when lifecycle deps are missing. | | `E2E_CONCURRENT_TEST_LIMIT` | Override per-agent concurrency limit (default: 2 for kiro). | ## Adding a New Agent @@ -63,7 +61,8 @@ cd e2e && go test -tags=e2e -v -count=1 -run TestLifecycle_SinglePromptManualCom 1. Create `e2e/agents/.go` implementing the `Agent` interface. 2. In `init()`, conditionally register based on `E2E_AGENT` env var. 3. Call `RegisterGate("", N)` to set concurrency limit. -4. If it's an external agent, implement `ExternalAgent` interface. +4. If it's an external agent, implement `ExternalAgent` so `SetupRepo` can pre-enable external agents in Entire settings. +5. Keep generic protocol validation out of this directory. 
Add any reusable black-box protocol coverage to `external-agents-tests` instead. ## Debugging Failures diff --git a/e2e/fixtures.go b/e2e/fixtures.go deleted file mode 100644 index 22c6f40..0000000 --- a/e2e/fixtures.go +++ /dev/null @@ -1,50 +0,0 @@ -//go:build e2e - -package e2e - -import ( - "encoding/json" - "testing" -) - -// HookInput builds stdin payloads for hook-related subcommands. -type HookInput struct { - HookType string `json:"hook_type,omitempty"` - SessionID string `json:"session_id,omitempty"` - SessionRef string `json:"session_ref,omitempty"` - Timestamp string `json:"timestamp,omitempty"` - UserPrompt string `json:"user_prompt,omitempty"` - ToolName string `json:"tool_name,omitempty"` - ToolUseID string `json:"tool_use_id,omitempty"` - ToolInput json.RawMessage `json:"tool_input,omitempty"` - RawData map[string]interface{} `json:"raw_data,omitempty"` -} - -// JSON returns the JSON-encoded string for use as stdin. -func (h HookInput) JSON(t *testing.T) string { - t.Helper() - data, err := json.Marshal(h) - if err != nil { - t.Fatalf("marshal HookInput: %v", err) - } - return string(data) -} - -// ParseHookInput builds stdin payloads for the parse-hook subcommand. -type ParseHookInput struct { - HookEventName string `json:"hook_event_name,omitempty"` - CWD string `json:"cwd,omitempty"` - Prompt string `json:"prompt,omitempty"` - ToolName string `json:"tool_name,omitempty"` - ToolInput json.RawMessage `json:"tool_input,omitempty"` -} - -// JSON returns the JSON-encoded string. 
-func (p ParseHookInput) JSON(t *testing.T) string { - t.Helper() - data, err := json.Marshal(p) - if err != nil { - t.Fatalf("marshal ParseHookInput: %v", err) - } - return string(data) -} diff --git a/e2e/harness.go b/e2e/harness.go deleted file mode 100644 index c776bb7..0000000 --- a/e2e/harness.go +++ /dev/null @@ -1,88 +0,0 @@ -//go:build e2e - -package e2e - -import ( - "bytes" - "encoding/json" - "errors" - "os/exec" - "testing" -) - -// CommandResult holds the output of a binary invocation. -type CommandResult struct { - Stdout []byte - Stderr []byte - ExitCode int - Err error -} - -// AgentRunner invokes an agent binary with subcommands. -type AgentRunner struct { - BinaryPath string - Env []string -} - -// Run executes the agent binary with the given subcommand, args, and optional stdin. -func (r *AgentRunner) Run(stdin string, subcommand string, args ...string) CommandResult { - cmdArgs := append([]string{subcommand}, args...) - cmd := exec.Command(r.BinaryPath, cmdArgs...) - cmd.Stdin = bytes.NewBufferString(stdin) - cmd.Env = r.Env - - var stdout, stderr bytes.Buffer - cmd.Stdout = &stdout - cmd.Stderr = &stderr - - err := cmd.Run() - exitCode := 0 - if err != nil { - var exitErr *exec.ExitError - if errors.As(err, &exitErr) { - exitCode = exitErr.ExitCode() - } else { - exitCode = -1 - } - } - - return CommandResult{ - Stdout: stdout.Bytes(), - Stderr: stderr.Bytes(), - ExitCode: exitCode, - Err: err, - } -} - -// RunJSON executes the subcommand and JSON-decodes stdout into dest. -func (r *AgentRunner) RunJSON(t *testing.T, dest any, stdin string, subcommand string, args ...string) CommandResult { - t.Helper() - result := r.Run(stdin, subcommand, args...) 
- if result.ExitCode != 0 { - t.Fatalf("%s %s failed (exit %d): %s", r.BinaryPath, subcommand, result.ExitCode, result.Stderr) - } - if err := json.Unmarshal(result.Stdout, dest); err != nil { - t.Fatalf("failed to decode JSON from %s %s: %v\nstdout: %s", r.BinaryPath, subcommand, err, result.Stdout) - } - return result -} - -// MustSucceed asserts the subcommand exits with code 0. -func (r *AgentRunner) MustSucceed(t *testing.T, stdin string, subcommand string, args ...string) CommandResult { - t.Helper() - result := r.Run(stdin, subcommand, args...) - if result.ExitCode != 0 { - t.Fatalf("%s %s: expected exit 0, got %d\nstderr: %s", r.BinaryPath, subcommand, result.ExitCode, result.Stderr) - } - return result -} - -// MustFail asserts the subcommand exits with a non-zero code. -func (r *AgentRunner) MustFail(t *testing.T, stdin string, subcommand string, args ...string) CommandResult { - t.Helper() - result := r.Run(stdin, subcommand, args...) - if result.ExitCode == 0 { - t.Fatalf("%s %s: expected non-zero exit, got 0\nstdout: %s", r.BinaryPath, subcommand, result.Stdout) - } - return result -} diff --git a/e2e/kiro/fixtures_test.go b/e2e/kiro/fixtures_test.go deleted file mode 100644 index 9b3aa55..0000000 --- a/e2e/kiro/fixtures_test.go +++ /dev/null @@ -1,105 +0,0 @@ -//go:build e2e - -package kiro - -import ( - "encoding/json" - "testing" - - e2e "github.com/entireio/external-agents/e2e" -) - -// KiroTranscript builds Kiro-format transcript files for testing. -type KiroTranscript struct { - ConversationID string `json:"conversation_id"` - History []kiroHistoryEntry `json:"history"` -} - -type kiroHistoryEntry struct { - User kiroUserMessage `json:"user"` - Assistant json.RawMessage `json:"assistant"` -} - -type kiroUserMessage struct { - Content json.RawMessage `json:"content"` - Timestamp string `json:"timestamp,omitempty"` -} - -// NewKiroTranscript creates a new transcript builder. 
-func NewKiroTranscript(id string) *KiroTranscript { - return &KiroTranscript{ConversationID: id} -} - -func marshalPromptContent(prompt string) json.RawMessage { - content, _ := json.Marshal(map[string]interface{}{ - "Prompt": map[string]string{"prompt": prompt}, - }) - return content -} - -// AddPrompt adds a user prompt entry with no assistant response. -func (kt *KiroTranscript) AddPrompt(prompt string) *KiroTranscript { - kt.History = append(kt.History, kiroHistoryEntry{ - User: kiroUserMessage{Content: marshalPromptContent(prompt)}, - }) - return kt -} - -// AddPromptWithFileEdit adds a user prompt paired with an assistant response that contains a file edit tool use. -func (kt *KiroTranscript) AddPromptWithFileEdit(prompt, filePath string) *KiroTranscript { - toolUse := map[string]interface{}{ - "ToolUse": map[string]interface{}{ - "message_id": "msg-001", - "tool_uses": []map[string]interface{}{ - { - "id": "tool-001", - "name": "fs_write", - "args": map[string]string{"path": filePath}, - }, - }, - }, - } - assistantContent, _ := json.Marshal(toolUse) - - kt.History = append(kt.History, kiroHistoryEntry{ - User: kiroUserMessage{Content: marshalPromptContent(prompt)}, - Assistant: assistantContent, - }) - return kt -} - -// AddResponse adds a user prompt paired with an assistant text response. -func (kt *KiroTranscript) AddResponse(prompt, response string) *KiroTranscript { - userContent := marshalPromptContent(prompt) - - responseContent := map[string]interface{}{ - "Response": map[string]interface{}{ - "message_id": "msg-resp", - "content": response, - }, - } - assistantContent, _ := json.Marshal(responseContent) - - kt.History = append(kt.History, kiroHistoryEntry{ - User: kiroUserMessage{Content: userContent}, - Assistant: assistantContent, - }) - return kt -} - -// JSON returns the JSON-encoded transcript string. 
-func (kt *KiroTranscript) JSON(t *testing.T) string { - t.Helper() - data, err := json.Marshal(kt) - if err != nil { - t.Fatalf("marshal KiroTranscript: %v", err) - } - return string(data) -} - -// WriteToFile writes the transcript to a file and returns the absolute path. -func (kt *KiroTranscript) WriteToFile(t *testing.T, env *e2e.TestEnv, relPath string) string { - t.Helper() - env.WriteFile(relPath, kt.JSON(t)) - return env.AbsPath(relPath) -} diff --git a/e2e/kiro/kiro_test.go b/e2e/kiro/kiro_test.go deleted file mode 100644 index d4d3011..0000000 --- a/e2e/kiro/kiro_test.go +++ /dev/null @@ -1,613 +0,0 @@ -//go:build e2e - -package kiro - -import ( - "encoding/json" - "strings" - "testing" - - e2e "github.com/entireio/external-agents/e2e" -) - -// --- Identity --- - -func TestKiro_Info(t *testing.T) { - t.Parallel() - env := NewKiroTestEnv(t) - - var resp struct { - ProtocolVersion int `json:"protocol_version"` - Name string `json:"name"` - Type string `json:"type"` - Description string `json:"description"` - IsPreview bool `json:"is_preview"` - ProtectedDirs []string `json:"protected_dirs"` - HookNames []string `json:"hook_names"` - Capabilities struct { - Hooks bool `json:"hooks"` - TranscriptAnalyzer bool `json:"transcript_analyzer"` - } `json:"capabilities"` - } - env.Runner.RunJSON(t, &resp, "", "info") - - if resp.ProtocolVersion != 1 { - t.Errorf("protocol_version = %d, want 1", resp.ProtocolVersion) - } - if resp.Name != "kiro" { - t.Errorf("name = %q, want %q", resp.Name, "kiro") - } - if resp.Type != "Kiro" { - t.Errorf("type = %q, want %q", resp.Type, "Kiro") - } - if !resp.Capabilities.Hooks { - t.Error("capabilities.hooks should be true") - } - if !resp.Capabilities.TranscriptAnalyzer { - t.Error("capabilities.transcript_analyzer should be true") - } - if len(resp.HookNames) != 5 { - t.Errorf("hook_names count = %d, want 5", len(resp.HookNames)) - } - if len(resp.ProtectedDirs) != 1 || resp.ProtectedDirs[0] != ".kiro" { - 
t.Errorf("protected_dirs = %v, want [.kiro]", resp.ProtectedDirs) - } -} - -func TestKiro_Detect_Present(t *testing.T) { - t.Parallel() - env := NewKiroTestEnv(t) // has .kiro/ - - var resp struct { - Present bool `json:"present"` - } - env.Runner.RunJSON(t, &resp, "", "detect") - - if !resp.Present { - t.Error("detect should return present=true when .kiro/ exists") - } -} - -func TestKiro_Detect_Absent(t *testing.T) { - t.Parallel() - env := e2e.NewTestEnvWithBinary(t, kiroBinary) // no .kiro/ - - var resp struct { - Present bool `json:"present"` - } - env.Runner.RunJSON(t, &resp, "", "detect") - - if resp.Present { - t.Error("detect should return present=false when .kiro/ is absent") - } -} - -// --- Session Management --- - -func TestKiro_GetSessionID(t *testing.T) { - t.Parallel() - env := NewKiroTestEnv(t) - - input := e2e.HookInput{SessionID: "test-session-123"} - - var resp struct { - SessionID string `json:"session_id"` - } - env.Runner.RunJSON(t, &resp, input.JSON(t), "get-session-id") - - if resp.SessionID != "test-session-123" { - t.Errorf("session_id = %q, want %q", resp.SessionID, "test-session-123") - } -} - -func TestKiro_GetSessionID_Generated(t *testing.T) { - t.Parallel() - env := NewKiroTestEnv(t) - - input := e2e.HookInput{} - - var resp struct { - SessionID string `json:"session_id"` - } - env.Runner.RunJSON(t, &resp, input.JSON(t), "get-session-id") - - if resp.SessionID == "" { - t.Error("session_id should not be empty when no ID provided") - } -} - -func TestKiro_GetSessionDir(t *testing.T) { - t.Parallel() - env := NewKiroTestEnv(t) - - var resp struct { - SessionDir string `json:"session_dir"` - } - env.Runner.RunJSON(t, &resp, "", "get-session-dir", "-repo-path", env.Dir) - - want := env.AbsPath(".entire/tmp") - if resp.SessionDir != want { - t.Errorf("session_dir = %q, want %q", resp.SessionDir, want) - } -} - -func TestKiro_ResolveSessionFile(t *testing.T) { - t.Parallel() - env := NewKiroTestEnv(t) - - sessionDir := 
env.AbsPath(".entire/tmp") - - var resp struct { - SessionFile string `json:"session_file"` - } - env.Runner.RunJSON(t, &resp, "", "resolve-session-file", - "-session-dir", sessionDir, - "-session-id", "abc-123") - - want := sessionDir + "/abc-123.json" - if resp.SessionFile != want { - t.Errorf("session_file = %q, want %q", resp.SessionFile, want) - } -} - -func TestKiro_WriteAndReadSession(t *testing.T) { - t.Parallel() - env := NewKiroTestEnv(t) - - sessionRef := env.AbsPath(".entire/tmp/sess-write-test.json") - - // Write a session - writeInput := map[string]interface{}{ - "session_id": "sess-write-test", - "agent_name": "kiro", - "repo_path": env.Dir, - "session_ref": sessionRef, - "start_time": "2026-01-01T00:00:00Z", - "native_data": []byte(`{"hello":"world"}`), - } - writeJSON, _ := json.Marshal(writeInput) - env.Runner.MustSucceed(t, string(writeJSON), "write-session") - - // Verify the file was written - if !env.FileExists(".entire/tmp/sess-write-test.json") { - t.Fatal("session file was not written") - } - - // Read it back - readInput := e2e.HookInput{ - SessionID: "sess-write-test", - SessionRef: sessionRef, - } - var resp struct { - SessionID string `json:"session_id"` - AgentName string `json:"agent_name"` - NativeData []byte `json:"native_data"` - } - env.Runner.RunJSON(t, &resp, readInput.JSON(t), "read-session") - - if resp.SessionID != "sess-write-test" { - t.Errorf("session_id = %q, want %q", resp.SessionID, "sess-write-test") - } - if resp.AgentName != "kiro" { - t.Errorf("agent_name = %q, want %q", resp.AgentName, "kiro") - } -} - -// --- Transcript --- - -func TestKiro_ReadTranscript(t *testing.T) { - t.Parallel() - env := NewKiroTestEnv(t) - - transcript := NewKiroTranscript("conv-1").AddPrompt("hello").AddResponse("summarize", "done") - transcriptPath := transcript.WriteToFile(t, env, "transcript.json") - - result := env.Runner.MustSucceed(t, "", "read-transcript", "-session-ref", transcriptPath) - if len(result.Stdout) == 0 { - 
t.Error("read-transcript returned empty stdout") - } - - // Should be valid JSON - var parsed map[string]interface{} - if err := json.Unmarshal(result.Stdout, &parsed); err != nil { - t.Fatalf("read-transcript output is not valid JSON: %v", err) - } -} - -func TestKiro_ChunkTranscript(t *testing.T) { - t.Parallel() - env := NewKiroTestEnv(t) - - content := strings.Repeat("abcdefghij", 10) // 100 bytes - - var resp struct { - Chunks [][]byte `json:"chunks"` - } - env.Runner.RunJSON(t, &resp, content, "chunk-transcript", "-max-size", "30") - - if len(resp.Chunks) < 3 { - t.Errorf("expected at least 3 chunks for 100 bytes with max-size 30, got %d", len(resp.Chunks)) - } -} - -func TestKiro_ReassembleTranscript(t *testing.T) { - t.Parallel() - env := NewKiroTestEnv(t) - - // First chunk the content - content := "hello world this is a test transcript" - var chunkResp struct { - Chunks [][]byte `json:"chunks"` - } - env.Runner.RunJSON(t, &chunkResp, content, "chunk-transcript", "-max-size", "10") - - // Now reassemble - reassembleInput, _ := json.Marshal(chunkResp) - result := env.Runner.MustSucceed(t, string(reassembleInput), "reassemble-transcript") - - if string(result.Stdout) != content { - t.Errorf("reassembled = %q, want %q", result.Stdout, content) - } -} - -// --- Hooks --- - -func TestKiro_ParseHook_Spawn(t *testing.T) { - t.Parallel() - env := NewKiroTestEnv(t) - - var resp struct { - Type int `json:"type"` - SessionID string `json:"session_id"` - Timestamp string `json:"timestamp"` - } - env.Runner.RunJSON(t, &resp, "{}", "parse-hook", "-hook", "agent-spawn") - - if resp.Type != 1 { - t.Errorf("type = %d, want 1", resp.Type) - } - if resp.SessionID == "" { - t.Error("session_id should not be empty") - } - if resp.Timestamp == "" { - t.Error("timestamp should not be empty") - } -} - -func TestKiro_ParseHook_PromptSubmit(t *testing.T) { - t.Parallel() - env := NewKiroTestEnv(t) - - input := e2e.ParseHookInput{Prompt: "do the thing"} - - var resp struct { - Type 
int `json:"type"` - SessionID string `json:"session_id"` - Prompt string `json:"prompt"` - } - env.Runner.RunJSON(t, &resp, input.JSON(t), "parse-hook", "-hook", "user-prompt-submit") - - if resp.Type != 2 { - t.Errorf("type = %d, want 2", resp.Type) - } - if resp.Prompt != "do the thing" { - t.Errorf("prompt = %q, want %q", resp.Prompt, "do the thing") - } -} - -func TestKiro_ParseHook_PreToolUse(t *testing.T) { - t.Parallel() - env := NewKiroTestEnv(t) - - result := env.Runner.MustSucceed(t, "{}", "parse-hook", "-hook", "pre-tool-use") - - if got := strings.TrimSpace(string(result.Stdout)); got != "null" { - t.Errorf("pre-tool-use should return null, got %q", got) - } -} - -func TestKiro_ParseHook_Stop(t *testing.T) { - t.Parallel() - env := NewKiroTestEnv(t) - - input := e2e.ParseHookInput{CWD: env.Dir} - - var resp struct { - Type int `json:"type"` - SessionID string `json:"session_id"` - } - env.Runner.RunJSON(t, &resp, input.JSON(t), "parse-hook", "-hook", "stop") - - if resp.Type != 3 { - t.Errorf("type = %d, want 3", resp.Type) - } - if resp.SessionID == "" { - t.Error("session_id should not be empty") - } -} - -func TestKiro_InstallHooks(t *testing.T) { - t.Parallel() - env := NewKiroTestEnv(t) - - var resp struct { - HooksInstalled int `json:"hooks_installed"` - } - env.Runner.RunJSON(t, &resp, "", "install-hooks") - - if resp.HooksInstalled == 0 { - t.Error("hooks_installed should be > 0") - } - - // Verify files were created - if !env.FileExists(".kiro/agents/entire.json") { - t.Error(".kiro/agents/entire.json should exist after install") - } - if !env.FileExists(".kiro/hooks/entire-stop.kiro.hook") { - t.Error(".kiro/hooks/entire-stop.kiro.hook should exist after install") - } - if !env.FileExists(".kiro/hooks/entire-prompt-submit.kiro.hook") { - t.Error(".kiro/hooks/entire-prompt-submit.kiro.hook should exist after install") - } -} - -func TestKiro_UninstallHooks(t *testing.T) { - t.Parallel() - env := NewKiroTestEnv(t) - - // Install first - 
env.Runner.MustSucceed(t, "", "install-hooks") - - // Verify installed - if !env.FileExists(".kiro/agents/entire.json") { - t.Fatal("hooks should be installed before uninstall test") - } - - // Uninstall - env.Runner.MustSucceed(t, "", "uninstall-hooks") - - if env.FileExists(".kiro/agents/entire.json") { - t.Error(".kiro/agents/entire.json should be removed after uninstall") - } - if env.FileExists(".kiro/hooks/entire-stop.kiro.hook") { - t.Error(".kiro/hooks/entire-stop.kiro.hook should be removed after uninstall") - } -} - -func TestKiro_AreHooksInstalled_No(t *testing.T) { - t.Parallel() - env := NewKiroTestEnv(t) - - var resp struct { - Installed bool `json:"installed"` - } - env.Runner.RunJSON(t, &resp, "", "are-hooks-installed") - - if resp.Installed { - t.Error("hooks should not be installed in fresh env") - } -} - -func TestKiro_AreHooksInstalled_Yes(t *testing.T) { - t.Parallel() - env := NewKiroTestEnv(t) - - env.Runner.MustSucceed(t, "", "install-hooks") - - var resp struct { - Installed bool `json:"installed"` - } - env.Runner.RunJSON(t, &resp, "", "are-hooks-installed") - - if !resp.Installed { - t.Error("hooks should be installed after install-hooks") - } -} - -// --- Transcript Analysis --- - -func TestKiro_GetTranscriptPosition(t *testing.T) { - t.Parallel() - env := NewKiroTestEnv(t) - - transcript := NewKiroTranscript("conv-pos"). - AddPrompt("first"). - AddPrompt("second"). 
- AddResponse("third", "response") - path := transcript.WriteToFile(t, env, "pos-transcript.json") - - var resp struct { - Position int `json:"position"` - } - env.Runner.RunJSON(t, &resp, "", "get-transcript-position", "-path", path) - - if resp.Position != 3 { - t.Errorf("position = %d, want 3", resp.Position) - } -} - -func TestKiro_GetTranscriptPosition_Missing(t *testing.T) { - t.Parallel() - env := NewKiroTestEnv(t) - - var resp struct { - Position int `json:"position"` - } - env.Runner.RunJSON(t, &resp, "", "get-transcript-position", "-path", env.AbsPath("nonexistent.json")) - - if resp.Position != 0 { - t.Errorf("position for missing file = %d, want 0", resp.Position) - } -} - -func TestKiro_ExtractModifiedFiles(t *testing.T) { - t.Parallel() - env := NewKiroTestEnv(t) - - transcript := NewKiroTranscript("conv-files"). - AddPromptWithFileEdit("create file", "/tmp/foo.go"). - AddPromptWithFileEdit("edit file", "/tmp/bar.go"). - AddPrompt("no edits here") - path := transcript.WriteToFile(t, env, "files-transcript.json") - - var resp struct { - Files []string `json:"files"` - CurrentPosition int `json:"current_position"` - } - env.Runner.RunJSON(t, &resp, "", "extract-modified-files", "-path", path, "-offset", "0") - - if len(resp.Files) != 2 { - t.Errorf("files count = %d, want 2: %v", len(resp.Files), resp.Files) - } - if resp.CurrentPosition != 3 { - t.Errorf("current_position = %d, want 3", resp.CurrentPosition) - } -} - -func TestKiro_ExtractPrompts(t *testing.T) { - t.Parallel() - env := NewKiroTestEnv(t) - - transcript := NewKiroTranscript("conv-prompts"). - AddPrompt("first prompt"). - AddResponse("second prompt", "some response"). 
- AddPrompt("third prompt") - path := transcript.WriteToFile(t, env, "prompts-transcript.json") - - var resp struct { - Prompts []string `json:"prompts"` - } - env.Runner.RunJSON(t, &resp, "", "extract-prompts", "-session-ref", path, "-offset", "0") - - if len(resp.Prompts) != 3 { - t.Errorf("prompts count = %d, want 3: %v", len(resp.Prompts), resp.Prompts) - } -} - -func TestKiro_ExtractPrompts_WithOffset(t *testing.T) { - t.Parallel() - env := NewKiroTestEnv(t) - - transcript := NewKiroTranscript("conv-prompts-offset"). - AddPrompt("first"). - AddPrompt("second"). - AddPrompt("third") - path := transcript.WriteToFile(t, env, "prompts-offset.json") - - var resp struct { - Prompts []string `json:"prompts"` - } - env.Runner.RunJSON(t, &resp, "", "extract-prompts", "-session-ref", path, "-offset", "2") - - if len(resp.Prompts) != 1 { - t.Errorf("prompts count = %d, want 1: %v", len(resp.Prompts), resp.Prompts) - } -} - -func TestKiro_ExtractSummary(t *testing.T) { - t.Parallel() - env := NewKiroTestEnv(t) - - transcript := NewKiroTranscript("conv-summary"). 
- AddResponse("do the thing", "I completed the task successfully") - path := transcript.WriteToFile(t, env, "summary-transcript.json") - - var resp struct { - Summary string `json:"summary"` - HasSummary bool `json:"has_summary"` - } - env.Runner.RunJSON(t, &resp, "", "extract-summary", "-session-ref", path) - - if !resp.HasSummary { - t.Error("has_summary should be true") - } - if resp.Summary != "I completed the task successfully" { - t.Errorf("summary = %q, want %q", resp.Summary, "I completed the task successfully") - } -} - -func TestKiro_ExtractSummary_Empty(t *testing.T) { - t.Parallel() - env := NewKiroTestEnv(t) - - transcript := NewKiroTranscript("conv-no-summary").AddPrompt("hello") - path := transcript.WriteToFile(t, env, "no-summary.json") - - var resp struct { - Summary string `json:"summary"` - HasSummary bool `json:"has_summary"` - } - env.Runner.RunJSON(t, &resp, "", "extract-summary", "-session-ref", path) - - if resp.HasSummary { - t.Error("has_summary should be false for prompt-only transcript") - } -} - -// --- Other --- - -func TestKiro_FormatResumeCommand(t *testing.T) { - t.Parallel() - env := NewKiroTestEnv(t) - - var resp struct { - Command string `json:"command"` - } - env.Runner.RunJSON(t, &resp, "", "format-resume-command", "-session-id", "session-xyz") - - if resp.Command == "" { - t.Error("command should not be empty") - } - if !strings.Contains(resp.Command, "resume") { - t.Errorf("command %q should contain 'resume'", resp.Command) - } -} - -func TestKiro_UnknownSubcommand(t *testing.T) { - t.Parallel() - env := NewKiroTestEnv(t) - - result := env.Runner.MustFail(t, "", "nonexistent-command") - if !strings.Contains(string(result.Stderr), "unknown subcommand") { - t.Errorf("stderr should mention 'unknown subcommand', got: %s", result.Stderr) - } -} - -func TestKiro_NoSubcommand(t *testing.T) { - t.Parallel() - env := NewKiroTestEnv(t) - - // Run with no args — the binary expects at least a subcommand - result := env.Runner.Run("", "") 
- // The binary wraps os.Args[1] so passing empty string gives "unknown subcommand: " - // which exits non-zero. Either way it should not succeed cleanly. - // Actually passing "" as subcommand will invoke the binary with "" as the arg. - if result.ExitCode == 0 { - t.Error("expected non-zero exit for empty subcommand") - } -} - -// --- Install + Idempotency --- - -func TestKiro_InstallHooks_Idempotent(t *testing.T) { - t.Parallel() - env := NewKiroTestEnv(t) - - // First install - var resp1 struct { - HooksInstalled int `json:"hooks_installed"` - } - env.Runner.RunJSON(t, &resp1, "", "install-hooks") - if resp1.HooksInstalled == 0 { - t.Fatal("first install should install hooks") - } - - // Second install should be a no-op (returns 0 installed) - var resp2 struct { - HooksInstalled int `json:"hooks_installed"` - } - env.Runner.RunJSON(t, &resp2, "", "install-hooks") - if resp2.HooksInstalled != 0 { - t.Errorf("second install should be idempotent (0 hooks), got %d", resp2.HooksInstalled) - } -} diff --git a/e2e/kiro/setup_test.go b/e2e/kiro/setup_test.go deleted file mode 100644 index 84e10c6..0000000 --- a/e2e/kiro/setup_test.go +++ /dev/null @@ -1,37 +0,0 @@ -//go:build e2e - -package kiro - -import ( - "fmt" - "os" - "testing" - - e2e "github.com/entireio/external-agents/e2e" -) - -// kiroBinary holds the path to the built entire-agent-kiro binary. -var kiroBinary string - -func TestMain(m *testing.M) { - tmpDir, err := os.MkdirTemp("", "e2e-kiro-*") - if err != nil { - fmt.Fprintf(os.Stderr, "failed to create temp dir: %v\n", err) - os.Exit(1) - } - defer os.RemoveAll(tmpDir) - - fmt.Println("Building entire-agent-kiro...") - binPath, err := e2e.BuildAgent("entire-agent-kiro", tmpDir) - if err != nil { - fmt.Fprintf(os.Stderr, "failed to build entire-agent-kiro: %v\n", err) - os.Exit(1) - } - kiroBinary = binPath - fmt.Printf("Built entire-agent-kiro -> %s\n", binPath) - - // Isolate git config to prevent user's ~/.gitconfig from interfering. 
- os.Setenv("GIT_CONFIG_GLOBAL", "/dev/null") - - os.Exit(m.Run()) -} diff --git a/e2e/kiro/testenv_test.go b/e2e/kiro/testenv_test.go deleted file mode 100644 index 5e60330..0000000 --- a/e2e/kiro/testenv_test.go +++ /dev/null @@ -1,26 +0,0 @@ -//go:build e2e - -package kiro - -import ( - "testing" - - e2e "github.com/entireio/external-agents/e2e" -) - -// NewKiroTestEnv creates a test environment with .kiro/ and .entire/tmp/ directories. -func NewKiroTestEnv(t *testing.T) *e2e.TestEnv { - t.Helper() - te := e2e.NewTestEnvWithBinary(t, kiroBinary) - te.MkdirAll(".kiro") - te.MkdirAll(".entire/tmp") - return te -} - -// NewKiroGitEnv creates a Kiro test environment with git init. -func NewKiroGitEnv(t *testing.T) *e2e.TestEnv { - t.Helper() - te := NewKiroTestEnv(t) - te.GitInit() - return te -} diff --git a/e2e/lifecycle_test.go b/e2e/lifecycle_test.go index 2358518..63a0e43 100644 --- a/e2e/lifecycle_test.go +++ b/e2e/lifecycle_test.go @@ -4,7 +4,9 @@ package e2e import ( "context" + "encoding/json" "os" + "os/exec" "path/filepath" "testing" "time" @@ -80,22 +82,7 @@ func TestLifecycle_HooksInstalledAfterEnable(t *testing.T) { t.Skipf("%s binary not built", agentBinName) } - runner := &AgentRunner{ - BinaryPath: binPath, - Env: []string{ - "ENTIRE_REPO_ROOT=" + s.Dir, - "HOME=" + os.Getenv("HOME"), - "PATH=" + os.Getenv("PATH"), - "LANG=en_US.UTF-8", - }, - } - - var resp struct { - Installed bool `json:"installed"` - } - runner.RunJSON(t, &resp, "", "are-hooks-installed") - - assert.True(t, resp.Installed, "hooks should be installed after entire enable") + assert.True(t, hooksInstalled(t, binPath, s.Dir), "hooks should be installed after entire enable") }) } @@ -222,3 +209,27 @@ func TestLifecycle_InteractiveSession(t *testing.T) { testutil.WaitForCheckpoint(t, s, 30*time.Second) }) } + +func hooksInstalled(t *testing.T, binPath, repoRoot string) bool { + t.Helper() + + cmd := exec.Command(binPath, "are-hooks-installed") + cmd.Env = append(os.Environ(), + 
"ENTIRE_REPO_ROOT="+repoRoot, + "LANG=en_US.UTF-8", + ) + + out, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("%s are-hooks-installed failed: %v\n%s", binPath, err, out) + } + + var resp struct { + Installed bool `json:"installed"` + } + if err := json.Unmarshal(out, &resp); err != nil { + t.Fatalf("parse are-hooks-installed response: %v\nraw output: %s", err, out) + } + + return resp.Installed +} diff --git a/e2e/testenv.go b/e2e/testenv.go deleted file mode 100644 index db52ebf..0000000 --- a/e2e/testenv.go +++ /dev/null @@ -1,145 +0,0 @@ -//go:build e2e - -package e2e - -import ( - "encoding/json" - "fmt" - "os" - "os/exec" - "path/filepath" - "testing" -) - -// TestEnv provides an isolated filesystem environment for E2E tests. -type TestEnv struct { - t *testing.T - Dir string - HomeDir string - Runner *AgentRunner -} - -// NewTestEnv creates a bare test environment with ENTIRE_REPO_ROOT and isolated HOME. -func NewTestEnv(t *testing.T, agentName string) *TestEnv { - t.Helper() - - binPath, ok := AgentBinaries[agentName] - if !ok { - t.Fatalf("agent binary not found: %s (available: %v)", agentName, agentBinaryNames()) - } - - dir := t.TempDir() - homeDir := t.TempDir() - - env := baseEnv(dir, homeDir) - - return &TestEnv{ - t: t, - Dir: dir, - HomeDir: homeDir, - Runner: &AgentRunner{ - BinaryPath: binPath, - Env: env, - }, - } -} - -// NewTestEnvWithBinary creates a test environment using an explicit binary path. -// Use this from subpackages that build their own agent binary in TestMain. -func NewTestEnvWithBinary(t *testing.T, binPath string) *TestEnv { - t.Helper() - - dir := t.TempDir() - homeDir := t.TempDir() - - env := baseEnv(dir, homeDir) - - return &TestEnv{ - t: t, - Dir: dir, - HomeDir: homeDir, - Runner: &AgentRunner{ - BinaryPath: binPath, - Env: env, - }, - } -} - -// WriteFile writes content to a path relative to the test environment root. 
-func (e *TestEnv) WriteFile(relPath, content string) { - e.t.Helper() - abs := filepath.Join(e.Dir, relPath) - if err := os.MkdirAll(filepath.Dir(abs), 0o750); err != nil { - e.t.Fatalf("mkdir for %s: %v", relPath, err) - } - if err := os.WriteFile(abs, []byte(content), 0o600); err != nil { - e.t.Fatalf("write %s: %v", relPath, err) - } -} - -// WriteJSON writes a JSON-encoded value to a path relative to the test root. -func (e *TestEnv) WriteJSON(relPath string, v any) { - e.t.Helper() - data, err := json.Marshal(v) - if err != nil { - e.t.Fatalf("marshal JSON for %s: %v", relPath, err) - } - e.WriteFile(relPath, string(data)) -} - -// ReadFile reads a file relative to the test environment root. -func (e *TestEnv) ReadFile(relPath string) string { - e.t.Helper() - data, err := os.ReadFile(filepath.Join(e.Dir, relPath)) - if err != nil { - e.t.Fatalf("read %s: %v", relPath, err) - } - return string(data) -} - -// FileExists checks whether a relative path exists in the test environment. -func (e *TestEnv) FileExists(relPath string) bool { - _, err := os.Stat(filepath.Join(e.Dir, relPath)) - return err == nil -} - -// MkdirAll creates a directory (and parents) relative to the test root. -func (e *TestEnv) MkdirAll(relPath string) { - e.t.Helper() - if err := os.MkdirAll(filepath.Join(e.Dir, relPath), 0o750); err != nil { - e.t.Fatalf("mkdir %s: %v", relPath, err) - } -} - -// GitInit initializes a git repo in the test environment root. -func (e *TestEnv) GitInit() { - e.t.Helper() - cmd := exec.Command("git", "init") - cmd.Dir = e.Dir - cmd.Env = e.Runner.Env - if out, err := cmd.CombinedOutput(); err != nil { - e.t.Fatalf("git init failed: %v\n%s", err, out) - } -} - -// AbsPath returns the absolute path for a relative path in the test environment. 
-func (e *TestEnv) AbsPath(relPath string) string { - return filepath.Join(e.Dir, relPath) -} - -func baseEnv(repoRoot, homeDir string) []string { - return []string{ - fmt.Sprintf("ENTIRE_REPO_ROOT=%s", repoRoot), - fmt.Sprintf("HOME=%s", homeDir), - fmt.Sprintf("PATH=%s", os.Getenv("PATH")), - "LANG=en_US.UTF-8", - } -} - -func agentBinaryNames() []string { - names := make([]string, 0, len(AgentBinaries)) - for name := range AgentBinaries { - names = append(names, name) - } - return names -} From 65c422ab9ad18208c131707809291b122c32696f Mon Sep 17 00:00:00 2001 From: Andrea Nodari Date: Tue, 24 Mar 2026 16:59:44 +0100 Subject: [PATCH 2/2] Address bugbot comment --- e2e/lifecycle_test.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/e2e/lifecycle_test.go b/e2e/lifecycle_test.go index 63a0e43..eb127bf 100644 --- a/e2e/lifecycle_test.go +++ b/e2e/lifecycle_test.go @@ -219,9 +219,12 @@ func hooksInstalled(t *testing.T, binPath, repoRoot string) bool { "LANG=en_US.UTF-8", ) - out, err := cmd.CombinedOutput() + out, err := cmd.Output() if err != nil { - t.Fatalf("%s are-hooks-installed failed: %v\n%s", binPath, err, out) + if exitErr, ok := err.(*exec.ExitError); ok { + t.Fatalf("%s are-hooks-installed failed: %v\nstdout: %s\nstderr: %s", binPath, err, out, exitErr.Stderr) + } + t.Fatalf("%s are-hooks-installed failed: %v\nstdout: %s", binPath, err, out) } var resp struct {