diff --git a/.githooks/pre-commit b/.githooks/pre-commit index eb9de546..755610e3 100755 --- a/.githooks/pre-commit +++ b/.githooks/pre-commit @@ -1,21 +1,29 @@ #!/usr/bin/env bash # Pre-commit hook: test gates + auto-rebuild console build artifacts # -# 1. Python unit tests — when launcher/ or pilot/hooks/ changed -# 2. Console unit tests — when console/ changed -# 3. Console typecheck + build + stage artifacts — when console/src/ changed +# 1a. Python unit tests — when launcher/ or pilot/hooks/ changed +# 1b. Installer tests — when installer/ changed +# 2. Console unit tests — when console/ changed +# 3. Console typecheck + build + stage artifacts — when console/src/ changed set -eo pipefail # --- 1. Python unit tests --- -PYTHON_CHANGED=$(git diff --cached --name-only -- 'launcher/' 'pilot/hooks/' | head -1) +LAUNCHER_CHANGED=$(git diff --cached --name-only -- 'launcher/' 'pilot/hooks/' | head -1) +INSTALLER_CHANGED=$(git diff --cached --name-only -- 'installer/' | head -1) -if [ -n "$PYTHON_CHANGED" ]; then +if [ -n "$LAUNCHER_CHANGED" ]; then echo "[pre-commit] Python source changed — running unit tests..." - uv run pytest launcher/tests/ pilot/hooks/tests/ -q --tb=short 2>&1 | tail -5 + uv run pytest launcher/tests/ pilot/hooks/tests/ -q --tb=short 2>&1 | tail -20 echo "[pre-commit] Python unit tests passed." fi +if [ -n "$INSTALLER_CHANGED" ]; then + echo "[pre-commit] Installer changed — running installer unit tests..." + uv run pytest installer/tests/unit/ -q --tb=short 2>&1 | tail -20 + echo "[pre-commit] Installer unit tests passed." +fi + # --- 2. Console unit tests --- CONSOLE_CHANGED=$(git diff --cached --name-only -- 'console/src/' 'console/scripts/' 'console/package.json' 'console/tsconfig.json' 'console/vite.config.ts' | head -1) @@ -27,12 +35,12 @@ if [ -n "$CONSOLE_CHANGED" ]; then fi echo "[pre-commit] Console source changed — running unit tests..." - (cd console && bun test 2>&1) | tail -5 + (cd console && bun test 2>&1) | tail -20 echo "[pre-commit] Console unit tests passed." # --- 3. Console typecheck + build + stage artifacts --- echo "[pre-commit] Running typecheck..." - (cd console && bun run typecheck 2>&1) | tail -5 + (cd console && bun run typecheck 2>&1) | tail -20 echo "[pre-commit] Console typecheck passed." echo "[pre-commit] Rebuilding console artifacts..." diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 76d74446..3b66a171 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -133,8 +133,8 @@ jobs: - name: Run unit tests with coverage run: | - python3 -m pytest installer/tests/unit/ launcher/tests/unit/ -v \ - --cov=installer --cov=launcher \ + python3 -m pytest installer/tests/unit/ launcher/tests/unit/ pilot/hooks/tests/ -v \ + --cov=installer --cov=launcher --cov=pilot.hooks \ --cov-report=term --cov-report=xml console-tests: diff --git a/console/tests/ui/search-removal.test.ts b/console/tests/ui/search-removal.test.ts index 81a46e0b..68d86c68 100644 --- a/console/tests/ui/search-removal.test.ts +++ b/console/tests/ui/search-removal.test.ts @@ -7,14 +7,9 @@ */ import { describe, it, expect } from "bun:test"; -import { readFileSync, existsSync } from "fs"; +import { readFileSync } from "fs"; describe("Search view removal", () => { - it("Search view directory no longer exists", () => { - const searchViewExists = existsSync("src/ui/viewer/views/Search"); - expect(searchViewExists).toBe(false); - }); - it("views index.ts does not export SearchView", () => { const source = readFileSync("src/ui/viewer/views/index.ts", "utf-8"); expect(source).not.toContain("SearchView"); @@ -58,6 +53,7 @@ describe("Search view removal", () => { ); expect(source).not.toContain("Go to Search"); expect(source).not.toContain("navigate:/search"); + expect(source).not.toContain('"g r"'); }); it("Dashboard renders 4 workspace cards including VexorStatus", () => { diff --git a/installer/steps/claude_files.py b/installer/steps/claude_files.py index 3a70fe19..91796c6e 100644 --- a/installer/steps/claude_files.py +++ b/installer/steps/claude_files.py @@ -11,7 +11,6 @@ from installer.downloads import ( DownloadConfig, FileInfo, - download_file, download_files_parallel, get_repo_files, ) @@ -20,7 +19,6 @@ cleanup_managed_files, load_manifest, merge_app_config, - merge_settings, save_manifest, ) @@ -77,9 +75,7 @@ def _should_skip_file(file_path: str) -> bool: def _categorize_file(file_path: str) -> str: """Determine which category a file belongs to.""" - if file_path == "pilot/settings.json" or file_path.endswith("/settings.json"): - return "settings" - elif "/commands/" in file_path: + if "/commands/" in file_path: return "commands" elif "/rules/" in file_path: return "rules" @@ -182,7 +178,6 @@ def _categorize_files(self, pilot_files: list[FileInfo], ctx: InstallContext) -> "commands": [], "rules": [], "pilot_plugin": [], - "settings": [], } for file_info in pilot_files: @@ -281,7 +276,6 @@ def _install_categories( "commands": "slash commands", "rules": "standard rules", "pilot_plugin": "Pilot plugin files", - "settings": "settings", } for category, file_infos in categories.items(): @@ -311,21 +305,6 @@ def _install_category_files( failed: list[str] = [] def install_files() -> None: - if category == "settings": - for file_info in file_infos: - file_path = file_info.path - dest_file = self._get_dest_path(category, file_path, ctx) - success = self._install_settings( - file_path, - dest_file, - config, - ) - if success: - installed.append(str(dest_file)) - else: - failed.append(file_path) - return - dest_paths = [self._get_dest_path(category, fi.path, ctx) for fi in file_infos] results = download_files_parallel(file_infos, dest_paths, config) @@ -358,8 +337,6 @@ def _get_dest_path(self, category: str, file_path: str, ctx: InstallContext) -> elif category == "pilot_plugin": rel_path = Path(file_path).relative_to("pilot") return home_pilot_plugin_dir / rel_path - elif category == "settings": - return home_claude_dir / SETTINGS_FILE else: return ctx.project_dir / file_path @@ -374,6 +351,8 @@ def _post_install_processing(self, ctx: InstallContext, ui: Any) -> None: if not ctx.local_mode: self._update_hooks_config(home_pilot_plugin_dir) + self._update_plugin_settings(home_pilot_plugin_dir) + self._migrate_old_settings() self._merge_app_config() self._cleanup_stale_rules(ctx) self._save_pilot_manifest(ctx) @@ -531,58 +510,62 @@ def _report_results(self, ui: Any, file_count: int, failed_files: list[str]) -> if len(failed_files) > 5: ui.print(f" ... and {len(failed_files) - 5} more") - def _install_settings( - self, - source_path: str, - dest_path: Path, - config: DownloadConfig, - ) -> bool: - """Download, merge, and install settings to ~/.claude/settings.json. + def _update_plugin_settings(self, plugin_dir: Path) -> None: + """Patch paths in the plugin settings.json after installation.""" + settings_path = plugin_dir / SETTINGS_FILE + if not settings_path.exists(): + return + try: + content = settings_path.read_text() + patched = patch_claude_paths(content) + if patched != content: + settings_path.write_text(patched) + except (OSError, IOError): + pass - Uses three-way merge to preserve user customizations: - - baseline (~/.claude/.pilot-settings-baseline.json) = what Pilot installed last time - - current (~/.claude/settings.json) = what's on disk now (may have user changes) - - incoming (downloaded settings.json) = new Pilot settings + def _migrate_old_settings(self) -> None: + """Remove Pilot-managed entries from old ~/.claude/settings.json. + + Previous versions merged settings into ~/.claude/settings.json. + Now settings live in the plugin directory. This extracts any + user-only customizations and removes Pilot-managed entries. """ - import tempfile + home_claude_dir = Path.home() / ".claude" + baseline_path = home_claude_dir / SETTINGS_BASELINE_FILE + settings_path = home_claude_dir / SETTINGS_FILE - with tempfile.TemporaryDirectory() as tmpdir: - temp_file = Path(tmpdir) / "settings.json" - if not download_file(source_path, temp_file, config): - return False + if not baseline_path.exists(): + return - try: - raw_content = temp_file.read_text() - processed_content = patch_claude_paths(process_settings(raw_content)) - incoming: dict[str, Any] = json.loads(processed_content) - - dest_path.parent.mkdir(parents=True, exist_ok=True) - baseline_path = dest_path.parent / SETTINGS_BASELINE_FILE - - current: dict[str, Any] | None = None - baseline: dict[str, Any] | None = None - - if dest_path.exists(): - try: - current = json.loads(dest_path.read_text()) - except (json.JSONDecodeError, OSError, IOError): - current = None - - if baseline_path.exists(): - try: - baseline = json.loads(baseline_path.read_text()) - except (json.JSONDecodeError, OSError, IOError): - baseline = None - - if current is not None: - merged = merge_settings(baseline, current, incoming) - else: - merged = incoming + try: + baseline = json.loads(baseline_path.read_text()) + except (json.JSONDecodeError, OSError, IOError): + baseline_path.unlink(missing_ok=True) + return - dest_path.write_text(json.dumps(merged, indent=2) + "\n") + if not settings_path.exists(): + baseline_path.unlink(missing_ok=True) + return - baseline_path.write_text(json.dumps(incoming, indent=2) + "\n") + try: + current = json.loads(settings_path.read_text()) + except (json.JSONDecodeError, OSError, IOError): + baseline_path.unlink(missing_ok=True) + return - return True - except (json.JSONDecodeError, OSError, IOError): - return False + user_settings: dict[str, Any] = {} + for key, value in current.items(): + if key not in baseline: + user_settings[key] = value + elif value != baseline[key]: + user_settings[key] = value + + try: + if user_settings: + settings_path.write_text(json.dumps(user_settings, indent=2) + "\n") + else: + settings_path.unlink() + except (OSError, IOError): + pass + + baseline_path.unlink(missing_ok=True) diff --git a/installer/steps/dependencies.py b/installer/steps/dependencies.py index 46e64a7b..4d744315 100644 --- a/installer/steps/dependencies.py +++ b/installer/steps/dependencies.py @@ -93,14 +93,20 @@ def install_python_tools() -> bool: def _get_forced_claude_version() -> str | None: - """Check ~/.claude/settings.json for FORCE_CLAUDE_VERSION in env section.""" - settings_path = Path.home() / ".claude" / "settings.json" - if settings_path.exists(): - try: - settings = json.loads(settings_path.read_text()) - return settings.get("env", {}).get("FORCE_CLAUDE_VERSION") - except (json.JSONDecodeError, OSError): - pass + """Check settings files for FORCE_CLAUDE_VERSION in env section.""" + paths = [ + Path.home() / ".claude" / "pilot" / "settings.json", + Path.home() / ".claude" / "settings.json", + ] + for settings_path in paths: + if settings_path.exists(): + try: + settings = json.loads(settings_path.read_text()) + version = settings.get("env", {}).get("FORCE_CLAUDE_VERSION") + if version: + return version + except (json.JSONDecodeError, OSError): + pass return None @@ -369,6 +375,7 @@ def install_vexor(use_local: bool = False, ui: Any = None) -> bool: On macOS arm64, installs from fork with MLX support for Apple Silicon GPU. On other platforms, installs the standard CPU-based local embeddings. + Model pre-download is best-effort; vexor downloads it on first use if needed. """ if use_local: if is_macos_arm64(): @@ -381,7 +388,10 @@ def install_vexor(use_local: bool = False, ui: Any = None) -> bool: if not _run_bash_with_retry("uv tool install 'vexor[local]'"): return False _configure_vexor_local() - return _setup_vexor_local_model(ui) + if not _setup_vexor_local_model(ui): + if ui: + ui.info("Embedding model will download on first use") + return True else: if command_exists("vexor"): _configure_vexor_defaults() @@ -403,7 +413,10 @@ def _install_vexor_mlx(ui: Any = None) -> bool: if not _run_bash_with_retry("uv tool install 'vexor[local]' --reinstall"): return False _configure_vexor_local() - return _setup_vexor_local_model(ui) + if not _setup_vexor_local_model(ui): + if ui: + ui.info("Embedding model will download on first use") + return True if not _install_vexor_from_local(vexor_dir, extra="local-mlx"): if ui: @@ -411,10 +424,16 @@ def _install_vexor_mlx(ui: Any = None) -> bool: if not _run_bash_with_retry("uv tool install 'vexor[local]' --reinstall"): return False _configure_vexor_local() - return _setup_vexor_local_model(ui) + if not _setup_vexor_local_model(ui): + if ui: + ui.info("Embedding model will download on first use") + return True _configure_vexor_local(device="mlx") - return _setup_vexor_local_model(ui, device="mlx") + if not _setup_vexor_local_model(ui, device="mlx"): + if ui: + ui.info("Embedding model will download on first use") + return True def uninstall_mcp_cli() -> bool: diff --git a/installer/tests/unit/steps/test_claude_files.py b/installer/tests/unit/steps/test_claude_files.py index 457695b1..5edf6c11 100644 --- a/installer/tests/unit/steps/test_claude_files.py +++ b/installer/tests/unit/steps/test_claude_files.py @@ -160,8 +160,8 @@ def test_claude_files_run_installs_files(self): assert (home_dir / ".claude" / "rules" / "rule.md").exists() - def test_claude_files_installs_settings(self): - """ClaudeFilesStep installs settings to ~/.claude/settings.json.""" + def test_claude_files_installs_settings_to_plugin_dir(self): + """ClaudeFilesStep installs settings to ~/.claude/pilot/settings.json.""" from installer.context import InstallContext from installer.steps.claude_files import ClaudeFilesStep from installer.ui import Console @@ -188,8 +188,137 @@ def test_claude_files_installs_settings(self): with patch("installer.steps.claude_files.Path.home", return_value=home_dir): step.run(ctx) - assert (home_dir / ".claude" / "settings.json").exists() - assert not (dest_dir / ".claude" / "settings.local.json").exists() + assert (home_dir / ".claude" / "pilot" / "settings.json").exists() + assert not (home_dir / ".claude" / "settings.json").exists() + + +class TestSettingsMigration: + """Test migration of old merged ~/.claude/settings.json to plugin settings.""" + + def test_migrates_user_customizations_from_old_settings(self): + """User-added keys survive migration; Pilot-managed keys are removed.""" + from installer.context import InstallContext + from installer.steps.claude_files import ClaudeFilesStep + from installer.ui import Console + + step = ClaudeFilesStep() + with tempfile.TemporaryDirectory() as tmpdir: + home_dir = Path(tmpdir) / "home" + home_dir.mkdir() + + home_claude = home_dir / ".claude" + home_claude.mkdir() + + baseline = {"env": {"A": "1"}, "permissions": {"allow": ["Bash"]}} + (home_claude / ".pilot-settings-baseline.json").write_text( + json.dumps(baseline) + ) + + current = { + "env": {"A": "changed"}, + "permissions": {"allow": ["Bash"]}, + "myKey": "user-value", + } + (home_claude / "settings.json").write_text(json.dumps(current)) + + source_pilot = Path(tmpdir) / "source" / "pilot" + source_pilot.mkdir(parents=True) + (source_pilot / "settings.json").write_text( + json.dumps({"env": {"A": "1"}, "permissions": {"allow": ["Bash"]}}) + ) + + dest_dir = Path(tmpdir) / "dest" + dest_dir.mkdir() + + ctx = InstallContext( + project_dir=dest_dir, + ui=Console(non_interactive=True), + local_mode=True, + local_repo_dir=Path(tmpdir) / "source", + ) + + with patch("installer.steps.claude_files.Path.home", return_value=home_dir): + step.run(ctx) + + settings_path = home_claude / "settings.json" + assert settings_path.exists() + migrated = json.loads(settings_path.read_text()) + assert migrated["myKey"] == "user-value" + assert migrated["env"] == {"A": "changed"} + assert "permissions" not in migrated + assert not (home_claude / ".pilot-settings-baseline.json").exists() + + def test_migration_removes_settings_when_no_user_changes(self): + """If user made no changes, old settings.json is deleted entirely.""" + from installer.context import InstallContext + from installer.steps.claude_files import ClaudeFilesStep + from installer.ui import Console + + step = ClaudeFilesStep() + with tempfile.TemporaryDirectory() as tmpdir: + home_dir = Path(tmpdir) / "home" + home_dir.mkdir() + + home_claude = home_dir / ".claude" + home_claude.mkdir() + + baseline = {"env": {"A": "1"}} + (home_claude / ".pilot-settings-baseline.json").write_text( + json.dumps(baseline) + ) + (home_claude / "settings.json").write_text(json.dumps(baseline)) + + source_pilot = Path(tmpdir) / "source" / "pilot" + source_pilot.mkdir(parents=True) + (source_pilot / "settings.json").write_text(json.dumps({"env": {"A": "1"}})) + + dest_dir = Path(tmpdir) / "dest" + dest_dir.mkdir() + + ctx = InstallContext( + project_dir=dest_dir, + ui=Console(non_interactive=True), + local_mode=True, + local_repo_dir=Path(tmpdir) / "source", + ) + + with patch("installer.steps.claude_files.Path.home", return_value=home_dir): + step.run(ctx) + + assert not (home_claude / "settings.json").exists() + assert not (home_claude / ".pilot-settings-baseline.json").exists() + + def test_no_migration_without_baseline(self): + """Without baseline file, migration is skipped (clean install).""" + from installer.context import InstallContext + from installer.steps.claude_files import ClaudeFilesStep + from installer.ui import Console + + step = ClaudeFilesStep() + with tempfile.TemporaryDirectory() as tmpdir: + home_dir = Path(tmpdir) / "home" + home_dir.mkdir() + (home_dir / ".claude").mkdir() + + source_pilot = Path(tmpdir) / "source" / "pilot" + source_pilot.mkdir(parents=True) + (source_pilot / "settings.json").write_text(json.dumps({"env": {"A": "1"}})) + + dest_dir = Path(tmpdir) / "dest" + dest_dir.mkdir() + + ctx = InstallContext( + project_dir=dest_dir, + ui=Console(non_interactive=True), + local_mode=True, + local_repo_dir=Path(tmpdir) / "source", + ) + + with patch("installer.steps.claude_files.Path.home", return_value=home_dir): + step.run(ctx) + + assert (home_dir / ".claude" / "pilot" / "settings.json").exists() + assert not (home_dir / ".claude" / "settings.json").exists() class TestClaudeFilesCustomRulesPreservation: diff --git a/installer/tests/unit/steps/test_dependencies.py b/installer/tests/unit/steps/test_dependencies.py index a737ffe7..4aa5b9e0 100644 --- a/installer/tests/unit/steps/test_dependencies.py +++ b/installer/tests/unit/steps/test_dependencies.py @@ -336,6 +336,55 @@ def test_configure_vexor_defaults_merges_existing(self): assert config["model"] == "text-embedding-3-small" + @patch("installer.steps.dependencies._setup_vexor_local_model") + @patch("installer.steps.dependencies._configure_vexor_local") + @patch("installer.steps.dependencies._run_bash_with_retry") + @patch("installer.steps.dependencies.is_macos_arm64") + @patch("installer.steps.dependencies._is_vexor_local_model_installed") + @patch("installer.steps.dependencies.command_exists") + def test_install_vexor_local_succeeds_when_model_download_fails( + self, mock_cmd, mock_model, mock_mac, mock_bash, mock_config, mock_setup + ): + """install_vexor returns True when vexor installed but model pre-download fails.""" + from installer.steps.dependencies import install_vexor + + mock_cmd.return_value = False + mock_model.return_value = False + mock_mac.return_value = False + mock_bash.return_value = True + mock_config.return_value = True + mock_setup.return_value = False + + mock_ui = MagicMock() + result = install_vexor(use_local=True, ui=mock_ui) + + assert result is True + mock_ui.info.assert_called_once_with("Embedding model will download on first use") + + @patch("installer.steps.dependencies._setup_vexor_local_model") + @patch("installer.steps.dependencies._configure_vexor_local") + @patch("installer.steps.dependencies._run_bash_with_retry") + @patch("installer.steps.dependencies.is_macos_arm64") + @patch("installer.steps.dependencies._is_vexor_local_model_installed") + @patch("installer.steps.dependencies.command_exists") + def test_install_vexor_local_fails_when_binary_install_fails( + self, mock_cmd, mock_model, mock_mac, mock_bash, mock_config, mock_setup + ): + """install_vexor returns False when vexor binary installation fails.""" + from installer.steps.dependencies import install_vexor + + mock_cmd.return_value = False + mock_model.return_value = False + mock_mac.return_value = False + mock_bash.return_value = False + + result = install_vexor(use_local=True) + + assert result is False + mock_config.assert_not_called() + mock_setup.assert_not_called() + + class TestInstallPluginDependencies: """Test plugin dependencies installation via bun/npm install.""" diff --git a/launcher/tests/unit/test_tool_redirect.py b/launcher/tests/unit/test_tool_redirect.py index f3dc8bf9..f76d023e 100644 Binary files a/launcher/tests/unit/test_tool_redirect.py and b/launcher/tests/unit/test_tool_redirect.py differ diff --git a/pilot/agents/plan-challenger.md b/pilot/agents/plan-challenger.md index 016c4b26..17192f5e 100644 --- a/pilot/agents/plan-challenger.md +++ b/pilot/agents/plan-challenger.md @@ -3,6 +3,7 @@ name: plan-challenger description: Adversarial plan reviewer that challenges assumptions, finds weaknesses, and proposes failure scenarios. Returns structured JSON findings. tools: Read, Grep, Glob, Write model: sonnet +background: true permissionMode: plan --- diff --git a/pilot/agents/plan-verifier.md b/pilot/agents/plan-verifier.md index 16b3250e..9d59aa01 100644 --- a/pilot/agents/plan-verifier.md +++ b/pilot/agents/plan-verifier.md @@ -3,6 +3,7 @@ name: plan-verifier description: Verifies plan completeness and alignment with user requirements. Returns structured JSON findings. tools: Read, Grep, Glob, Write model: sonnet +background: true permissionMode: plan --- diff --git a/pilot/agents/spec-reviewer-compliance.md b/pilot/agents/spec-reviewer-compliance.md index 6ba23cbb..d2cf5ce2 100644 --- a/pilot/agents/spec-reviewer-compliance.md +++ b/pilot/agents/spec-reviewer-compliance.md @@ -3,6 +3,7 @@ name: spec-reviewer-compliance description: Verifies implementation matches the plan, DoD criteria are met, and risk mitigations are implemented. Returns structured JSON findings. tools: Read, Grep, Glob, Write, Bash(git diff:*), Bash(git log:*) model: sonnet +background: true permissionMode: plan --- diff --git a/pilot/agents/spec-reviewer-quality.md b/pilot/agents/spec-reviewer-quality.md index bd93ee98..33d36f9f 100644 --- a/pilot/agents/spec-reviewer-quality.md +++ b/pilot/agents/spec-reviewer-quality.md @@ -3,6 +3,7 @@ name: spec-reviewer-quality description: Verifies code quality, testing adequacy, security, performance, and error handling. Returns structured JSON findings. tools: Read, Grep, Glob, Write, Bash(git diff:*), Bash(git log:*) model: opus +background: true permissionMode: plan --- @@ -29,6 +30,7 @@ ls .claude/rules/*.md **For EACH matched rule file, use the Read tool to read it completely.** Rules to SKIP (not relevant to code review): + - `context-continuation.md` — session management - `cli-tools.md` — CLI references (Pilot, Vexor) - `research-tools.md` — Context7, grep-mcp, web search, gh CLI @@ -60,7 +62,7 @@ Key rules are summarized below, but you MUST read the full rule files for comple - Tests MUST have been written BEFORE the implementation - If you see implementation without corresponding test = **must_fix** -### Testing Standards (testing.md, standards-*) +### Testing Standards (testing.md, standards-\*) - Unit tests MUST mock ALL external calls (HTTP, subprocess, file I/O, databases) - Tests making real network calls = **must_fix** (causes hangs/flakiness) @@ -238,11 +240,13 @@ Test exists but only checks that function doesn't crash, not that it returns cor ## Important Notes **You are NOT responsible for:** + - Checking if implementation matches the plan (compliance reviewer does this) - Verifying risk mitigations are implemented (compliance reviewer does this) - Checking DoD criteria are met (compliance reviewer does this) **You ARE responsible for:** + - Reading ALL rule files first (mandatory) - Finding security vulnerabilities - Finding bugs and logic errors diff --git a/pilot/commands/spec-implement.md b/pilot/commands/spec-implement.md index 02ea4a6e..5aa8ba8d 100644 --- a/pilot/commands/spec-implement.md +++ b/pilot/commands/spec-implement.md @@ -203,7 +203,7 @@ TaskCreate: "Task 4: Add documentation" → id=4, addBlockedBy: [2] - Implement minimal code to pass - Refactor if needed (keep tests green) 5. **Verify tests pass** - Run the project's test runner (e.g., `uv run pytest -q`, `bun test`, `npm test`) -6. **Run actual program** - Use the plan's Runtime Environment section to start the service/program. Show real output with sample data. +6. **Run actual program** - Use the plan's Runtime Environment section to start the service/program. Show real output with sample data. Check port availability first: `lsof -i :`. **If using `playwright-cli`, use session isolation:** `playwright-cli -s="${PILOT_SESSION_ID:-default}" open ` (see `playwright-cli.md`). Always close the session after verification. 7. **Check diagnostics** - Must be zero errors 8. **Validate Definition of Done** - Check all criteria from plan 9. **Per-task commit (worktree mode only)** - If `Worktree: Yes` in the plan, commit task changes immediately: diff --git a/pilot/commands/spec-plan.md b/pilot/commands/spec-plan.md index 1a9050a4..e78610d9 100644 --- a/pilot/commands/spec-plan.md +++ b/pilot/commands/spec-plan.md @@ -560,7 +560,7 @@ Define output paths (replace `` with the resolved value): #### Launch Plan Verification (Parallel Review) -**⛔ CRITICAL: You MUST send BOTH Task calls in a SINGLE message.** Set `run_in_background=true` on both so they run in parallel. If you send them in separate messages, the first blocks and the second waits — defeating the purpose. +**⛔ CRITICAL: You MUST send BOTH Task calls in a SINGLE message.** Both agents have `background: true` in their definitions, so they run in the background automatically. As a fallback, also set `run_in_background=true` on both. If you send them in separate messages, the first blocks and the second waits — defeating the purpose. **Agent 1: plan-verifier** (alignment and completeness) ``` diff --git a/pilot/commands/spec-verify.md b/pilot/commands/spec-verify.md index b57aaddd..c9005646 100644 --- a/pilot/commands/spec-verify.md +++ b/pilot/commands/spec-verify.md @@ -97,7 +97,7 @@ Define output paths (replace `` with the resolved value): #### 3.0d: Launch Both Reviewers in Parallel -Spawn 2 agents in parallel using TWO Task tool calls in a SINGLE message. Set `run_in_background=true` on both. +Spawn 2 agents in parallel using TWO Task tool calls in a SINGLE message. Both agents have `background: true` in their definitions, so they run in the background automatically. As a fallback, also set `run_in_background=true` on both. **Agent 1: spec-reviewer-compliance** (plan alignment, DoD, risk mitigations) @@ -357,6 +357,8 @@ If the project builds artifacts that are deployed separately from source (e.g., 2. Copy new builds to the installed location 3. Restart any running services that use the old artifacts +**⚠️ Parallel spec warning:** Deploy paths are shared OS resources. If another `/spec` session deploys to the same path, Code Identity Verification (3.7c) becomes unreliable. Check `ps aux | grep ` before restarting shared services. + **If no separate deployment is needed, skip to 3.7c.** #### 3.7c: Code Identity Verification (MANDATORY) @@ -380,6 +382,8 @@ If the project builds artifacts that are deployed separately from source (e.g., Run the actual program and verify real output. +**⚠️ Parallel spec warning:** Before starting a server, check if the port is already in use: `lsof -i :`. If another `/spec` session occupies it, wait for it to finish Phase B or use a different port. + **Execution checklist:** - [ ] Program starts without errors @@ -438,13 +442,36 @@ Verify each level against the actual codebase and running program. If any level **⚠️ Unit + Integration tests are NOT enough. You MUST also run E2E tests.** +#### 3.10.0: Resolve Playwright Session (MANDATORY before any playwright-cli usage) + +**⛔ Parallel `/spec` sessions WILL interfere if you use bare `playwright-cli` commands.** Always use a session-scoped browser instance. + +```bash +PW_SESSION="${PILOT_SESSION_ID:-default}" +``` + +**ALL `playwright-cli` commands in this phase MUST use `-s="$PW_SESSION"`:** + +```bash +playwright-cli -s="$PW_SESSION" open +playwright-cli -s="$PW_SESSION" snapshot +playwright-cli -s="$PW_SESSION" click e1 +# ... etc +``` + +**Cleanup at the end of E2E testing (after Step 3.10b):** + +```bash +playwright-cli -s="$PW_SESSION" close +``` + #### 3.10a: Happy Path Testing Test the primary user workflow end-to-end. **For APIs:** Test endpoints with curl. Verify status codes, response content, and state changes. -**For Frontend/UI:** Use `playwright-cli` to verify UI renders and workflows complete. See `~/.claude/rules/playwright-cli.md`. +**For Frontend/UI:** Use `playwright-cli -s="$PW_SESSION"` to verify UI renders and workflows complete. See `~/.claude/rules/playwright-cli.md`. Walk through the main user scenario described in the plan. Every view, every interaction, every state transition. @@ -466,6 +493,12 @@ For each edge case: 2. Exercise the UI/API 3. Verify the result is reasonable (not blank, not broken, not stuck, no unhandled errors) +**After all E2E testing completes, close the playwright session:** + +```bash +playwright-cli -s="$PW_SESSION" close +``` + ### Step 3.11: Final Regression Check Run the test suite and type checker one final time to catch any regressions from Phase B fixes (if any code changed during execution/E2E testing): diff --git a/pilot/hooks/file_checker.py b/pilot/hooks/file_checker.py index 68d3d5a2..bced553e 100644 --- a/pilot/hooks/file_checker.py +++ b/pilot/hooks/file_checker.py @@ -1,7 +1,8 @@ """Unified quality gate — file length + TDD enforcement in a single hook. -Runs both checks and produces one combined blocking output to avoid -duplicate system-reminders from multiple hooks in the same group. +Runs both checks and produces one combined warning via additionalContext +to avoid duplicate system-reminders from multiple hooks in the same group. +Warnings are non-blocking — they inform but never prevent edits. """ from __future__ import annotations @@ -15,7 +16,7 @@ from _checkers.go import check_go from _checkers.python import check_python from _checkers.typescript import TS_EXTENSIONS, check_typescript -from _util import find_git_root, post_tool_use_block +from _util import find_git_root, post_tool_use_context from tdd_enforcer import ( has_go_test_file, has_python_test_file, @@ -95,7 +96,7 @@ def main() -> int: reasons = [r for r in (file_reason, tdd_reason) if r] if reasons: - print(post_tool_use_block("\n".join(reasons))) + print(post_tool_use_context("\n".join(reasons))) return 0 diff --git a/pilot/hooks/tests/conftest.py b/pilot/hooks/tests/conftest.py new file mode 100644 index 00000000..a7fe1ca5 --- /dev/null +++ b/pilot/hooks/tests/conftest.py @@ -0,0 +1,8 @@ +"""Configure sys.path so hook modules are importable in tests.""" + +import sys +from pathlib import Path + +_hooks_dir = str(Path(__file__).resolve().parent.parent) +if _hooks_dir not in sys.path: + sys.path.insert(0, _hooks_dir) diff --git a/pilot/hooks/tests/test__util.py b/pilot/hooks/tests/test__util.py index e5bafa0e..614c6dd0 100644 --- a/pilot/hooks/tests/test__util.py +++ b/pilot/hooks/tests/test__util.py @@ -1,13 +1,30 @@ -"""Tests for _util.py model config helper functions.""" +"""Tests for _util.py — model config, JSON helpers, session paths, and shared utilities.""" from __future__ import annotations import json import sys from pathlib import Path -from unittest.mock import patch - -sys.path.insert(0, str(Path(__file__).parent.parent)) +from unittest.mock import MagicMock, patch + +from _util import ( + BLUE, + CYAN, + FILE_LENGTH_CRITICAL, + FILE_LENGTH_WARN, + GREEN, + MAGENTA, + NC, + RED, + YELLOW, + _sessions_base, + find_git_root, + get_edited_file_from_stdin, + get_session_cache_path, + get_session_plan_path, + is_waiting_for_user_input, + read_hook_stdin, +) class TestReadModelFromConfig: @@ -216,3 +233,197 @@ def test_no_ansi_codes_in_output(self, tmp_path: Path) -> None: f.write_text("\n".join(f"line {i}" for i in range(550))) result = check_file_length(f) assert "\033[" not in result + + + + +class TestColorConstants: + """Color constants are defined and non-empty.""" + + def test_all_colors_defined(self): + assert RED + assert YELLOW + assert GREEN + assert CYAN + assert BLUE + assert MAGENTA + assert NC + + +class TestFileLengthConstants: + """File length constants have expected values.""" + + def test_warn_threshold(self): + assert FILE_LENGTH_WARN == 300 + + def test_critical_threshold(self): + assert FILE_LENGTH_CRITICAL == 500 + + + + +class TestSessionsBase: + """Tests for _sessions_base().""" + + def test_returns_path_under_home(self): + base = _sessions_base() + assert isinstance(base, Path) + assert base == Path.home() / ".pilot" / "sessions" + + +class TestGetSessionCachePath: + """Tests for get_session_cache_path().""" + + @patch.dict("os.environ", {"PILOT_SESSION_ID": "test-session-123"}) + def test_with_session_id(self): + path = get_session_cache_path() + assert isinstance(path, Path) + assert "test-session-123" in str(path) + assert path.name == "context-cache.json" + + @patch.dict("os.environ", {}, clear=True) + def test_defaults_to_default(self): + path = get_session_cache_path() + assert isinstance(path, Path) + assert "default" in str(path) + + +class TestGetSessionPlanPath: + """Tests for get_session_plan_path().""" + + @patch.dict("os.environ", {"PILOT_SESSION_ID": "test-session-456"}) + def test_returns_session_scoped_plan_path(self): + path = get_session_plan_path() + assert isinstance(path, Path) + assert "test-session-456" in str(path) + assert path.name == "active_plan.json" + + + + +class TestFindGitRoot: + """Tests for find_git_root().""" + + @patch("subprocess.run") + def test_returns_root_when_in_repo(self, mock_run): + mock_run.return_value = MagicMock(returncode=0, stdout="/home/user/repo\n") + result = find_git_root() + assert result == Path("/home/user/repo") + + @patch("subprocess.run") + def test_returns_none_when_not_in_repo(self, mock_run): + mock_run.return_value = MagicMock(returncode=1, stdout="") + result = find_git_root() + assert result is None + + @patch("subprocess.run", side_effect=Exception("Git not found")) + def test_handles_exception(self, mock_run): + result = find_git_root() + assert result is None + + + + +class TestReadHookStdin: + """Tests for read_hook_stdin().""" + + def test_parses_valid_json(self, monkeypatch): + test_data = {"tool_name": "Write", "tool_input": {"file_path": "test.py"}} + monkeypatch.setattr("sys.stdin", MagicMock(read=lambda: json.dumps(test_data))) + result = read_hook_stdin() + assert result == test_data + + def test_returns_empty_dict_on_invalid_json(self, monkeypatch): + monkeypatch.setattr("sys.stdin", MagicMock(read=lambda: "not json")) + result = read_hook_stdin() + assert result == {} + + def test_returns_empty_dict_on_empty_input(self, monkeypatch): + monkeypatch.setattr("sys.stdin", MagicMock(read=lambda: "")) + result = read_hook_stdin() + assert result == {} + + +class TestGetEditedFileFromStdin: + """Tests for get_edited_file_from_stdin().""" + + def test_extracts_file_path(self, monkeypatch): + test_data = {"tool_input": {"file_path": "/path/to/file.py"}} + with patch("select.select") as mock_select: + mock_select.return_value = ([sys.stdin], [], []) + monkeypatch.setattr("sys.stdin", MagicMock(read=lambda: json.dumps(test_data))) + with patch("json.load", return_value=test_data): + result = get_edited_file_from_stdin() + assert result == Path("/path/to/file.py") + + def test_returns_none_without_file_path(self, monkeypatch): + test_data = {"tool_input": {}} + with patch("select.select") as mock_select: + mock_select.return_value = ([sys.stdin], [], []) + with patch("json.load", return_value=test_data): + result = get_edited_file_from_stdin() + assert result is None + + def test_returns_none_when_stdin_empty(self, monkeypatch): + with patch("select.select") as mock_select: + mock_select.return_value = ([], [], []) + result = get_edited_file_from_stdin() + assert result is None + + + + +class TestIsWaitingForUserInput: + """Tests for is_waiting_for_user_input().""" + + def test_returns_true_when_last_tool_is_ask_user_question(self, tmp_path): + transcript = tmp_path / "transcript.jsonl" + msg = { + "type": "assistant", + "message": { + "content": [ + {"type": "tool_use", "name": "AskUserQuestion", "input": {}} + ] + }, + } + transcript.write_text(json.dumps(msg) + "\n") + assert is_waiting_for_user_input(str(transcript)) is True + + def test_returns_false_when_last_tool_is_not_ask(self, tmp_path): + transcript = tmp_path / "transcript.jsonl" + msg = { + "type": "assistant", + "message": { + "content": [{"type": "tool_use", "name": "Write", "input": {}}] + }, + } + transcript.write_text(json.dumps(msg) + "\n") + assert is_waiting_for_user_input(str(transcript)) is False + + def test_returns_false_for_missing_file(self): + assert is_waiting_for_user_input("/nonexistent/transcript.jsonl") is False + + def test_returns_false_for_empty_transcript(self, tmp_path): + transcript = tmp_path / "transcript.jsonl" + transcript.write_text("") + assert is_waiting_for_user_input(str(transcript)) is False + + def test_uses_last_assistant_message(self, tmp_path): + transcript = tmp_path / "transcript.jsonl" + ask_msg = { + "type": "assistant", + "message": { + "content": [ + {"type": "tool_use", "name": "AskUserQuestion", "input": {}} + ] + }, + } + write_msg = { + "type": "assistant", + "message": { + "content": [{"type": "tool_use", "name": "Write", "input": {}}] + }, + } + lines = [json.dumps(ask_msg), json.dumps(write_msg)] + transcript.write_text("\n".join(lines) + "\n") + assert is_waiting_for_user_input(str(transcript)) is False diff --git a/pilot/hooks/tests/test_context_monitor.py b/pilot/hooks/tests/test_context_monitor.py index 0355c2a5..54a2f884 100644 --- a/pilot/hooks/tests/test_context_monitor.py +++ b/pilot/hooks/tests/test_context_monitor.py @@ -1,22 +1,14 @@ -"""Tests for context_monitor hook.""" +"""Tests for context_monitor hook — behavior at thresholds and internal helpers.""" from __future__ import annotations import json -import sys import time -from pathlib import Path from unittest.mock import patch -sys.path.insert(0, str(Path(__file__).parent.parent)) -from context_monitor import run_context_monitor +from context_monitor import _is_throttled, _resolve_context, run_context_monitor -def _make_statusline_cache(tmp_path: Path, session_id: str, pct: float) -> None: - """Write a statusline context-pct.json cache file.""" - cache_dir = tmp_path / ".pilot" / "sessions" / session_id - cache_dir.mkdir(parents=True, exist_ok=True) - (cache_dir / "context-pct.json").write_text(json.dumps({"pct": pct, "ts": time.time()})) class TestContextMonitorAutocompact: @@ -119,3 +111,126 @@ def test_no_context_data_no_output(self, mock_resolve, mock_throttle, mock_sid, assert result == 0 captured = capsys.readouterr() assert captured.out == "" + + + + +class TestIsThrottled: + """Tests for throttle logic based on cache freshness and context level.""" + + def test_throttle_skips_when_recent_and_low_context(self, tmp_path, monkeypatch): + """Throttle returns True when last check was < 30s ago and context below warning threshold.""" + cache_file = tmp_path / "context_cache.json" + monkeypatch.setattr("context_monitor.get_session_cache_path", lambda: cache_file) + + session_id = "test-session-123" + cache_file.write_text(json.dumps({ + "session_id": session_id, + "tokens": 100000, + "timestamp": time.time() - 5, + })) + + assert _is_throttled(session_id) is True + + def test_throttle_allows_when_high_context(self, tmp_path, monkeypatch): + """Throttle returns False when context is high (never skip near compaction).""" + cache_file = tmp_path / "context_cache.json" + monkeypatch.setattr("context_monitor.get_session_cache_path", lambda: cache_file) + + session_id = "test-session-123" + cache_file.write_text(json.dumps({ + "session_id": session_id, + "tokens": 170000, + "timestamp": time.time() - 5, + })) + + assert _is_throttled(session_id) is False + + def test_throttle_allows_when_stale_timestamp(self, tmp_path, monkeypatch): + """Throttle returns False when last check was > 30s ago.""" + cache_file = tmp_path / "context_cache.json" + monkeypatch.setattr("context_monitor.get_session_cache_path", lambda: cache_file) + + session_id = "test-session-123" + cache_file.write_text(json.dumps({ + "session_id": session_id, + "tokens": 100000, + "timestamp": time.time() - 35, + })) + + assert _is_throttled(session_id) is False + + def test_throttle_allows_when_no_cache(self, tmp_path, monkeypatch): + """Throttle returns False when no cache file exists.""" + cache_file = tmp_path / "context_cache.json" + monkeypatch.setattr("context_monitor.get_session_cache_path", lambda: cache_file) + + assert _is_throttled("test-session-123") is False + + def test_throttle_allows_when_different_session(self, tmp_path, monkeypatch): + """Throttle returns False when cache is for a different session.""" + cache_file = tmp_path / "context_cache.json" + monkeypatch.setattr("context_monitor.get_session_cache_path", lambda: cache_file) + + cache_file.write_text(json.dumps({ + "session_id": "other-session-456", + "tokens": 100000, + "timestamp": time.time() - 5, + })) + + assert _is_throttled("test-session-123") is False + + + + +class TestResolveContext: + """Tests for context resolution from statusline cache.""" + + def test_returns_none_when_statusline_cache_missing(self, tmp_path, monkeypatch): + """Returns None when no statusline cache exists (no racy fallback).""" + cache_file = tmp_path / "context_cache.json" + monkeypatch.setattr("context_monitor.get_session_cache_path", lambda: cache_file) + monkeypatch.setattr("context_monitor._read_statusline_context_pct", lambda: None) + + result = _resolve_context("test-session-123") + + assert result is None + + def test_returns_statusline_percentage(self, tmp_path, monkeypatch): + """Returns percentage from statusline cache when available.""" + cache_file = tmp_path / "context_cache.json" + monkeypatch.setattr("context_monitor.get_session_cache_path", lambda: cache_file) + monkeypatch.setattr("context_monitor._read_statusline_context_pct", lambda: 45.0) + + result = _resolve_context("test-session-123") + + assert result is not None + pct, tokens, shown_learn, shown_80 = result + assert pct == 45.0 + assert tokens == 90000 + assert shown_learn == [] + assert shown_80 is False + + def test_includes_session_flags(self, tmp_path, monkeypatch): + """Returns session flags (learn thresholds, 80% warning) from cache.""" + cache_file = tmp_path / "context_cache.json" + monkeypatch.setattr("context_monitor.get_session_cache_path", lambda: cache_file) + monkeypatch.setattr("context_monitor._read_statusline_context_pct", lambda: 85.0) + + session_id = "test-session-123" + cache_file.write_text(json.dumps({ + "session_id": session_id, + "tokens": 170000, + "timestamp": time.time() - 5, + "shown_learn": [40, 60], + "shown_80_warn": True, + })) + + result = _resolve_context(session_id) + + assert result is not None + pct, tokens, shown_learn, shown_80 = result + assert pct == 85.0 + assert tokens == 170000 + assert shown_learn == [40, 60] + assert shown_80 is True diff --git a/pilot/tests/hooks/test_file_checker.py b/pilot/hooks/tests/test_file_checker.py similarity index 51% rename from pilot/tests/hooks/test_file_checker.py rename to pilot/hooks/tests/test_file_checker.py index c4fc1e3a..c6b46797 100644 --- a/pilot/tests/hooks/test_file_checker.py +++ b/pilot/hooks/tests/test_file_checker.py @@ -2,11 +2,17 @@ from __future__ import annotations +import io import json -from pathlib import Path from unittest.mock import patch -from pilot.hooks.file_checker import main +from file_checker import main + + +def _make_stdin(tool_name: str, file_path: str) -> io.StringIO: + """Create a stdin mock with hook JSON data.""" + data = {"tool_name": tool_name, "tool_input": {"file_path": file_path}} + return io.StringIO(json.dumps(data)) def test_python_file_dispatches_to_python_checker(tmp_path): @@ -14,8 +20,8 @@ def test_python_file_dispatches_to_python_checker(tmp_path): py_file = tmp_path / "test.py" py_file.write_text("print('hello')\n") - with patch("pilot.hooks.file_checker.get_edited_file_from_stdin", return_value=py_file): - with patch("pilot.hooks.file_checker.check_python") as mock_check: + with patch("sys.stdin", _make_stdin("Edit", str(py_file))): + with patch("file_checker.check_python") as mock_check: mock_check.return_value = (0, "") result = main() @@ -28,8 +34,8 @@ def test_typescript_file_dispatches_to_typescript_checker(tmp_path): ts_file = tmp_path / "test.ts" ts_file.write_text("const x = 1;\n") - with patch("pilot.hooks.file_checker.get_edited_file_from_stdin", return_value=ts_file): - with patch("pilot.hooks.file_checker.check_typescript") as mock_check: + with patch("sys.stdin", _make_stdin("Edit", str(ts_file))): + with patch("file_checker.check_typescript") as mock_check: mock_check.return_value = (0, "") result = main() @@ -42,8 +48,8 @@ def test_go_file_dispatches_to_go_checker(tmp_path): go_file = tmp_path / "test.go" go_file.write_text("package main\n") - with patch("pilot.hooks.file_checker.get_edited_file_from_stdin", return_value=go_file): - with patch("pilot.hooks.file_checker.check_go") as mock_check: + with patch("sys.stdin", _make_stdin("Edit", str(go_file))): + with patch("file_checker.check_go") as mock_check: mock_check.return_value = (0, "") result = main() @@ -56,56 +62,57 @@ def test_unsupported_file_returns_zero(tmp_path): md_file = tmp_path / "test.md" md_file.write_text("# Heading\n") - with patch("pilot.hooks.file_checker.get_edited_file_from_stdin", return_value=md_file): + with patch("sys.stdin", _make_stdin("Edit", str(md_file))): result = main() assert result == 0 def test_nonexistent_file_returns_zero(): """Nonexistent files return 0.""" - with patch("pilot.hooks.file_checker.get_edited_file_from_stdin", return_value=Path("/nonexistent/file.py")): + with patch("sys.stdin", _make_stdin("Edit", "/nonexistent/file.py")): result = main() assert result == 0 -class TestJsonDecisionOutput: - """Test JSON decision output on stdout.""" +class TestContextOutput: + """Test additionalContext output on stdout.""" - def test_block_decision_when_issues_found(self, tmp_path, capsys): - """Should print block decision JSON when checker finds issues.""" + def test_context_output_when_issues_found(self, tmp_path, capsys): + """Should print additionalContext JSON when checker finds issues.""" py_file = tmp_path / "app.py" py_file.write_text("x = 1\n") - with patch("pilot.hooks.file_checker.get_edited_file_from_stdin", return_value=py_file): - with patch("pilot.hooks.file_checker.check_python") as mock_check: + with patch("sys.stdin", _make_stdin("Edit", str(py_file))): + with patch("file_checker.check_python") as mock_check: mock_check.return_value = (2, "Python: 3 ruff issues in app.py") main() captured = capsys.readouterr() output = json.loads(captured.out) - assert output["decision"] == "block" - assert "Python" in output["reason"] + assert "hookSpecificOutput" in output + assert output["hookSpecificOutput"]["hookEventName"] == "PostToolUse" + assert "Python" in output["hookSpecificOutput"]["additionalContext"] - def test_empty_json_when_clean(self, tmp_path, capsys): - """Should print empty JSON when checks pass.""" + def test_no_output_when_clean(self, tmp_path, capsys): + """Should print nothing when checks pass.""" py_file = tmp_path / "app.py" py_file.write_text("x = 1\n") - with patch("pilot.hooks.file_checker.get_edited_file_from_stdin", return_value=py_file): - with patch("pilot.hooks.file_checker.check_python") as mock_check: - mock_check.return_value = (2, "") - main() + with patch("sys.stdin", _make_stdin("Edit", str(py_file))): + with patch("file_checker.check_python") as mock_check: + mock_check.return_value = (0, "") + with patch("file_checker._tdd_check", return_value=""): + main() captured = capsys.readouterr() - output = json.loads(captured.out) - assert output == {} + assert captured.out == "" - def test_no_json_for_unsupported_files(self, tmp_path, capsys): - """Should not print JSON for unsupported file types.""" + def test_no_output_for_unsupported_files(self, tmp_path, capsys): + """Should not print anything for unsupported file types.""" md_file = tmp_path / "readme.md" md_file.write_text("# Hello\n") - with patch("pilot.hooks.file_checker.get_edited_file_from_stdin", return_value=md_file): + with patch("sys.stdin", _make_stdin("Edit", str(md_file))): main() captured = capsys.readouterr() diff --git a/pilot/tests/hooks/test_go_checker.py b/pilot/hooks/tests/test_go_checker.py similarity index 90% rename from pilot/tests/hooks/test_go_checker.py rename to pilot/hooks/tests/test_go_checker.py index 04d8d9e6..10970105 100644 --- a/pilot/tests/hooks/test_go_checker.py +++ b/pilot/hooks/tests/test_go_checker.py @@ -26,13 +26,13 @@ def test_vet_count_excludes_package_header_lines(self, tmp_path: Path) -> None: with ( patch("_checkers.go.strip_go_comments"), - patch("_checkers.go.check_file_length"), + patch("_checkers.go.check_file_length", return_value=""), patch("_checkers.go.shutil.which", side_effect=lambda name: f"/usr/bin/{name}" if name == "go" else None), patch("_checkers.go.subprocess.run", return_value=mock_result), ): exit_code, reason = check_go(go_file) - assert exit_code == 2 + assert exit_code == 0 assert "1 vet" in reason, f"Expected '1 vet' but got: {reason}" def test_vet_count_with_multiple_issues_and_header(self, tmp_path: Path) -> None: @@ -51,13 +51,13 @@ def test_vet_count_with_multiple_issues_and_header(self, tmp_path: Path) -> None with ( patch("_checkers.go.strip_go_comments"), - patch("_checkers.go.check_file_length"), + patch("_checkers.go.check_file_length", return_value=""), patch("_checkers.go.shutil.which", side_effect=lambda name: f"/usr/bin/{name}" if name == "go" else None), patch("_checkers.go.subprocess.run", return_value=mock_result), ): exit_code, reason = check_go(go_file) - assert exit_code == 2 + assert exit_code == 0 assert "2 vet" in reason, f"Expected '2 vet' but got: {reason}" def test_vet_header_only_output_means_no_issues(self, tmp_path: Path) -> None: @@ -72,7 +72,7 @@ def test_vet_header_only_output_means_no_issues(self, tmp_path: Path) -> None: with ( patch("_checkers.go.strip_go_comments"), - patch("_checkers.go.check_file_length"), + patch("_checkers.go.check_file_length", return_value=""), patch("_checkers.go.shutil.which", side_effect=lambda name: f"/usr/bin/{name}" if name == "go" else None), patch("_checkers.go.subprocess.run", return_value=mock_vet), ): @@ -100,7 +100,7 @@ class TestCheckGoCleanFile: """Clean files should pass.""" def test_clean_file_returns_success(self, tmp_path: Path) -> None: - """Clean Go file should return exit 2 with empty reason.""" + """Clean Go file should return exit 0 with empty reason.""" go_file = tmp_path / "main.go" go_file.write_text("package main\n") @@ -111,11 +111,11 @@ def test_clean_file_returns_success(self, tmp_path: Path) -> None: with ( patch("_checkers.go.strip_go_comments"), - patch("_checkers.go.check_file_length"), + patch("_checkers.go.check_file_length", return_value=""), patch("_checkers.go.shutil.which", side_effect=lambda name: f"/usr/bin/{name}" if name == "go" else None), patch("_checkers.go.subprocess.run", return_value=mock_result), ): exit_code, reason = check_go(go_file) - assert exit_code == 2 + assert exit_code == 0 assert reason == "" diff --git a/pilot/tests/hooks/test_python_checker.py b/pilot/hooks/tests/test_python_checker.py similarity index 95% rename from pilot/tests/hooks/test_python_checker.py rename to pilot/hooks/tests/test_python_checker.py index 13504556..a71cdb47 100644 --- a/pilot/tests/hooks/test_python_checker.py +++ b/pilot/hooks/tests/test_python_checker.py @@ -141,7 +141,7 @@ def test_no_tools_returns_zero(self, tmp_path: Path) -> None: with ( patch("_checkers.python.strip_python_comments"), - patch("_checkers.python.check_file_length"), + patch("_checkers.python.check_file_length", return_value=""), patch("_checkers.python.shutil.which", return_value=None), ): exit_code, reason = check_python(py_file) @@ -180,13 +180,13 @@ def which_side_effect(name): with ( patch("_checkers.python.strip_python_comments"), - patch("_checkers.python.check_file_length"), + patch("_checkers.python.check_file_length", return_value=""), patch("_checkers.python.shutil.which", side_effect=which_side_effect), patch("_checkers.python.subprocess.run", side_effect=run_side_effect), ): exit_code, reason = check_python(py_file) - assert exit_code == 2 + assert exit_code == 0 assert "2 ruff" in reason def test_ruff_clean_output_no_issues(self, tmp_path: Path) -> None: @@ -203,13 +203,13 @@ def which_side_effect(name): with ( patch("_checkers.python.strip_python_comments"), - patch("_checkers.python.check_file_length"), + patch("_checkers.python.check_file_length", return_value=""), patch("_checkers.python.shutil.which", side_effect=which_side_effect), patch("_checkers.python.subprocess.run", return_value=mock_result), ): exit_code, reason = check_python(py_file) - assert exit_code == 2 + assert exit_code == 0 assert reason == "" @@ -233,7 +233,7 @@ def which_side_effect(name): with ( patch("_checkers.python.strip_python_comments"), - patch("_checkers.python.check_file_length"), + patch("_checkers.python.check_file_length", return_value=""), patch("_checkers.python.shutil.which", side_effect=which_side_effect), patch("_checkers.python.subprocess.run", side_effect=run_side_effect), ): diff --git a/pilot/hooks/tests/test_session_end.py b/pilot/hooks/tests/test_session_end.py index f637e32a..ca82d452 100644 --- a/pilot/hooks/tests/test_session_end.py +++ b/pilot/hooks/tests/test_session_end.py @@ -1,62 +1,57 @@ -"""Tests for session_end hook.""" +"""Tests for session_end hook — worker stop behavior.""" from __future__ import annotations -import sys -from pathlib import Path +import os from unittest.mock import MagicMock, patch -sys.path.insert(0, str(Path(__file__).parent.parent)) -from session_end import main +import session_end -class TestSessionEndWorkerStop: - @patch("session_end._get_active_session_count") - @patch("session_end.subprocess.run") - @patch("os.environ", {"CLAUDE_PLUGIN_ROOT": "/plugin"}) - def test_stops_worker_on_clean_session_end( - self, - mock_subprocess, - mock_count, - ): - """Should stop worker when session count <= 1.""" - mock_count.return_value = 1 - mock_subprocess.return_value = MagicMock(returncode=0) - - result = main() - - assert result == 0 - mock_subprocess.assert_called_once() - - @patch("session_end._get_active_session_count") - @patch("session_end.subprocess.run") - @patch("os.environ", {"CLAUDE_PLUGIN_ROOT": "/plugin"}) - def test_stops_worker_when_zero_sessions( - self, - mock_subprocess, - mock_count, +def test_returns_early_without_plugin_root(): + """Should return 0 when CLAUDE_PLUGIN_ROOT is not set.""" + with patch.dict(os.environ, {}, clear=True): + os.environ.pop("CLAUDE_PLUGIN_ROOT", None) + result = session_end.main() + + assert result == 0 + + +def test_skips_stop_when_other_sessions_active(): + """Should skip worker stop when other Pilot sessions are running.""" + with ( + patch.dict(os.environ, {"CLAUDE_PLUGIN_ROOT": "/fake/plugin"}), + patch("session_end._get_active_session_count", return_value=2), + patch("session_end.subprocess.run") as mock_run, ): - """Should stop worker when count is 0 (current session already gone).""" - mock_count.return_value = 0 - mock_subprocess.return_value = MagicMock(returncode=0) + result = session_end.main() - result = main() + assert result == 0 + mock_run.assert_not_called() - assert result == 0 - mock_subprocess.assert_called_once() - @patch("session_end._get_active_session_count") - @patch("os.environ", {"CLAUDE_PLUGIN_ROOT": "/plugin"}) - def test_skips_stop_when_other_sessions_running(self, mock_count): - """Should skip worker stop when other sessions are still running.""" - mock_count.return_value = 2 +def test_stops_worker_when_no_other_sessions(): + """Should stop worker when this is the only active session.""" + with ( + patch.dict(os.environ, {"CLAUDE_PLUGIN_ROOT": "/fake/plugin"}), + patch("session_end._get_active_session_count", return_value=1), + patch("session_end.subprocess.run", return_value=MagicMock(returncode=0)) as mock_run, + ): + result = session_end.main() + + assert result == 0 + mock_run.assert_called_once() + assert "stop" in str(mock_run.call_args) - result = main() - assert result == 0 +def test_stops_worker_when_zero_sessions(): + """Should stop worker and return 0 when no sessions active.""" + with ( + patch.dict(os.environ, {"CLAUDE_PLUGIN_ROOT": "/fake/plugin"}), + patch("session_end._get_active_session_count", return_value=0), + patch("session_end.subprocess.run", return_value=MagicMock(returncode=0)) as mock_run, + ): + result = session_end.main() - @patch("os.environ", {}) - def test_returns_0_when_no_plugin_root(self): - """Should return 0 when CLAUDE_PLUGIN_ROOT is not set.""" - result = main() - assert result == 0 + assert result == 0 + mock_run.assert_called_once() diff --git a/pilot/tests/hooks/test_spec_validators.py b/pilot/hooks/tests/test_spec_validators.py similarity index 93% rename from pilot/tests/hooks/test_spec_validators.py rename to pilot/hooks/tests/test_spec_validators.py index 224689a6..0ffd86f3 100644 --- a/pilot/tests/hooks/test_spec_validators.py +++ b/pilot/hooks/tests/test_spec_validators.py @@ -34,7 +34,7 @@ def test_allows_stop_when_plan_created(self): assert result.returncode == 0, f"Should allow stop when plan exists. stderr: {result.stderr}" def test_blocks_stop_when_no_plan(self): - """Should block stop when no plan file exists.""" + """Should output block decision when no plan file exists.""" with tempfile.TemporaryDirectory() as tmpdir: result = subprocess.run( ["uv", "run", "python", "pilot/hooks/spec_plan_validator.py"], @@ -44,8 +44,8 @@ def test_blocks_stop_when_no_plan(self): cwd=PROJECT_ROOT, ) - assert result.returncode == 2, f"Should block stop when plan missing. stderr: {result.stderr}" - assert "Plan file not created yet" in result.stderr + assert result.returncode == 0, f"Unexpected return code. stderr: {result.stderr}" + assert "Plan file not created yet" in result.stdout def test_escape_hatch_allows_stop(self): """Should allow stop when stop_hook_active is true (escape hatch).""" @@ -60,7 +60,6 @@ def test_escape_hatch_allows_stop(self): assert result.returncode == 0, f"Escape hatch should allow stop. stderr: {result.stderr}" - def test_allows_stop_when_asking_user_question(self): """Should allow stop when AskUserQuestion was the last tool.""" with tempfile.TemporaryDirectory() as tmpdir: @@ -130,7 +129,7 @@ def test_allows_stop_when_status_changed(self): active_plan_json.unlink(missing_ok=True) def test_blocks_stop_when_status_complete(self): - """Should block stop when plan status is still COMPLETE.""" + """Should output block decision when plan status is still COMPLETE.""" with tempfile.TemporaryDirectory() as tmpdir: plan_path = Path(tmpdir) / "docs" / "plans" / "2026-02-11-test.md" plan_path.parent.mkdir(parents=True, exist_ok=True) @@ -139,8 +138,8 @@ def test_blocks_stop_when_status_complete(self): active_plan_json = self._setup_active_plan(plan_path) try: result = self._run_validator({"project_root": tmpdir, "stop_hook_active": False}) - assert result.returncode == 2, f"Should block stop when COMPLETE. stderr: {result.stderr}" - assert "status was not updated" in result.stderr.lower() + assert result.returncode == 0, f"Unexpected return code. stderr: {result.stderr}" + assert "status was not updated" in result.stdout.lower() finally: active_plan_json.unlink(missing_ok=True) diff --git a/pilot/hooks/tests/test_tdd_enforcer.py b/pilot/hooks/tests/test_tdd_enforcer.py index ead53aa8..d47eaca5 100644 --- a/pilot/hooks/tests/test_tdd_enforcer.py +++ b/pilot/hooks/tests/test_tdd_enforcer.py @@ -3,11 +3,9 @@ from __future__ import annotations import json -import sys import tempfile from pathlib import Path -sys.path.insert(0, str(Path(__file__).parent.parent)) from tdd_enforcer import ( _find_test_dirs, _pascal_to_kebab, @@ -34,6 +32,9 @@ def test_skips_excluded_dir(self): assert should_skip("/project/dist/bundle.js") is True assert should_skip("/project/__pycache__/mod.pyc") is True + def test_skips_migrations(self): + assert should_skip("src/migrations/001_init.py") is True + def test_does_not_skip_source_files(self): assert should_skip("/project/src/app.py") is False assert should_skip("/project/src/app.ts") is False diff --git a/pilot/hooks/tests/test_tool_redirect.py b/pilot/hooks/tests/test_tool_redirect.py index fa7db20f..331927cc 100644 --- a/pilot/hooks/tests/test_tool_redirect.py +++ b/pilot/hooks/tests/test_tool_redirect.py @@ -1,30 +1,34 @@ -"""Tests for tool_redirect hook.""" +"""Tests for tool_redirect hook — blocks broken tools, hints at alternatives.""" from __future__ import annotations import json -import sys -from pathlib import Path +from io import StringIO from unittest.mock import patch -sys.path.insert(0, str(Path(__file__).parent.parent)) +import pytest + from tool_redirect import block, hint, is_semantic_pattern, run_tool_redirect class TestBlock: - def test_returns_0_and_outputs_deny_json(self, capsys): + """Tests for block() output format.""" + + def test_returns_2_and_outputs_deny_json(self, capsys): info = {"message": "Tool blocked", "alternative": "Use X instead", "example": "X foo"} result = block(info) - assert result == 0 + assert result == 2 captured = capsys.readouterr() data = json.loads(captured.out) assert data["permissionDecision"] == "deny" assert "Tool blocked" in data["reason"] assert "Use X instead" in data["reason"] - assert captured.err == "" + assert "Tool blocked" in captured.err class TestHint: + """Tests for hint() output format.""" + def test_returns_0_and_outputs_additional_context(self, capsys): info = {"message": "Better alternative exists", "alternative": "Use Y", "example": "Y bar"} result = hint(info) @@ -37,96 +41,153 @@ def test_returns_0_and_outputs_additional_context(self, capsys): assert captured.err == "" -class TestRunToolRedirect: - @patch("sys.stdin") - def test_websearch_blocked(self, mock_stdin, capsys): - mock_stdin.__enter__ = lambda s: s - mock_stdin.__exit__ = lambda s, *a: None - mock_stdin.read = lambda: json.dumps({"tool_name": "WebSearch", "tool_input": {"query": "test"}}) +class TestIsSemanticPattern: + """Tests for semantic vs code pattern detection.""" + + @pytest.mark.parametrize( + "pattern", + [ + "where is config loaded", + "how does authentication work", + "find the database connection", + "what are the API endpoints", + "looking for error handling", + "locate all test fixtures", + ], + ) + def test_detects_semantic_patterns(self, pattern: str): + assert is_semantic_pattern(pattern) is True + + @pytest.mark.parametrize( + "pattern", + [ + "def save_config", + "class Handler", + "import os", + "from pathlib import Path", + "const API_URL =", + "function handleClick(", + "interface UserProps", + "x == y", + "result != None", + ], + ) + def test_rejects_code_patterns(self, pattern: str): + assert is_semantic_pattern(pattern) is False + + def test_empty_string_not_semantic(self): + assert is_semantic_pattern("") is False + + def test_plain_word_not_semantic(self): + assert is_semantic_pattern("config") is False + + +def _run_with_input(tool_name: str, tool_input: dict | None = None) -> int: + """Simulate hook invocation with the given tool name and optional input.""" + hook_data = {"tool_name": tool_name} + if tool_input is not None: + hook_data["tool_input"] = tool_input + stdin = StringIO(json.dumps(hook_data)) + with patch("sys.stdin", stdin): + return run_tool_redirect() + + +class TestBlockedTools: + """Tests for tools that should be blocked (exit code 2).""" + + def test_blocks_web_search(self): + result = _run_with_input("WebSearch", {"query": "python tutorial"}) + assert result == 2 + + def test_blocks_web_fetch(self): + result = _run_with_input("WebFetch", {"url": "https://example.com"}) + assert result == 2 + + def test_blocks_enter_plan_mode(self): + result = _run_with_input("EnterPlanMode") + assert result == 2 + + def test_blocks_exit_plan_mode(self): + result = _run_with_input("ExitPlanMode") + assert result == 2 + + +class TestHintedTools: + """Tests for tools that get hints but are allowed (exit code 0).""" + + def test_hints_grep_with_semantic_pattern(self): + result = _run_with_input("Grep", {"pattern": "where is config loaded"}) + assert result == 0 - with patch("tool_redirect.json.load", return_value={"tool_name": "WebSearch", "tool_input": {"query": "test"}}): - result = run_tool_redirect() + def test_no_hint_grep_with_code_pattern(self): + result = _run_with_input("Grep", {"pattern": "def save_config"}) + assert result == 0 + def test_hints_task_explore(self): + result = _run_with_input("Task", {"subagent_type": "Explore"}) assert result == 0 - captured = capsys.readouterr() - data = json.loads(captured.out) - assert data["permissionDecision"] == "deny" - @patch("sys.stdin") - def test_webfetch_blocked(self, mock_stdin, capsys): - with patch( - "tool_redirect.json.load", return_value={"tool_name": "WebFetch", "tool_input": {"url": "http://x"}} - ): - result = run_tool_redirect() + def test_hints_task_generic_subagent(self): + """Non-allowed subagent types get a hint.""" + result = _run_with_input("Task", {"subagent_type": "general-purpose"}) + assert result == 0 + def test_no_hint_task_spec_reviewer(self): + """Spec reviewer sub-agents should be allowed without hints.""" + result = _run_with_input("Task", {"subagent_type": "pilot:spec-reviewer-compliance"}) assert result == 0 - captured = capsys.readouterr() - data = json.loads(captured.out) - assert data["permissionDecision"] == "deny" - @patch("sys.stdin") - def test_enter_plan_mode_blocked(self, mock_stdin, capsys): - with patch("tool_redirect.json.load", return_value={"tool_name": "EnterPlanMode", "tool_input": {}}): - result = run_tool_redirect() + def test_no_hint_task_plan_verifier(self): + result = _run_with_input("Task", {"subagent_type": "pilot:plan-verifier"}) + assert result == 0 + def test_no_hint_task_plan_challenger(self): + result = _run_with_input("Task", {"subagent_type": "pilot:plan-challenger"}) assert result == 0 - captured = capsys.readouterr() - data = json.loads(captured.out) - assert data["permissionDecision"] == "deny" - @patch("sys.stdin") - def test_explore_hinted(self, mock_stdin, capsys): - with patch( - "tool_redirect.json.load", return_value={"tool_name": "Task", "tool_input": {"subagent_type": "Explore"}} - ): - result = run_tool_redirect() - assert result == 0 - captured = capsys.readouterr() - data = json.loads(captured.out) - assert "hookSpecificOutput" in data - assert "vexor" in data["hookSpecificOutput"]["additionalContext"].lower() +class TestAllowedTools: + """Tests for tools that should pass through without blocks or hints.""" - @patch("sys.stdin") - def test_allowed_tool_no_output(self, mock_stdin, capsys): - with patch("tool_redirect.json.load", return_value={"tool_name": "Read", "tool_input": {"file_path": "/x"}}): - result = run_tool_redirect() + def test_allows_read(self): + assert _run_with_input("Read", {"file_path": "/foo.py"}) == 0 - assert result == 0 - captured = capsys.readouterr() - assert captured.out == "" - - @patch("sys.stdin") - def test_grep_semantic_pattern_hinted(self, mock_stdin, capsys): - with patch( - "tool_redirect.json.load", - return_value={"tool_name": "Grep", "tool_input": {"pattern": "where is config loaded"}}, - ): - result = run_tool_redirect() + def test_allows_write(self): + assert _run_with_input("Write", {"file_path": "/foo.py"}) == 0 - assert result == 0 - captured = capsys.readouterr() - data = json.loads(captured.out) - assert "hookSpecificOutput" in data + def test_allows_edit(self): + assert _run_with_input("Edit", {"file_path": "/foo.py"}) == 0 + + def test_allows_bash(self): + assert _run_with_input("Bash", {"command": "ls"}) == 0 - @patch("sys.stdin") - def test_grep_code_pattern_no_hint(self, mock_stdin, capsys): - with patch( - "tool_redirect.json.load", return_value={"tool_name": "Grep", "tool_input": {"pattern": "def save_config"}} - ): + def test_allows_task_create(self): + assert _run_with_input("TaskCreate", {"subject": "test"}) == 0 + + +class TestEdgeCases: + """Tests for malformed input and edge cases.""" + + def test_handles_invalid_json(self): + stdin = StringIO("not json") + with patch("sys.stdin", stdin): result = run_tool_redirect() + assert result == 0 + def test_handles_empty_stdin(self): + stdin = StringIO("") + with patch("sys.stdin", stdin): + result = run_tool_redirect() assert result == 0 - captured = capsys.readouterr() - assert captured.out == "" + def test_handles_missing_tool_name(self): + stdin = StringIO(json.dumps({"tool_input": {}})) + with patch("sys.stdin", stdin): + result = run_tool_redirect() + assert result == 0 -class TestIsSemanticPattern: - def test_natural_language_detected(self): - assert is_semantic_pattern("where is the config loaded") is True - assert is_semantic_pattern("how does authentication work") is True - - def test_code_pattern_not_detected(self): - assert is_semantic_pattern("def save_config") is False - assert is_semantic_pattern("class Handler") is False - assert is_semantic_pattern("import os") is False + def test_handles_non_dict_tool_input(self): + stdin = StringIO(json.dumps({"tool_name": "Grep", "tool_input": "not a dict"})) + with patch("sys.stdin", stdin): + result = run_tool_redirect() + assert result == 0 diff --git a/pilot/tests/hooks/test_typescript_checker.py b/pilot/hooks/tests/test_typescript_checker.py similarity index 96% rename from pilot/tests/hooks/test_typescript_checker.py rename to pilot/hooks/tests/test_typescript_checker.py index c85c3691..c0b2275e 100644 --- a/pilot/tests/hooks/test_typescript_checker.py +++ b/pilot/hooks/tests/test_typescript_checker.py @@ -214,7 +214,7 @@ def test_no_tools_returns_zero(self, tmp_path: Path) -> None: with ( patch("_checkers.typescript.strip_typescript_comments"), - patch("_checkers.typescript.check_file_length"), + patch("_checkers.typescript.check_file_length", return_value=""), patch("_checkers.typescript.find_project_root", return_value=None), patch("_checkers.typescript.find_tool", return_value=None), ): @@ -253,14 +253,14 @@ def run_side_effect(cmd, **kwargs): with ( patch("_checkers.typescript.strip_typescript_comments"), - patch("_checkers.typescript.check_file_length"), + patch("_checkers.typescript.check_file_length", return_value=""), patch("_checkers.typescript.find_project_root", return_value=None), patch("_checkers.typescript.find_tool", side_effect=lambda name, _: f"/usr/bin/{name}" if name in ("prettier", "eslint") else None), patch("_checkers.typescript.subprocess.run", side_effect=run_side_effect), ): exit_code, reason = check_typescript(ts_file) - assert exit_code == 2 + assert exit_code == 0 assert "3 eslint" in reason @@ -268,7 +268,7 @@ class TestCheckTypescriptCleanFile: """Clean files should pass.""" def test_clean_file_returns_success(self, tmp_path: Path) -> None: - """Clean TS file returns exit 2 with empty reason.""" + """Clean TS file returns exit 0 with empty reason.""" ts_file = tmp_path / "app.ts" ts_file.write_text("const x = 1;\n") @@ -284,14 +284,14 @@ def run_side_effect(cmd, **_kwargs): with ( patch("_checkers.typescript.strip_typescript_comments"), - patch("_checkers.typescript.check_file_length"), + patch("_checkers.typescript.check_file_length", return_value=""), patch("_checkers.typescript.find_project_root", return_value=None), patch("_checkers.typescript.find_tool", side_effect=lambda name, _: f"/usr/bin/{name}" if name in ("prettier", "eslint") else None), patch("_checkers.typescript.subprocess.run", side_effect=run_side_effect), ): exit_code, reason = check_typescript(ts_file) - assert exit_code == 2 + assert exit_code == 0 assert reason == "" @@ -317,7 +317,7 @@ def run_side_effect(cmd, **_kwargs): with ( patch("_checkers.typescript.strip_typescript_comments"), - patch("_checkers.typescript.check_file_length"), + patch("_checkers.typescript.check_file_length", return_value=""), patch("_checkers.typescript.find_project_root", return_value=None), patch("_checkers.typescript.find_tool", side_effect=lambda name, _: f"/usr/bin/{name}"), patch("_checkers.typescript.subprocess.run", side_effect=run_side_effect), diff --git a/pilot/hooks/tool_redirect.py b/pilot/hooks/tool_redirect.py index b6ac0813..ad15fe6d 100755 --- a/pilot/hooks/tool_redirect.py +++ b/pilot/hooks/tool_redirect.py @@ -116,14 +116,14 @@ def is_semantic_pattern(pattern: str) -> bool: "example": 'mcp-cli call plugin_pilot_web-fetch/fetch_url \'{"url": "..."}\'', }, "EnterPlanMode": { - "message": "EnterPlanMode is blocked (project uses /spec workflow)", - "alternative": "Use Skill(skill='spec') for dispatch, or invoke phases directly: spec-plan, spec-implement, spec-verify", - "example": "Skill(skill='spec', args='task description') or Skill(skill='spec-plan', args='task description')", + "message": "BLOCKED: EnterPlanMode is FORBIDDEN. Plan mode is completely disabled in this project.", + "alternative": "Do NOT use plan mode under any circumstances. Use /spec for structured planning, or execute directly for simple tasks", + "example": "Skill(skill='spec', args='task description')", }, "ExitPlanMode": { - "message": "ExitPlanMode is blocked (project uses /spec workflow)", - "alternative": "Use AskUserQuestion for plan approval, then Skill(skill='spec-implement', args='plan-path')", - "example": "AskUserQuestion to confirm plan, then Skill(skill='spec-implement', args='plan-path')", + "message": "BLOCKED: ExitPlanMode is FORBIDDEN. Plan mode is completely disabled in this project.", + "alternative": "Do NOT use plan mode under any circumstances. Use /spec for structured planning, or execute directly for simple tasks", + "example": "Skill(skill='spec', args='task description')", }, "Task": { "message": "Task(subagent_type='Plan') is blocked (project uses /spec workflow)", @@ -144,11 +144,12 @@ def _format_example(redirect_info: dict, pattern: str | None = None) -> str: def block(redirect_info: dict, pattern: str | None = None) -> int: - """Output deny JSON to stdout and return 0.""" + """Output deny JSON to stdout and return 2 (non-zero reinforces block).""" example = _format_example(redirect_info, pattern) reason = f"{redirect_info['message']}\n-> {redirect_info['alternative']}\nExample: {example}" + sys.stderr.write(f"\033[0;31m[Pilot] {redirect_info['message']}\033[0m\n") print(pre_tool_use_deny(reason)) - return 0 + return 2 def hint(redirect_info: dict, pattern: str | None = None) -> int: diff --git a/pilot/rules/playwright-cli.md b/pilot/rules/playwright-cli.md index dd4e761c..4e3ea24f 100644 --- a/pilot/rules/playwright-cli.md +++ b/pilot/rules/playwright-cli.md @@ -2,15 +2,37 @@ **MANDATORY for E2E testing of any app with a UI.** API tests verify backend; playwright-cli verifies what the user sees. +### Session Isolation (Parallel Workflows) + +**⛔ MANDATORY when running inside `/spec` or any parallel workflow.** Without session isolation, parallel agents share the default browser instance and overwrite each other's state (wrong pages, failed snapshots, broken interactions). + +**Use `-s=$PILOT_SESSION_ID` on ALL `playwright-cli` commands:** + +```bash +# Resolve session name once at the start of E2E testing +PW_SESSION="${PILOT_SESSION_ID:-default}" + +# All subsequent commands use -s= +playwright-cli -s="$PW_SESSION" open +playwright-cli -s="$PW_SESSION" snapshot +playwright-cli -s="$PW_SESSION" click e1 +playwright-cli -s="$PW_SESSION" close # Always close when done +``` + +**Why:** Each `/spec` session gets a unique `PILOT_SESSION_ID`. The `-s=` flag gives each session its own isolated browser instance, preventing parallel workflows from interfering with each other. + +**⛔ NEVER use bare `playwright-cli` commands (without `-s=`) during `/spec` workflows.** This causes cross-session interference that is extremely difficult to debug. + ### Core Workflow ```bash -playwright-cli open # 1. Open browser -playwright-cli snapshot # 2. Get elements with refs (e1, e2, ...) -playwright-cli fill e1 "text" # 3. Interact using refs -playwright-cli click e2 -playwright-cli snapshot # 4. Re-snapshot to verify result -playwright-cli close # 5. Clean up +PW_SESSION="${PILOT_SESSION_ID:-default}" +playwright-cli -s="$PW_SESSION" open # 1. Open browser +playwright-cli -s="$PW_SESSION" snapshot # 2. Get elements with refs (e1, e2, ...) +playwright-cli -s="$PW_SESSION" fill e1 "text" # 3. Interact using refs +playwright-cli -s="$PW_SESSION" click e2 +playwright-cli -s="$PW_SESSION" snapshot # 4. Re-snapshot to verify result +playwright-cli -s="$PW_SESSION" close # 5. Clean up ``` ### Command Reference @@ -60,6 +82,8 @@ playwright-cli list # List sessions playwright-cli close-all # Close all ``` +**In `/spec` workflows**, the session name is always `$PILOT_SESSION_ID` — never invent custom names. This ensures each parallel spec run is fully isolated. + ### E2E Checklist - [ ] User can complete the main workflow diff --git a/pilot/rules/task-and-workflow.md b/pilot/rules/task-and-workflow.md index 5a0e254d..e196a062 100644 --- a/pilot/rules/task-and-workflow.md +++ b/pilot/rules/task-and-workflow.md @@ -1,5 +1,11 @@ # Task and Workflow Rules +## ⛔ NEVER Use Built-in Plan Mode + +**`EnterPlanMode` and `ExitPlanMode` are BLOCKED. Do NOT call them under any circumstances.** They are intercepted by hooks and will fail. This project uses `/spec` for structured planning instead. + +--- + ## Task Complexity Triage **Default mode is quick mode (direct execution).** `/spec` is ONLY used when the user explicitly types `/spec`. @@ -64,12 +70,14 @@ When resuming same session (same `CLAUDE_CODE_TASK_LIST_ID`): run `TaskList` fir The Task tool spawns verification sub-agents at two points: -| Phase | Agents (parallel, both `run_in_background=true`) | `subagent_type` | -|-------|--------------------------------------------------|-----------------| +| Phase | Agents (parallel, run in background) | `subagent_type` | +|-------|--------------------------------------|-----------------| | `spec-plan` Step 1.7 | plan-verifier + plan-challenger | `pilot:plan-verifier` + `pilot:plan-challenger` | | `spec-verify` Step 3.0, 3.5 | spec-reviewer-compliance + spec-reviewer-quality | `pilot:spec-reviewer-compliance` + `pilot:spec-reviewer-quality` | -**Launch with TWO `Task()` calls in a SINGLE message, both with `run_in_background=true`.** Without it, they run sequentially. +All verification agents have `background: true` in their agent definitions, so they run in the background automatically. **As a fallback**, also pass `run_in_background=true` in the Task() call. + +**Launch with TWO `Task()` calls in a SINGLE message.** If sent in separate messages, the first blocks and the second waits. **⛔ NEVER skip verification. ⛔ NEVER use `TaskOutput` to retrieve results** (dumps full transcript, wastes tokens). Agents write findings to JSON files — poll with Read tool, `sleep 10` between attempts. @@ -79,10 +87,6 @@ The Task tool spawns verification sub-agents at two points: Use `run_in_background=true` only for long-running processes (dev servers, watchers). Prefer synchronous for tests, linting, git, installs. -### No Built-in Plan Mode - -**NEVER use `EnterPlanMode` or `ExitPlanMode`.** Use `/spec` instead. - --- ## Deviation Handling During Implementation diff --git a/pilot/rules/testing.md b/pilot/rules/testing.md index dc651b1c..6e8b949b 100644 --- a/pilot/rules/testing.md +++ b/pilot/rules/testing.md @@ -61,6 +61,8 @@ Mock at module level (where imported, not where defined). Test > 1s = likely unm Use `playwright-cli` to verify what the user sees. See `playwright-cli.md` for commands. +**⛔ Session isolation:** In `/spec` workflows or any parallel context, ALWAYS use `-s="${PILOT_SESSION_ID:-default}"` on all `playwright-cli` commands to prevent cross-session browser interference. See `playwright-cli.md` Session Isolation section. + ### Anti-Patterns - **Dependent tests** — each test must work independently diff --git a/pilot/rules/verification.md b/pilot/rules/verification.md index 7367f77d..8a36c220 100644 --- a/pilot/rules/verification.md +++ b/pilot/rules/verification.md @@ -6,7 +6,7 @@ Unit tests with mocks prove nothing about real-world behavior. After tests pass: -- CLI command → **run it** | API endpoint → **call it** | Frontend UI → **open with playwright-cli** +- CLI command → **run it** | API endpoint → **call it** | Frontend UI → **open with `playwright-cli -s="${PILOT_SESSION_ID:-default}"`** (session isolation — see `playwright-cli.md`) - Any runnable program → **run it** **When:** After tests pass, after refactoring, after changing imports/deps/config, before marking any task complete. diff --git a/pilot/tests/hooks/conftest.py b/pilot/tests/hooks/conftest.py deleted file mode 100644 index 427388b4..00000000 --- a/pilot/tests/hooks/conftest.py +++ /dev/null @@ -1,8 +0,0 @@ -"""Configure sys.path so _checkers and _util are importable in tests.""" - -import sys -from pathlib import Path - -_hooks_dir = str(Path(__file__).resolve().parents[2] / "hooks") -if _hooks_dir not in sys.path: - sys.path.insert(0, _hooks_dir) diff --git a/pilot/tests/hooks/test_context_monitor.py b/pilot/tests/hooks/test_context_monitor.py deleted file mode 100644 index 82ecd816..00000000 --- a/pilot/tests/hooks/test_context_monitor.py +++ /dev/null @@ -1,127 +0,0 @@ -"""Tests for context_monitor throttling and context resolution.""" - -import json -import time - -from context_monitor import _is_throttled, _resolve_context - - -def test_throttle_skips_when_recent_and_low_context(tmp_path, monkeypatch): - """Throttle returns True when last check was < 30s ago and context below warning threshold.""" - cache_file = tmp_path / "context_cache.json" - monkeypatch.setattr("context_monitor.get_session_cache_path", lambda: cache_file) - - session_id = "test-session-123" - cache_file.write_text(json.dumps({ - "session_id": session_id, - "tokens": 100000, - "timestamp": time.time() - 5, - })) - - assert _is_throttled(session_id) is True - - -def test_throttle_allows_when_high_context(tmp_path, monkeypatch): - """Throttle returns False when context is high (never skip near compaction).""" - cache_file = tmp_path / "context_cache.json" - monkeypatch.setattr("context_monitor.get_session_cache_path", lambda: cache_file) - - session_id = "test-session-123" - cache_file.write_text(json.dumps({ - "session_id": session_id, - "tokens": 170000, - "timestamp": time.time() - 5, - })) - - assert _is_throttled(session_id) is False - - -def test_throttle_allows_when_stale_timestamp(tmp_path, monkeypatch): - """Throttle returns False when last check was > 30s ago.""" - cache_file = tmp_path / "context_cache.json" - monkeypatch.setattr("context_monitor.get_session_cache_path", lambda: cache_file) - - session_id = "test-session-123" - cache_file.write_text(json.dumps({ - "session_id": session_id, - "tokens": 100000, - "timestamp": time.time() - 35, - })) - - assert _is_throttled(session_id) is False - - -def test_throttle_allows_when_no_cache(tmp_path, monkeypatch): - """Throttle returns False when no cache file exists.""" - cache_file = tmp_path / "context_cache.json" - monkeypatch.setattr("context_monitor.get_session_cache_path", lambda: cache_file) - - assert _is_throttled("test-session-123") is False - - -def test_throttle_allows_when_different_session(tmp_path, monkeypatch): - """Throttle returns False when cache is for a different session.""" - cache_file = tmp_path / "context_cache.json" - monkeypatch.setattr("context_monitor.get_session_cache_path", lambda: cache_file) - - cache_file.write_text(json.dumps({ - "session_id": "other-session-456", - "tokens": 100000, - "timestamp": time.time() - 5, - })) - - assert _is_throttled("test-session-123") is False - - - - -def test_resolve_context_returns_none_when_statusline_cache_missing(tmp_path, monkeypatch): - """Returns None when no statusline cache exists (no racy fallback).""" - cache_file = tmp_path / "context_cache.json" - monkeypatch.setattr("context_monitor.get_session_cache_path", lambda: cache_file) - monkeypatch.setattr("context_monitor._read_statusline_context_pct", lambda: None) - - result = _resolve_context("test-session-123") - - assert result is None - - -def test_resolve_context_returns_statusline_percentage(tmp_path, monkeypatch): - """Returns percentage from statusline cache when available.""" - cache_file = tmp_path / "context_cache.json" - monkeypatch.setattr("context_monitor.get_session_cache_path", lambda: cache_file) - monkeypatch.setattr("context_monitor._read_statusline_context_pct", lambda: 45.0) - - result = _resolve_context("test-session-123") - - assert result is not None - pct, tokens, shown_learn, shown_80 = result - assert pct == 45.0 - assert tokens == 90000 - assert shown_learn == [] - assert shown_80 is False - - -def test_resolve_context_includes_session_flags(tmp_path, monkeypatch): - """Returns session flags (learn thresholds, 80% warning) from cache.""" - cache_file = tmp_path / "context_cache.json" - monkeypatch.setattr("context_monitor.get_session_cache_path", lambda: cache_file) - monkeypatch.setattr("context_monitor._read_statusline_context_pct", lambda: 85.0) - - session_id = "test-session-123" - cache_file.write_text(json.dumps({ - "session_id": session_id, - "tokens": 170000, - "timestamp": time.time() - 5, - "shown_learn": [40, 60], - "shown_80_warn": True, - })) - - result = _resolve_context(session_id) - - assert result is not None - pct, tokens, shown_learn, shown_80 = result - assert pct == 85.0 - assert tokens == 170000 - assert shown_learn == [40, 60] - assert shown_80 is True diff --git a/pilot/tests/hooks/test_session_end.py b/pilot/tests/hooks/test_session_end.py deleted file mode 100644 index e9e7e973..00000000 --- a/pilot/tests/hooks/test_session_end.py +++ /dev/null @@ -1,94 +0,0 @@ -"""Tests for session_end hook — worker stop and notification behavior.""" - -from __future__ import annotations - -import json -import os -from unittest.mock import MagicMock, patch - -import pytest - -import session_end - - -@pytest.mark.unit -def test_returns_early_without_plugin_root(): - """Should return 0 when CLAUDE_PLUGIN_ROOT is not set.""" - with patch.dict(os.environ, {}, clear=True): - os.environ.pop("CLAUDE_PLUGIN_ROOT", None) - result = session_end.main() - - assert result == 0 - - -@pytest.mark.unit -def test_skips_stop_when_other_sessions_active(): - """Should skip worker stop when other Pilot sessions are running.""" - with ( - patch.dict(os.environ, {"CLAUDE_PLUGIN_ROOT": "/fake/plugin"}), - patch("session_end._get_active_session_count", return_value=2), - patch("session_end.subprocess.run") as mock_run, - ): - result = session_end.main() - - assert result == 0 - mock_run.assert_not_called() - - -@pytest.mark.unit -def test_stops_worker_when_no_other_sessions(tmp_path): - """Should stop worker when this is the only active session.""" - session_dir = tmp_path / "sessions" / "test-session" - session_dir.mkdir(parents=True) - - with ( - patch.dict(os.environ, {"CLAUDE_PLUGIN_ROOT": "/fake/plugin", "PILOT_SESSION_ID": "test-session"}), - patch("session_end._get_active_session_count", return_value=1), - patch("session_end._sessions_base", return_value=tmp_path / "sessions"), - patch("session_end.subprocess.run", return_value=MagicMock(returncode=0)) as mock_run, - patch("session_end.send_dashboard_notification"), - ): - result = session_end.main() - - assert result == 0 - mock_run.assert_called_once() - assert "stop" in str(mock_run.call_args) - - -@pytest.mark.unit -def test_returns_0_when_zero_sessions(tmp_path): - """Should stop worker and return 0 when no sessions active.""" - session_dir = tmp_path / "sessions" / "test-session" - session_dir.mkdir(parents=True) - - with ( - patch.dict(os.environ, {"CLAUDE_PLUGIN_ROOT": "/fake/plugin", "PILOT_SESSION_ID": "test-session"}), - patch("session_end._get_active_session_count", return_value=0), - patch("session_end._sessions_base", return_value=tmp_path / "sessions"), - patch("session_end.subprocess.run", return_value=MagicMock(returncode=0)), - patch("session_end.send_dashboard_notification"), - ): - result = session_end.main() - - assert result == 0 - - -@pytest.mark.unit -def test_returns_0_when_plan_pending(tmp_path): - """Should return 0 when plan is PENDING (not verified).""" - session_dir = tmp_path / "sessions" / "test-session" - session_dir.mkdir(parents=True) - plan_data = {"plan_path": "/fake/plan.md", "status": "PENDING"} - (session_dir / "active_plan.json").write_text(json.dumps(plan_data)) - - with ( - patch.dict(os.environ, {"CLAUDE_PLUGIN_ROOT": "/fake/plugin", "PILOT_SESSION_ID": "test-session"}), - patch("session_end._get_active_session_count", return_value=0), - patch("session_end._sessions_base", return_value=tmp_path / "sessions"), - patch("session_end.subprocess.run", return_value=MagicMock(returncode=0)), - patch("session_end.send_dashboard_notification") as mock_notify, - ): - result = session_end.main() - - assert result == 0 - mock_notify.assert_called_once_with("attention_needed", "Session Ended", "Claude session ended") diff --git a/pilot/tests/hooks/test_tdd_enforcer.py b/pilot/tests/hooks/test_tdd_enforcer.py deleted file mode 100644 index 9c5a4b53..00000000 --- a/pilot/tests/hooks/test_tdd_enforcer.py +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env python3 -"""Tests for TDD enforcer hook.""" - -from __future__ import annotations - -import json -from pathlib import Path - -import pytest - -from pilot.hooks.tdd_enforcer import ( - has_typescript_test_file, - is_test_file, - is_trivial_edit, - should_skip, -) - - -class TestIsTestFile: - """Test is_test_file() detection.""" - - def test_python_test_prefix(self): - assert is_test_file("test_handler.py") is True - - def test_python_test_suffix(self): - assert is_test_file("handler_test.py") is True - - def test_typescript_test_suffix(self): - assert is_test_file("handler.test.ts") is True - - def test_typescript_spec_suffix(self): - assert is_test_file("handler.spec.ts") is True - - def test_go_test_suffix(self): - assert is_test_file("handler_test.go") is True - - def test_python_impl_file(self): - assert is_test_file("handler.py") is False - - def test_go_impl_file(self): - assert is_test_file("handler.go") is False - - -class TestGoSupport: - """Test Go-specific TDD enforcement.""" - - def test_go_test_file_recognized(self): - """Go test files should be recognized and skipped.""" - assert is_test_file("handler_test.go") is True - assert is_test_file("service_test.go") is True - - def test_go_impl_file_not_test(self): - """Go implementation files should not be marked as test files.""" - assert is_test_file("handler.go") is False - assert is_test_file("service.go") is False - - def test_has_go_test_file_exists(self, tmp_path): - """Should detect when corresponding _test.go file exists.""" - impl_file = tmp_path / "handler.go" - test_file = tmp_path / "handler_test.go" - - impl_file.write_text("package main\n") - test_file.write_text("package main\n") - - from pilot.hooks.tdd_enforcer import has_go_test_file - - assert has_go_test_file(str(impl_file)) is True - - def test_has_go_test_file_missing(self, tmp_path): - """Should detect when no _test.go file exists.""" - impl_file = tmp_path / "handler.go" - impl_file.write_text("package main\n") - - from pilot.hooks.tdd_enforcer import has_go_test_file - - assert has_go_test_file(str(impl_file)) is False - - -class TestShouldSkip: - """Test should_skip() exclusion logic.""" - - def test_skip_markdown(self): - assert should_skip("README.md") is True - - def test_skip_json(self): - assert should_skip("config.json") is True - - def test_skip_migrations(self): - assert should_skip("src/migrations/001_init.py") is True - - def test_dont_skip_python(self): - assert should_skip("handler.py") is False - - def test_dont_skip_go(self): - assert should_skip("handler.go") is False - - -class TestTrivialEdit: - """Test is_trivial_edit() detection.""" - - def test_import_only_edit(self): - tool_input = { - "old_string": "import os", - "new_string": "import os\nimport sys", - } - assert is_trivial_edit("Edit", tool_input) is True - - def test_logic_change_not_trivial(self): - tool_input = { - "old_string": "return x + 1", - "new_string": "return x + 2", - } - assert is_trivial_edit("Edit", tool_input) is False - - def test_write_not_trivial(self): - tool_input = {"file_path": "test.py", "content": "def foo(): pass"} - assert is_trivial_edit("Write", tool_input) is False - - -class TestTypescriptTestFile: - """Test has_typescript_test_file() detection.""" - - def test_finds_test_ts(self, tmp_path): - impl_file = tmp_path / "handler.ts" - test_file = tmp_path / "handler.test.ts" - - impl_file.write_text("export function handler() {}") - test_file.write_text("test('handler', () => {})") - - assert has_typescript_test_file(str(impl_file)) is True - - def test_no_test_file(self, tmp_path): - impl_file = tmp_path / "handler.ts" - impl_file.write_text("export function handler() {}") - - assert has_typescript_test_file(str(impl_file)) is False diff --git a/pilot/tests/hooks/test_tool_redirect.py b/pilot/tests/hooks/test_tool_redirect.py deleted file mode 100644 index 611b250d..00000000 --- a/pilot/tests/hooks/test_tool_redirect.py +++ /dev/null @@ -1,177 +0,0 @@ -"""Tests for tool_redirect hook — blocks broken tools, hints at alternatives.""" - -from __future__ import annotations - -import json -from io import StringIO -from unittest.mock import patch - -import pytest - -from tool_redirect import is_semantic_pattern, run_tool_redirect - - -class TestIsSemanticPattern: - """Tests for semantic vs code pattern detection.""" - - @pytest.mark.parametrize( - "pattern", - [ - "where is config loaded", - "how does authentication work", - "find the database connection", - "what are the API endpoints", - "looking for error handling", - "locate all test fixtures", - ], - ) - def test_detects_semantic_patterns(self, pattern: str): - assert is_semantic_pattern(pattern) is True - - @pytest.mark.parametrize( - "pattern", - [ - "def save_config", - "class Handler", - "import os", - "from pathlib import Path", - "const API_URL =", - "function handleClick(", - "interface UserProps", - "x == y", - "result != None", - ], - ) - def test_rejects_code_patterns(self, pattern: str): - assert is_semantic_pattern(pattern) is False - - def test_empty_string_not_semantic(self): - assert is_semantic_pattern("") is False - - def test_plain_word_not_semantic(self): - assert is_semantic_pattern("config") is False - - -class TestBlockedTools: - """Tests for tools that should be blocked (exit code 2).""" - - def _run_with_input(self, tool_name: str, tool_input: dict | None = None) -> int: - hook_data = {"tool_name": tool_name} - if tool_input is not None: - hook_data["tool_input"] = tool_input - stdin = StringIO(json.dumps(hook_data)) - with patch("sys.stdin", stdin): - return run_tool_redirect() - - def test_blocks_web_search(self): - result = self._run_with_input("WebSearch", {"query": "python tutorial"}) - assert result == 2 - - def test_blocks_web_fetch(self): - result = self._run_with_input("WebFetch", {"url": "https://example.com"}) - assert result == 2 - - def test_blocks_enter_plan_mode(self): - result = self._run_with_input("EnterPlanMode") - assert result == 2 - - def test_blocks_exit_plan_mode(self): - result = self._run_with_input("ExitPlanMode") - assert result == 2 - - -class TestHintedTools: - """Tests for tools that get hints but are allowed (exit code 0).""" - - def _run_with_input(self, tool_name: str, tool_input: dict | None = None) -> int: - hook_data = {"tool_name": tool_name} - if tool_input is not None: - hook_data["tool_input"] = tool_input - stdin = StringIO(json.dumps(hook_data)) - with patch("sys.stdin", stdin): - return run_tool_redirect() - - def test_hints_grep_with_semantic_pattern(self): - result = self._run_with_input("Grep", {"pattern": "where is config loaded"}) - assert result == 0 - - def test_no_hint_grep_with_code_pattern(self): - result = self._run_with_input("Grep", {"pattern": "def save_config"}) - assert result == 0 - - def test_hints_task_explore(self): - result = self._run_with_input("Task", {"subagent_type": "Explore"}) - assert result == 0 - - def test_hints_task_generic_subagent(self): - """Non-allowed subagent types get a hint.""" - result = self._run_with_input("Task", {"subagent_type": "general-purpose"}) - assert result == 0 - - def test_no_hint_task_spec_reviewer(self): - """Spec reviewer sub-agents should be allowed without hints.""" - result = self._run_with_input("Task", {"subagent_type": "pilot:spec-reviewer-compliance"}) - assert result == 0 - - def test_no_hint_task_plan_verifier(self): - result = self._run_with_input("Task", {"subagent_type": "pilot:plan-verifier"}) - assert result == 0 - - def test_no_hint_task_plan_challenger(self): - result = self._run_with_input("Task", {"subagent_type": "pilot:plan-challenger"}) - assert result == 0 - - -class TestAllowedTools: - """Tests for tools that should pass through without blocks or hints.""" - - def _run_with_input(self, tool_name: str, tool_input: dict | None = None) -> int: - hook_data = {"tool_name": tool_name} - if tool_input is not None: - hook_data["tool_input"] = tool_input - stdin = StringIO(json.dumps(hook_data)) - with patch("sys.stdin", stdin): - return run_tool_redirect() - - def test_allows_read(self): - assert self._run_with_input("Read", {"file_path": "/foo.py"}) == 0 - - def test_allows_write(self): - assert self._run_with_input("Write", {"file_path": "/foo.py"}) == 0 - - def test_allows_edit(self): - assert self._run_with_input("Edit", {"file_path": "/foo.py"}) == 0 - - def test_allows_bash(self): - assert self._run_with_input("Bash", {"command": "ls"}) == 0 - - def test_allows_task_create(self): - assert self._run_with_input("TaskCreate", {"subject": "test"}) == 0 - - -class TestEdgeCases: - """Tests for malformed input and edge cases.""" - - def test_handles_invalid_json(self): - stdin = StringIO("not json") - with patch("sys.stdin", stdin): - result = run_tool_redirect() - assert result == 0 - - def test_handles_empty_stdin(self): - stdin = StringIO("") - with patch("sys.stdin", stdin): - result = run_tool_redirect() - assert result == 0 - - def test_handles_missing_tool_name(self): - stdin = StringIO(json.dumps({"tool_input": {}})) - with patch("sys.stdin", stdin): - result = run_tool_redirect() - assert result == 0 - - def test_handles_non_dict_tool_input(self): - stdin = StringIO(json.dumps({"tool_name": "Grep", "tool_input": "not a dict"})) - with patch("sys.stdin", stdin): - result = run_tool_redirect() - assert result == 0 diff --git a/pilot/tests/hooks/test_util.py b/pilot/tests/hooks/test_util.py deleted file mode 100644 index cd0e4662..00000000 --- a/pilot/tests/hooks/test_util.py +++ /dev/null @@ -1,266 +0,0 @@ -"""Tests for shared hook utilities module.""" - -from __future__ import annotations - -import json -import sys -import tempfile -from pathlib import Path -from unittest.mock import MagicMock, patch - -import pytest - -sys.path.insert(0, str(Path(__file__).parent.parent.parent / "hooks")) - -from _util import ( # type: ignore[import-not-found] - BLUE, - CYAN, - FILE_LENGTH_CRITICAL, - FILE_LENGTH_WARN, - GREEN, - MAGENTA, - NC, - RED, - YELLOW, - _sessions_base, - check_file_length, - find_git_root, - get_edited_file_from_stdin, - get_session_cache_path, - get_session_plan_path, - is_waiting_for_user_input, - read_hook_stdin, -) - - -def test_color_constants_defined(): - """Color constants are defined and non-empty.""" - assert RED - assert YELLOW - assert GREEN - assert CYAN - assert BLUE - assert MAGENTA - assert NC - - -def test_file_length_constants(): - """File length constants have expected values.""" - assert FILE_LENGTH_WARN == 300 - assert FILE_LENGTH_CRITICAL == 500 - - -def test_sessions_base_returns_path(): - """_sessions_base returns Path under ~/.pilot/sessions.""" - base = _sessions_base() - assert isinstance(base, Path) - assert base == Path.home() / ".pilot" / "sessions" - - -@patch.dict("os.environ", {"PILOT_SESSION_ID": "test-session-123"}) -def test_get_session_cache_path_with_session_id(): - """get_session_cache_path returns session-scoped cache path.""" - path = get_session_cache_path() - assert isinstance(path, Path) - assert "test-session-123" in str(path) - assert path.name == "context-cache.json" - - -@patch.dict("os.environ", {}, clear=True) -def test_get_session_cache_path_defaults_to_default(): - """get_session_cache_path uses 'default' when PILOT_SESSION_ID is missing.""" - path = get_session_cache_path() - assert isinstance(path, Path) - assert "default" in str(path) - - -@patch.dict("os.environ", {"PILOT_SESSION_ID": "test-session-456"}) -def test_get_session_plan_path(): - """get_session_plan_path returns session-scoped active plan path.""" - path = get_session_plan_path() - assert isinstance(path, Path) - assert "test-session-456" in str(path) - assert path.name == "active_plan.json" - - -@patch("subprocess.run") -def test_find_git_root_success(mock_run): - """find_git_root returns git root when in repo.""" - mock_run.return_value = MagicMock(returncode=0, stdout="/home/user/repo\n") - result = find_git_root() - assert result == Path("/home/user/repo") - - -@patch("subprocess.run") -def test_find_git_root_not_in_repo(mock_run): - """find_git_root returns None when not in repo.""" - mock_run.return_value = MagicMock(returncode=1, stdout="") - result = find_git_root() - assert result is None - - -@patch("subprocess.run", side_effect=Exception("Git not found")) -def test_find_git_root_handles_exception(mock_run): - """find_git_root returns None on exception.""" - result = find_git_root() - assert result is None - - -def test_read_hook_stdin_valid_json(monkeypatch): - """read_hook_stdin parses valid JSON from stdin.""" - test_data = {"tool_name": "Write", "tool_input": {"file_path": "test.py"}} - monkeypatch.setattr("sys.stdin", MagicMock(read=lambda: json.dumps(test_data))) - result = read_hook_stdin() - assert result == test_data - - -def test_read_hook_stdin_invalid_json(monkeypatch): - """read_hook_stdin returns empty dict on invalid JSON.""" - monkeypatch.setattr("sys.stdin", MagicMock(read=lambda: "not json")) - result = read_hook_stdin() - assert result == {} - - -def test_read_hook_stdin_empty(monkeypatch): - """read_hook_stdin returns empty dict on empty input.""" - monkeypatch.setattr("sys.stdin", MagicMock(read=lambda: "")) - result = read_hook_stdin() - assert result == {} - - -def test_get_edited_file_from_stdin_with_file_path(monkeypatch): - """get_edited_file_from_stdin extracts file path from hook data.""" - test_data = {"tool_input": {"file_path": "/path/to/file.py"}} - with patch("select.select") as mock_select: - mock_select.return_value = ([sys.stdin], [], []) - monkeypatch.setattr("sys.stdin", MagicMock(read=lambda: json.dumps(test_data))) - with patch("json.load", return_value=test_data): - result = get_edited_file_from_stdin() - assert result == Path("/path/to/file.py") - - -def test_get_edited_file_from_stdin_no_file_path(monkeypatch): - """get_edited_file_from_stdin returns None when no file_path.""" - test_data = {"tool_input": {}} - with patch("select.select") as mock_select: - mock_select.return_value = ([sys.stdin], [], []) - with patch("json.load", return_value=test_data): - result = get_edited_file_from_stdin() - assert result is None - - -def test_get_edited_file_from_stdin_no_stdin(monkeypatch): - """get_edited_file_from_stdin returns None when stdin is empty.""" - with patch("select.select") as mock_select: - mock_select.return_value = ([], [], []) - result = get_edited_file_from_stdin() - assert result is None - - -def test_check_file_length_under_warn(): - """check_file_length returns False for files under warn threshold.""" - with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f: - f.write("\n".join([f"line {i}" for i in range(100)])) - f.flush() - temp_path = Path(f.name) - - try: - result = check_file_length(temp_path) - assert result is False - finally: - temp_path.unlink(missing_ok=True) - - -def test_check_file_length_warn_threshold(): - """check_file_length returns True for files exceeding warn threshold.""" - with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f: - f.write("\n".join([f"line {i}" for i in range(350)])) - f.flush() - temp_path = Path(f.name) - - try: - result = check_file_length(temp_path) - assert result is True - finally: - temp_path.unlink(missing_ok=True) - - -def test_check_file_length_critical_threshold(): - """check_file_length returns True for files exceeding critical threshold.""" - with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f: - f.write("\n".join([f"line {i}" for i in range(600)])) - f.flush() - temp_path = Path(f.name) - - try: - result = check_file_length(temp_path) - assert result is True - finally: - temp_path.unlink(missing_ok=True) - - -def test_check_file_length_handles_missing_file(): - """check_file_length returns False for missing files.""" - result = check_file_length(Path("/nonexistent/file.py")) - assert result is False - - -class TestIsWaitingForUserInput: - """Tests for is_waiting_for_user_input.""" - - def test_returns_true_when_last_tool_is_ask_user_question(self, tmp_path): - """Detects AskUserQuestion as last assistant tool call.""" - transcript = tmp_path / "transcript.jsonl" - msg = { - "type": "assistant", - "message": { - "content": [ - {"type": "tool_use", "name": "AskUserQuestion", "input": {}} - ] - }, - } - transcript.write_text(json.dumps(msg) + "\n") - assert is_waiting_for_user_input(str(transcript)) is True - - def test_returns_false_when_last_tool_is_not_ask(self, tmp_path): - """Returns False when last tool is not AskUserQuestion.""" - transcript = tmp_path / "transcript.jsonl" - msg = { - "type": "assistant", - "message": { - "content": [{"type": "tool_use", "name": "Write", "input": {}}] - }, - } - transcript.write_text(json.dumps(msg) + "\n") - assert is_waiting_for_user_input(str(transcript)) is False - - def test_returns_false_for_missing_file(self): - """Returns False when transcript file doesn't exist.""" - assert is_waiting_for_user_input("/nonexistent/transcript.jsonl") is False - - def test_returns_false_for_empty_transcript(self, tmp_path): - """Returns False when transcript is empty.""" - transcript = tmp_path / "transcript.jsonl" - transcript.write_text("") - assert is_waiting_for_user_input(str(transcript)) is False - - def test_uses_last_assistant_message(self, tmp_path): - """Uses the last assistant message, not the first.""" - transcript = tmp_path / "transcript.jsonl" - ask_msg = { - "type": "assistant", - "message": { - "content": [ - {"type": "tool_use", "name": "AskUserQuestion", "input": {}} - ] - }, - } - write_msg = { - "type": "assistant", - "message": { - "content": [{"type": "tool_use", "name": "Write", "input": {}}] - }, - } - lines = [json.dumps(ask_msg), json.dumps(write_msg)] - transcript.write_text("\n".join(lines) + "\n") - assert is_waiting_for_user_input(str(transcript)) is False