diff --git a/.eslintrc.js b/.eslintrc.js index 92d364a..328af2c 100644 --- a/.eslintrc.js +++ b/.eslintrc.js @@ -47,7 +47,7 @@ module.exports = { { // CLI output files use console.log for user-facing output (not logging) // These display results to the user, not debug info - files: ['src/sidecar/read.js', 'src/sidecar/session-utils.js'], + files: ['src/sidecar/read.js', 'src/sidecar/session-utils.js', 'src/cli-handlers.js'], rules: { 'no-console': 'off' } diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 967e280..56c8a04 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -41,6 +41,12 @@ jobs: run: | # Find previous tag PREV_TAG=$(git tag --sort=-version:refname | grep -v "^${TAG_NAME}$" | head -1) + + if [ -z "$PREV_TAG" ]; then + echo "No previous tag found. Skipping Claude-generated release notes." + exit 0 + fi + echo "Generating notes for ${PREV_TAG}..${TAG_NAME}" # Build context: commit log + diff stat + actual diff (truncated) @@ -90,7 +96,7 @@ jobs: role: "user", content: ("Here are the commits from " + $prev_tag + " to " + $tag + ":\n\n" + $commits + "\n\nDiff stat:\n" + $diffstat + "\n\nActual diff (truncated):\n" + $diff + "\n\nChangelog link: https://github.com/" + $repo + "/compare/" + $prev_tag + "..." + $tag + "\n\n" + $prompt) }] - }' | curl -s https://api.anthropic.com/v1/messages \ + }' | curl -s --connect-timeout 10 --max-time 60 https://api.anthropic.com/v1/messages \ -H "content-type: application/json" \ -H "x-api-key: ${ANTHROPIC_API_KEY}" \ -H "anthropic-version: 2023-06-01" \ diff --git a/.husky/pre-push b/.husky/pre-push index 03f55f9..dc06c77 100755 --- a/.husky/pre-push +++ b/.husky/pre-push @@ -15,7 +15,7 @@ if [ -n "$HEAD_SHA" ] && [ "$HEAD_SHA" = "$CACHED_SHA" ]; then echo "Tests already passed for $HEAD_SHA — skipping." else echo "Running full test suite (unit + integration) before push..." 
- npm run test:all + npm run test:all || exit 1 fi echo "Checking for dependency vulnerabilities..." diff --git a/CLAUDE.md b/CLAUDE.md index 39a8918..801b457 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -103,6 +103,7 @@ src/ │ ├── api-key-store.js # Maps provider IDs to environment variable names │ ├── api-key-validation.js # Validation endpoints per provider │ ├── auth-json.js # Known provider IDs that map to sidecar's PROVIDER_ENV_MAP +│ ├── auto-skills-config.js # Valid auto-skill names (keys in autoSkills config) │ ├── config.js # Default model alias map — short names to full OpenRouter model identifiers │ ├── logger.js # Structured Logger Module │ ├── mcp-discovery.js # MCP Discovery - Discovers MCP servers from parent LLM configuration @@ -116,6 +117,7 @@ src/ │ ├── updater.js # @type {import('update-notifier').UpdateNotifier|null} │ └── validators.js # * Provider to API key mapping ├── cli-handlers.js # CLI Command Handlers +├── cli-usage.js # Commands section of usage text ├── cli.js # * Default values per spec §4.1 ├── conflict.js # File Conflict Detection Module ├── context-compression.js # Context Compression Module @@ -163,7 +165,8 @@ scripts/ ├── generate-icon.js # Generate app icon PNG from SVG source. ├── integration-test.sh ├── list-models.js -├── postinstall.js # Install skill file to ~/.claude/skills/sidecar/ +├── postinstall.js # Install skill files to ~/.claude/skills/ +├── preuninstall.js # Pre-uninstall script for claude-sidecar ├── test-tools.sh ├── validate-docs.js # * Main entry point. 
├── validate-thinking.js @@ -190,51 +193,53 @@ evals/ | Module | Purpose | Key Exports | |--------|---------|-------------| -| `cli-handlers.js` | CLI Command Handlers | `handleSetup()`, `handleAbort()`, `handleUpdate()`, `handleMcp()` | -| `cli.js` | * Default values per spec §4.1 | `parseArgs()`, `validateStartArgs()`, `getUsage()`, `DEFAULTS()` | -| `conflict.js` | File Conflict Detection Module | `detectConflicts()`, `formatConflictWarning()` | -| `context-compression.js` | Context Compression Module | `compressContext()`, `estimateTokenCount()`, `buildPreamble()`, `DEFAULT_TOKEN_LIMIT()` | -| `context.js` | Context Filtering Module | `filterContext()`, `parseDuration()`, `estimateTokens()`, `takeLastNTurns()` | -| `drift.js` | Context Drift Detection Module | `calculateDrift()`, `formatDriftWarning()`, `countTurnsSince()`, `isDriftSignificant()` | -| `environment.js` | Environment Detection Module | `inferClient()`, `getSessionRoot()`, `detectEnvironment()`, `VALID_CLIENTS()` | -| `headless.js` | * Default timeout: 15 minutes per spec §6.2 | `runHeadless()`, `waitForServer()`, `extractSummary()`, `formatFoldOutput()`, `DEFAULT_TIMEOUT()` | -| `index.js` | Claude Sidecar - Main Module | `APIs()`, `startSidecar()`, `listSidecars()`, `resumeSidecar()`, `continueSidecar()` | -| `jsonl-parser.js` | JSONL Parser | `parseJSONLLine()`, `readJSONL()`, `extractTimestamp()`, `formatMessage()`, `formatContext()` | -| `mcp-server.js` | @module mcp-server — Sidecar MCP Server (stdio transport) | `handlers()`, `startMcpServer()`, `getProjectDir()` | -| `mcp-tools.js` | Zod pattern for safe task IDs (alphanumeric, hyphens, underscores only) | `getTools()`, `getGuideText()`, `safeTaskId()`, `safeModel()` | -| `opencode-client.js` | OpenCode SDK Client Wrapper | `parseModelString()`, `createClient()`, `createSession()`, `createChildSession()`, `sendPrompt()` | -| `prompt-builder.js` | System Prompt Builder | `buildSystemPrompt()`, `buildPrompts()`, `buildEnvironmentSection()`, 
`getSummaryTemplate()`, `SUMMARY_TEMPLATE()` | -| `session-manager.js` | * Session status constants | `createSession()`, `updateSession()`, `getSession()`, `saveConversation()`, `saveSummary()` | -| `session.js` | Session Resolver | `encodeProjectPath()`, `decodeProjectPath()`, `getSessionDirectory()`, `getSessionId()`, `resolveSession()` | -| `prompts/cowork-agent-prompt.js` | Cowork Agent Prompt | `buildCoworkAgentPrompt()` | -| `sidecar/context-builder.js` | Context Builder Module | `buildContext()`, `parseDuration()`, `resolveSessionFile()`, `applyContextFilters()`, `findCoworkSession()` | -| `sidecar/continue.js` | Load previous session data (metadata, summary, conversation) | `loadPreviousSession()`, `buildContinuationContext()`, `createContinueSessionMetadata()`, `continueSidecar()` | -| `sidecar/crash-handler.js` | Crash Handler - Updates metadata to 'error' on uncaught exceptions | `installCrashHandler()` | -| `sidecar/interactive.js` | Check if Electron is available (lazy loading guard) | `getElectronPath()`, `checkElectronAvailable()`, `buildElectronEnv()`, `handleElectronProcess()`, `runInteractive()` | -| `sidecar/progress.js` | Lifecycle stage labels | `readProgress()`, `writeProgress()`, `extractLatest()`, `computeLastActivity()`, `STAGE_LABELS()` | -| `sidecar/read.js` | Sidecar Read Operations Module | `formatAge()`, `listSidecars()`, `readSidecar()` | -| `sidecar/resume.js` | Load session metadata from session directory | `loadSessionMetadata()`, `loadInitialContext()`, `checkFileDrift()`, `buildDriftWarning()`, `buildResumeUserMessage()` | -| `sidecar/session-utils.js` | Standard heartbeat interval in milliseconds | `HEARTBEAT_INTERVAL()`, `SessionPaths()`, `saveInitialContext()`, `finalizeSession()`, `outputSummary()` | -| `sidecar/setup-window.js` | Setup Window Launcher | `launchSetupWindow()` | -| `sidecar/setup.js` | Sidecar Setup Wizard | `addAlias()`, `createDefaultConfig()`, `detectApiKeys()`, `runInteractiveSetup()`, `runReadlineSetup()` 
| -| `sidecar/start.js` | Generate a unique 8-character hex task ID | `generateTaskId()`, `createSessionMetadata()`, `buildMcpConfig()`, `checkElectronAvailable()`, `runInteractive()` | -| `utils/agent-mapping.js` | * All OpenCode native agent names (lowercase) | `PRIMARY_AGENTS()`, `OPENCODE_AGENTS()`, `HEADLESS_SAFE_AGENTS()`, `mapAgentToOpenCode()`, `isValidAgent()` | -| `utils/alias-resolver.js` | Alias Resolver Utilities | `applyDirectApiFallback()`, `autoRepairAlias()` | -| `utils/api-key-store.js` | Maps provider IDs to environment variable names | `getEnvPath()`, `readApiKeys()`, `readApiKeyHints()`, `readApiKeyValues()`, `saveApiKey()` | -| `utils/api-key-validation.js` | Validation endpoints per provider | `validateApiKey()`, `validateOpenRouterKey()`, `VALIDATION_ENDPOINTS()` | -| `utils/auth-json.js` | Known provider IDs that map to sidecar's PROVIDER_ENV_MAP | `readAuthJsonKeys()`, `importFromAuthJson()`, `checkAuthJson()`, `removeFromAuthJson()`, `AUTH_JSON_PATH()` | -| `utils/config.js` | Default model alias map — short names to full OpenRouter model identifiers | `getConfigDir()`, `getConfigPath()`, `loadConfig()`, `saveConfig()`, `getDefaultAliases()` | -| `utils/logger.js` | Structured Logger Module | `logger()`, `LOG_LEVELS()` | -| `utils/mcp-discovery.js` | MCP Discovery - Discovers MCP servers from parent LLM configuration | `discoverParentMcps()`, `discoverClaudeCodeMcps()`, `discoverCoworkMcps()`, `normalizeMcpJson()` | -| `utils/mcp-validators.js` | MCP Validators | `validateMcpSpec()`, `validateMcpConfigFile()` | -| `utils/model-fetcher.js` | Hardcoded Anthropic models (no public listing endpoint) | `fetchModelsFromProvider()`, `fetchAllModels()`, `groupModelsByFamily()`, `ANTHROPIC_MODELS()`, `PROVIDER_FAMILY_NAMES()` | -| `utils/model-validator.js` | Alias-to-search-term mapping for filtering provider model lists | `validateDirectModel()`, `filterRelevantModels()`, `normalizeModelId()` | -| `utils/path-setup.js` | Ensures that the 
project's node_modules/.bin directory is included in the PATH. | `ensureNodeModulesBinInPath()` | -| `utils/server-setup.js` | Server Setup Utilities | `DEFAULT_PORT()`, `isPortInUse()`, `getPortPid()`, `killPortProcess()`, `ensurePortAvailable()` | -| `utils/start-helpers.js` | Start Command Helpers | `resolveModelFromArgs()`, `validateFallbackModel()` | -| `utils/thinking-validators.js` | Thinking Level Validators | `MODEL_THINKING_SUPPORT()`, `getSupportedThinkingLevels()`, `validateThinkingLevel()` | -| `utils/updater.js` | @type {import('update-notifier').UpdateNotifier|null} | `initUpdateCheck()`, `getUpdateInfo()`, `notifyUpdate()`, `performUpdate()` | -| `utils/validators.js` | * Provider to API key mapping | `VALID_AGENT_MODES()`, `PROVIDER_KEY_MAP()`, `MODEL_THINKING_SUPPORT()`, `TASK_ID_PATTERN()`, `validateTaskId()` | +| `cli-handlers.js` | CLI Command Handlers | `handleSetup`, `handleAbort`, `handleUpdate`, `handleMcp`, `handleAutoSkills` | +| `cli-usage.js` | Commands section of usage text | `getUsage` | +| `cli.js` | * Default values per spec §4.1 | `parseArgs`, `validateStartArgs`, `getUsage`, `DEFAULTS` | +| `conflict.js` | File Conflict Detection Module | `detectConflicts`, `formatConflictWarning` | +| `context-compression.js` | Context Compression Module | `compressContext`, `estimateTokenCount`, `buildPreamble`, `DEFAULT_TOKEN_LIMIT` | +| `context.js` | Context Filtering Module | `filterContext`, `parseDuration`, `estimateTokens`, `takeLastNTurns` | +| `drift.js` | Context Drift Detection Module | `calculateDrift`, `formatDriftWarning`, `countTurnsSince`, `isDriftSignificant` | +| `environment.js` | Environment Detection Module | `inferClient`, `getSessionRoot`, `detectEnvironment`, `VALID_CLIENTS` | +| `headless.js` | * Default timeout: 15 minutes per spec §6.2 | `runHeadless`, `waitForServer`, `extractSummary`, `formatFoldOutput`, `DEFAULT_TIMEOUT` | +| `index.js` | Claude Sidecar - Main Module | `APIs`, `startSidecar`, `listSidecars`, 
`resumeSidecar`, `continueSidecar` | +| `jsonl-parser.js` | JSONL Parser | `parseJSONLLine`, `readJSONL`, `extractTimestamp`, `formatMessage`, `formatContext` | +| `mcp-server.js` | @module mcp-server — Sidecar MCP Server (stdio transport) | `handlers`, `startMcpServer`, `getProjectDir` | +| `mcp-tools.js` | Zod pattern for safe task IDs (alphanumeric, hyphens, underscores only) | `getTools`, `getGuideText`, `safeTaskId`, `safeModel` | +| `opencode-client.js` | OpenCode SDK Client Wrapper | `parseModelString`, `createClient`, `createSession`, `createChildSession`, `sendPrompt` | +| `prompt-builder.js` | System Prompt Builder | `buildSystemPrompt`, `buildPrompts`, `buildEnvironmentSection`, `getSummaryTemplate`, `SUMMARY_TEMPLATE` | +| `session-manager.js` | * Session status constants | `createSession`, `updateSession`, `getSession`, `saveConversation`, `saveSummary` | +| `session.js` | Session Resolver | `encodeProjectPath`, `decodeProjectPath`, `getSessionDirectory`, `getSessionId`, `resolveSession` | +| `prompts/cowork-agent-prompt.js` | Cowork Agent Prompt | `buildCoworkAgentPrompt` | +| `sidecar/context-builder.js` | Context Builder Module | `buildContext`, `parseDuration`, `resolveSessionFile`, `applyContextFilters`, `findCoworkSession` | +| `sidecar/continue.js` | Load previous session data (metadata, summary, conversation) | `loadPreviousSession`, `buildContinuationContext`, `createContinueSessionMetadata`, `continueSidecar` | +| `sidecar/crash-handler.js` | Crash Handler - Updates metadata to 'error' on uncaught exceptions | `installCrashHandler` | +| `sidecar/interactive.js` | Check if Electron is available (lazy loading guard) | `getElectronPath`, `checkElectronAvailable`, `buildElectronEnv`, `handleElectronProcess`, `runInteractive` | +| `sidecar/progress.js` | Lifecycle stage labels | `readProgress`, `writeProgress`, `extractLatest`, `computeLastActivity`, `STAGE_LABELS` | +| `sidecar/read.js` | Sidecar Read Operations Module | `formatAge`, 
`listSidecars`, `readSidecar` | +| `sidecar/resume.js` | Load session metadata from session directory | `loadSessionMetadata`, `loadInitialContext`, `checkFileDrift`, `buildDriftWarning`, `buildResumeUserMessage` | +| `sidecar/session-utils.js` | Standard heartbeat interval in milliseconds | `HEARTBEAT_INTERVAL`, `SessionPaths`, `saveInitialContext`, `finalizeSession`, `outputSummary` | +| `sidecar/setup-window.js` | Setup Window Launcher | `launchSetupWindow` | +| `sidecar/setup.js` | Sidecar Setup Wizard | `addAlias`, `createDefaultConfig`, `detectApiKeys`, `runInteractiveSetup`, `runReadlineSetup` | +| `sidecar/start.js` | Generate a unique 8-character hex task ID | `generateTaskId`, `createSessionMetadata`, `buildMcpConfig`, `checkElectronAvailable`, `runInteractive` | +| `utils/agent-mapping.js` | * All OpenCode native agent names (lowercase) | `PRIMARY_AGENTS`, `OPENCODE_AGENTS`, `HEADLESS_SAFE_AGENTS`, `mapAgentToOpenCode`, `isValidAgent` | +| `utils/alias-resolver.js` | Alias Resolver Utilities | `applyDirectApiFallback`, `autoRepairAlias` | +| `utils/api-key-store.js` | Maps provider IDs to environment variable names | `getEnvPath`, `readApiKeys`, `readApiKeyHints`, `readApiKeyValues`, `saveApiKey` | +| `utils/api-key-validation.js` | Validation endpoints per provider | `validateApiKey`, `validateOpenRouterKey`, `VALIDATION_ENDPOINTS` | +| `utils/auth-json.js` | Known provider IDs that map to sidecar's PROVIDER_ENV_MAP | `readAuthJsonKeys`, `importFromAuthJson`, `checkAuthJson`, `removeFromAuthJson`, `AUTH_JSON_PATH` | +| `utils/auto-skills-config.js` | Valid auto-skill names (keys in autoSkills config) | `VALID_SKILL_NAMES`, `SKILL_LABELS`, `getAutoSkillsConfig`, `isSkillEnabled`, `isMonitoringEnabled` | +| `utils/config.js` | Default model alias map — short names to full OpenRouter model identifiers | `getConfigDir`, `getConfigPath`, `loadConfig`, `saveConfig`, `getDefaultAliases` | +| `utils/logger.js` | Structured Logger Module | `logger`, `LOG_LEVELS` 
| +| `utils/mcp-discovery.js` | MCP Discovery - Discovers MCP servers from parent LLM configuration | `discoverParentMcps`, `discoverClaudeCodeMcps`, `discoverCoworkMcps`, `normalizeMcpJson` | +| `utils/mcp-validators.js` | MCP Validators | `validateMcpSpec`, `validateMcpConfigFile` | +| `utils/model-fetcher.js` | Hardcoded Anthropic models (no public listing endpoint) | `fetchModelsFromProvider`, `fetchAllModels`, `groupModelsByFamily`, `ANTHROPIC_MODELS`, `PROVIDER_FAMILY_NAMES` | +| `utils/model-validator.js` | Alias-to-search-term mapping for filtering provider model lists | `validateDirectModel`, `filterRelevantModels`, `normalizeModelId` | +| `utils/path-setup.js` | Ensures that the project's node_modules/.bin directory is included in the PATH. | `ensureNodeModulesBinInPath` | +| `utils/server-setup.js` | Server Setup Utilities | `DEFAULT_PORT`, `isPortInUse`, `getPortPid`, `killPortProcess`, `ensurePortAvailable` | +| `utils/start-helpers.js` | Start Command Helpers | `resolveModelFromArgs`, `validateFallbackModel` | +| `utils/thinking-validators.js` | Thinking Level Validators | `MODEL_THINKING_SUPPORT`, `getSupportedThinkingLevels`, `validateThinkingLevel` | +| `utils/updater.js` | @type {import('update-notifier').UpdateNotifier|null} | `initUpdateCheck`, `getUpdateInfo`, `notifyUpdate`, `performUpdate` | +| `utils/validators.js` | * Provider to API key mapping | `VALID_AGENT_MODES`, `PROVIDER_KEY_MAP`, `MODEL_THINKING_SUPPORT`, `TASK_ID_PATTERN`, `validateTaskId` | --- @@ -369,4 +374,3 @@ GEMINI.md and AGENTS.md are symlinks to CLAUDE.md -- no sync needed. 
- [docs/electron-testing.md](docs/electron-testing.md) - CDP patterns - [docs/jsdoc-setup.md](docs/jsdoc-setup.md) - JSDoc, `.d.ts` generation - [evals/README.md](evals/README.md) - Agentic eval system -- [docs/plans/index.md](docs/plans/index.md) - Design plans diff --git a/bin/sidecar.js b/bin/sidecar.js index 5b288f5..122b48d 100755 --- a/bin/sidecar.js +++ b/bin/sidecar.js @@ -21,7 +21,7 @@ if (process.env.GEMINI_API_KEY && !process.env.GOOGLE_GENERATIVE_AI_API_KEY) { const { parseArgs, validateStartArgs, getUsage } = require('../src/cli'); const { validateTaskId } = require('../src/utils/validators'); const { resolveModelFromArgs, validateFallbackModel } = require('../src/utils/start-helpers'); -const { handleSetup, handleAbort, handleUpdate, handleMcp } = require('../src/cli-handlers'); +const { handleSetup, handleAbort, handleUpdate, handleMcp, handleAutoSkills } = require('../src/cli-handlers'); const VERSION = require('../package.json').version; @@ -103,6 +103,9 @@ async function main() { case 'update': await handleUpdate(); break; + case 'auto-skills': + await handleAutoSkills(args); + break; default: console.error(`Unknown command: ${command}`); console.log(getUsage()); diff --git a/docs/activity-monitoring-research.md b/docs/activity-monitoring-research.md new file mode 100644 index 0000000..620e8d2 --- /dev/null +++ b/docs/activity-monitoring-research.md @@ -0,0 +1,446 @@ +# Activity-Monitoring Auto-Skill Triggers: Research + +## Problem Statement + +Sidecar's four auto-skills (`auto-unblock`, `auto-review`, `auto-security`, `auto-bmad-check`) currently rely on a single triggering mechanism: Claude pattern-matching skill descriptions against conversation state every turn. Each skill's `description` field in its SKILL.md frontmatter contains trigger conditions (e.g., "5 or more different approaches"), and Claude evaluates these against its perception of the conversation. + +This mechanism has three fundamental weaknesses: + +1. 
**Self-assessment is unreliable.** Claude must count its own failed attempts, judge when "implementation is complete," or recognize it's looping — but it often doesn't count accurately, misses the threshold, or rationalizes that it's making progress when it isn't. + +2. **Description-matching is probabilistic.** Whether Claude invokes a skill depends on how well it pattern-matches the description against the current conversational state. This varies with context window pressure, conversation length, and model behavior. There's no guarantee a skill fires at the right moment. + +3. **No objective evidence.** The trigger decision is entirely internal to Claude's reasoning. There is no external signal — like "Bash has failed 5 times in a row on the same file" — feeding into the decision. The skill description says "5 or more approaches" but nothing actually counts them. + +**The consequence:** Auto-skills fire inconsistently. `auto-unblock` might never trigger despite 10 failed attempts. `auto-security` might miss a `git commit` because Claude was focused on the commit message, not the security scan. `auto-review` might not fire because Claude doesn't consider its work "significant enough." + +**The goal:** Add a second, objective triggering path that uses behavioral evidence from tool call patterns to recommend skill invocation — working alongside the existing description-matching mechanism, not replacing it. + +--- + +## Hook System Capabilities + +Claude Code's [hooks system](https://code.claude.com/docs/en/hooks) provides lifecycle event handlers that can observe and influence Claude's behavior. This section documents the capabilities relevant to activity monitoring. + +### Hook Events Available + +The table below lists hook events relevant to activity monitoring. 
Claude Code supports 17 hook events total — see the [full reference](https://code.claude.com/docs/en/hooks) for the complete list including `PermissionRequest`, `Notification`, `SubagentStart/Stop`, `TeammateIdle`, `TaskCompleted`, `ConfigChange`, `WorktreeCreate/Remove`, `PreCompact`, `InstructionsLoaded`, and `SessionEnd`. + +| Event | When It Fires | Matcher Support | Can Block? | +|-------|--------------|-----------------|------------| +| `PreToolUse` | Before a tool call executes | Tool name regex | Yes — deny/allow/ask | +| `PostToolUse` | After a tool call succeeds | Tool name regex | Feedback only (`decision: "block"` shows reason to Claude, but tool already ran) | +| `PostToolUseFailure` | After a tool call fails | Tool name regex | Feedback only (tool already failed) | +| `Stop` | When Claude finishes responding | No matcher (always fires) | Yes — force continuation | +| `SessionStart` | When a session begins/resumes | Source type | No | +| `SessionEnd` | When a session terminates | Session end reason | No (side effects only) | +| `UserPromptSubmit` | When the user submits a prompt | No matcher (always fires) | Yes — block prompt | + +### Hook Input Contract + +All hooks receive JSON via stdin with these common fields: + +| Field | Description | +|-------|-------------| +| `session_id` | Current session identifier | +| `transcript_path` | Path to conversation JSONL file | +| `cwd` | Current working directory | +| `permission_mode` | Current permission mode | +| `hook_event_name` | Name of the event that fired | + +**Tool events** (`PreToolUse`, `PostToolUse`, `PostToolUseFailure`) additionally include: + +| Field | Description | +|-------|-------------| +| `tool_name` | Name of the tool (e.g., `Bash`, `Edit`, `Write`) | +| `tool_input` | Tool-specific input object | +| `tool_response` | (PostToolUse only) Result from the tool | +| `tool_use_id` | Unique ID for the tool call | +| `error` | (PostToolUseFailure only) Error description | +| `is_interrupt` | 
(PostToolUseFailure only) Whether the failure was caused by user interruption | + +**Stop** hooks additionally include: + +| Field | Description | +|-------|-------------| +| `stop_hook_active` | Whether Claude is already continuing from a stop hook | +| `last_assistant_message` | Text of Claude's final response | + +### Hook Output Contract + +Hooks communicate back via exit codes and JSON stdout: + +**Exit codes:** +- `0` — success; stdout parsed for JSON output +- `2` — blocking error (PreToolUse blocks the tool, Stop forces continuation) +- Other — non-blocking error, execution continues + +**JSON output fields** (on exit 0): + +| Field | Description | +|-------|-------------| +| `systemMessage` | Warning message shown to the user | +| `decision` | `"block"` to prevent the action (Stop, PostToolUse) | +| `reason` | Explanation when decision is "block" | +| `continue` | `false` to stop Claude entirely | +| `hookSpecificOutput` | Event-specific structured output | + +**Key capability for auto-skills:** The `Stop` hook can return `{ "decision": "block", "reason": "..." }` to prevent Claude from stopping and force it to continue with the reason as context. This is the primary mechanism for injecting auto-skill recommendations — the `reason` field tells Claude what skill to invoke and why. + +### Hook Registration + +Hooks are defined in JSON settings files at multiple scopes: + +| Location | Scope | +|----------|-------| +| `~/.claude/settings.json` | All projects (user-level) | +| `.claude/settings.json` | Single project | +| `.claude/settings.local.json` | Single project, gitignored | +| Plugin `hooks/hooks.json` | When plugin is enabled | +| Skill/agent frontmatter | While component is active | + +### Constraints + +- **Synchronous execution:** Command hooks run synchronously; Claude waits for the result before proceeding. Must be fast. +- **Default timeout:** 600 seconds for command hooks. 
+- **`async: true`:** Hooks can run in the background without blocking, but then they cannot return decisions. +- **Environment variables:** `$CLAUDE_PROJECT_DIR` for project root, `$CLAUDE_PLUGIN_ROOT` for plugin root. +- **Transcript access:** All hooks receive `transcript_path` — the full conversation JSONL, which can be parsed for historical analysis. +- **Hook types beyond shell:** Claude Code also supports `type: "prompt"` (single-turn LLM evaluation returning yes/no) and `type: "agent"` (subagent with tool access) hooks. These could theoretically replace custom Node.js analysis — e.g., a Stop prompt hook asking "Is Claude stuck in a loop?" However, they add per-turn LLM API costs and latency, making them better suited as a future refinement than as the primary mechanism. See [Future Extensions](#future-extensions) in the design spec. +- **Exit code 2 varies by event:** Exit code 2 means different things for different hooks — e.g., PreToolUse blocks the tool call, Stop forces continuation, UserPromptSubmit blocks and erases the prompt. Not all events support blocking via exit code 2. + +--- + +## Three Approaches + +### Approach A: Pure Hook Accumulator (Shell-based) + +**Architecture:** Shell scripts handle all logic — event collection, pattern detection, and decision output. + +``` +PostToolUse → post-tool-use.sh → append event to JSONL + inline checks +PostToolUseFailure → post-failure.sh → append event to JSONL + inline checks +Stop → stop-hook.sh → read accumulated JSONL + pattern analysis via jq/awk +PreToolUse(Bash) → pre-bash.sh → check for git commit/push patterns +``` + +**How it works:** +1. `PostToolUse` and `PostToolUseFailure` hooks append structured events to `$TMPDIR/sidecar-monitor-$SESSION_ID.jsonl` using `jq` one-liners +2. Same hooks do inline pattern checks for immediate triggers (e.g., BMAD artifact writes) +3. `Stop` hook reads the accumulated event file, counts patterns with `jq`/`awk`, and decides whether to block stopping +4. 
`PreToolUse` hook on Bash checks if the command matches `git commit|push|gh pr create` + +**Example PostToolUse hook:** +```bash +#!/bin/bash +INPUT=$(cat) +TOOL=$(echo "$INPUT" | jq -r '.tool_name') +FILE=$(echo "$INPUT" | jq -r '.tool_input.file_path // empty') +SESSION=$(echo "$INPUT" | jq -r '.session_id') +EVENTS="$TMPDIR/sidecar-monitor-$SESSION.jsonl" + +# Append event +echo "$INPUT" | jq -c "{ts: now | todate, tool: .tool_name, file: (.tool_input.file_path // null), success: true}" >> "$EVENTS" + +# Quick check: BMAD artifact write +if [ "$TOOL" = "Write" ] || [ "$TOOL" = "Edit" ]; then + if echo "$FILE" | grep -q '_bmad-output/'; then + jq -n '{ hookSpecificOutput: { hookEventName: "PostToolUse", additionalContext: "IMPORTANT: A BMAD artifact was just written. Consider invoking sidecar-auto-bmad-method-check." } }' + fi +fi +exit 0 +``` + +**Pros:** +- Zero dependencies beyond `jq` (standard on most systems) +- Self-contained — no Node.js subprocess, no daemon +- Uses existing hook infrastructure directly +- Fast for simple immediate triggers + +**Cons:** +- Complex pattern detection (stuck loops, revert-like edits) is extremely fragile in shell/jq +- String comparison for error deduplication requires careful escaping +- No access to structured transcript parsing — would need to parse JSONL with jq +- Maintenance burden: shell scripts with complex logic are hard to test and debug +- Runs synchronously on every tool call — must stay fast + +**Best for:** Simple, immediate triggers where the pattern is a single event (git commit → auto-security, BMAD write → auto-bmad-check). + +--- + +### Approach B: Hook Collector + Node.js Analyzer (Recommended) + +**Architecture:** Lightweight shell hooks for event collection and immediate triggers; Node.js module for complex analysis at natural decision points (Stop, PreToolUse). 
+ +``` +PostToolUse → post-tool-use.sh → fast event append + quick BMAD check +PostToolUseFailure → post-failure.sh → fast event append +Stop → stop-hook.sh → node hooks/analyze-patterns.js → block/allow +PreToolUse(Bash) → pre-bash.sh → check for git commit/push/pr +``` + +**How it works:** +1. `PostToolUse` / `PostToolUseFailure` hooks: fast shell scripts that append a structured event to the session's event file. Also handle immediate triggers (BMAD artifact writes) with quick regex checks. +2. `Stop` hook: shell script invokes `node hooks/analyze-patterns.js` with the transcript path and event file. The Node.js analyzer reads both files, runs pattern detection functions, and outputs a JSON decision. +3. `PreToolUse` (Bash): shell script checks if the command matches commit/push/PR patterns. Pure shell — no Node.js needed for this fast path. + +**Why analysis at Stop, not on every tool call:** +- The Stop hook fires when Claude finishes responding — the natural decision point for "should Claude do something else before it stops?" +- Analysis happens once per Claude turn, not once per tool call, so a ~200ms Node.js startup is acceptable +- The Stop hook has access to `last_assistant_message` (Claude's final response) which provides additional signal +- For patterns like "stuck loop" and "implementation complete," the relevant question is always "should Claude stop now, or should it invoke a skill first?" 
+ +**Example Stop hook:** +```bash +#!/bin/bash +INPUT=$(cat) +SESSION=$(echo "$INPUT" | jq -r '.session_id') +TRANSCRIPT=$(echo "$INPUT" | jq -r '.transcript_path') +STOP_ACTIVE=$(echo "$INPUT" | jq -r '.stop_hook_active') +EVENTS="$TMPDIR/sidecar-monitor-$SESSION.jsonl" + +# Prevent infinite loops: if we already forced continuation, don't do it again +if [ "$STOP_ACTIVE" = "true" ]; then + exit 0 +fi + +# Run Node.js analyzer +node "$(dirname "$0")/analyze-patterns.js" "$TRANSCRIPT" "$EVENTS" "$SESSION" +``` + +**Node.js analyzer outputs:** +```json +{ + "decision": "block", + "reason": "IMPORTANT: You appear to be stuck in a debugging loop (5 edit→fail cycles on src/auth.js). Consider invoking the sidecar-auto-unblock skill to get fresh ideas from a different model." +} +``` + +**Pros:** +- Rich analysis: Node.js can parse JSONL transcripts, do fuzzy string matching, detect edit/revert cycles +- Runs at natural decision points (Stop = "should Claude keep going?") +- Shell hooks for fast-path triggers stay lightweight (<10ms per tool call) +- Testable: Node.js module with unit tests, mock transcripts +- Extensible: adding a new pattern is adding a function, not rewriting shell logic + +**Cons:** +- ~200ms Node.js startup for the analyzer (acceptable at Stop, not on every tool call) +- Requires Node.js on the system (safe assumption for sidecar users) +- Analysis only at Stop/PreToolUse points — not continuous +- Two languages (shell + Node.js) to maintain + +**Best for:** The full spectrum of auto-skill triggers. Simple patterns (git commit, BMAD writes) are handled by fast shell hooks. Complex patterns (stuck loops, implementation completion) are handled by the Node.js analyzer at Stop. + +--- + +### Approach C: Background Watcher Daemon + +**Architecture:** A persistent Node.js process runs alongside Claude Code, tailing the transcript file and maintaining real-time behavioral models. 
+ +``` +SessionStart → start-watcher.sh → spawn background Node.js process +PostToolUse → post-tool-use.sh → append event (watcher also reads transcript) +Stop → stop-hook.sh → read watcher's recommendation file +SessionEnd → cleanup-watcher.sh → kill the daemon +``` + +**How it works:** +1. `SessionStart` hook spawns a background Node.js process that watches the transcript file (`fs.watch` on `transcript_path`) +2. The watcher maintains in-memory models: edit history per file, error frequency, tool call sequences +3. When patterns are detected, the watcher writes recommendations to `$TMPDIR/sidecar-recommendations-$SESSION_ID.json` +4. `Stop` hook reads the recommendations file and returns decisions +5. `SessionEnd` hook cleans up the daemon process + +**Pros:** +- Real-time detection — patterns are identified as they happen, not only at Stop +- Stateful models — can track complex sequences across many turns +- No startup cost at Stop — analysis is pre-computed +- Could enable cross-session learning (persist models between sessions) +- Future potential: proactive mid-turn coaching + +**Cons:** +- Daemon lifecycle management: must handle crashes, restarts, orphaned processes +- Still needs hooks for delivery — the daemon can't inject messages directly +- Most complex to implement and debug +- Resource usage: persistent Node.js process per session +- `fs.watch` behavior varies across OS/filesystem +- Overkill for the current set of patterns + +**Best for:** A hypothetical future with real-time coaching, proactive mid-turn suggestions, and cross-session pattern learning. Not justified for the current four auto-skills. + +--- + +## Pattern Catalog + +Specific detection patterns for each auto-skill, mapping to hook events and signal types. 
+ +### auto-unblock + +| Hook | Pattern | Signal | Detection Method | +|------|---------|--------|-----------------| +| Stop | Edit→Bash(fail) cycles | 3+ cycles editing the same files followed by failing Bash commands | Parse transcript for interleaved Edit/Write→Bash sequences where Bash exits non-zero. Group by target file. | +| Stop | Same error repeated | Error string appears 3+ times in Bash results | Extract error strings from PostToolUseFailure events and Bash tool responses. Fuzzy-match for repeated patterns (strip line numbers, paths). | +| Stop | Revert-like edits | Edit `new_string` ≈ earlier `old_string` on same file | Compare accumulated Edit events: if a later edit's `new_string` closely matches an earlier edit's `old_string` on the same file, Claude is reverting. | +| Stop | Growing transcript without progress | Many tool calls, few new files/changes | Count unique file paths in Edit/Write events vs. total tool call count. Many tool calls per unique file path (a high calls-to-files ratio) = thrashing. | + +### auto-review + +| Hook | Pattern | Signal | Detection Method | +|------|---------|--------|-----------------| +| Stop | Implementation complete | Test pass after 5+ Edit/Write calls, Claude about to stop | Count Edit/Write calls in transcript. Check last Bash results for test-passing patterns (`passing`, `✓`, exit 0 after `npm test`/`pytest`). Claude's `last_assistant_message` contains completion language ("done", "implemented", "complete"). | +| Stop | Large change set | Many Edit/Write calls, shift from writes to reads | Track the ratio of write-tools to read-tools over recent turns. A shift from predominantly Edit/Write to Read/Grep suggests implementation is winding down. | +| Stop | Branch with uncommitted changes | `git status` shows modified files, Claude stopping | Check if recent Bash results contain `git status` output with modified files. 
| + +### auto-security + +| Hook | Pattern | Signal | Detection Method | +|------|---------|--------|-----------------| +| PreToolUse | Pre-commit gate | Bash command matches `git commit`, `git push`, `gh pr create` | Regex on `tool_input.command`: `/^\s*(git\s+(commit\|push)\|gh\s+pr\s+create)/`. Fast shell check. | +| PostToolUse | Staged files | `git add` detected | Regex on Bash `tool_input.command` for `git add`. Set a flag in the event file so the Stop hook knows files were staged. | + +### auto-bmad-check + +| Hook | Pattern | Signal | Detection Method | +|------|---------|--------|-----------------| +| PostToolUse | Artifact written | Write/Edit to `_bmad-output/` path | Check `tool_input.file_path` against `_bmad-output/` prefix. Immediate `additionalContext` trigger — no need to wait for Stop. | +| PostToolUse | Substantial artifact update | Edit with large `new_string` to `_bmad-output/` | Same path check, plus heuristic on content size (>500 chars of new content suggests substantive change, not a typo fix). | + +--- + +## Comparison Matrix + +| Dimension | A: Pure Shell | B: Hook + Node.js (Rec.) 
| C: Background Daemon | +|-----------|--------------|--------------------------|---------------------| +| **Implementation complexity** | Low | Medium | High | +| **Pattern detection reliability** | Low for complex patterns | High — full Node.js capabilities | Highest — stateful models | +| **Latency impact** | <10ms per hook (fast) | <10ms per tool hook; ~200ms at Stop | <10ms per hook (pre-computed) | +| **Maintenance burden** | High (shell complexity grows) | Medium (two languages, but testable) | High (daemon lifecycle) | +| **Extensibility** | Hard to add complex patterns | Easy — add a function | Easy — add a model | +| **Testability** | Hard (shell scripts) | Good (Node.js unit tests) | Good but integration is complex | +| **Dependencies** | `jq` only | Node.js + `jq` | Node.js + `jq` + process management | +| **Failure mode** | Silent (no output = allow) | Silent (analyzer crash = allow) | Daemon crash = no recommendations | +| **Resource usage** | Negligible | Negligible (Node.js only at Stop) | Persistent process per session | +| **Transcript analysis** | Fragile (jq on JSONL) | Native (Node.js JSONL parsing) | Native + real-time | +| **Cross-session learning** | Not feasible | Possible with state files | Natural fit | +| **Current auto-skill coverage** | 2/4 reliable (security, bmad) | 4/4 reliable | 4/4 reliable | + +--- + +## Recommendation: Approach B + +**Approach B (Hook Collector + Node.js Analyzer)** is recommended for the following reasons: + +1. **Right tool for the job.** Simple triggers (git commit, BMAD writes) use fast shell hooks. Complex triggers (stuck loops, implementation completion) use Node.js at the Stop hook — where a 200ms startup is negligible compared to Claude's response time. + +2. **Natural decision point.** The Stop hook answers exactly the right question: "Should Claude stop, or should it invoke an auto-skill first?" This is when all four auto-skills need to be evaluated. + +3. 
**Testable and maintainable.** The Node.js analyzer can be unit-tested with mock transcripts. Each pattern detector is an independent function. Adding a new pattern means adding a function and a test, not rewriting shell logic. + +4. **Incremental deployment.** Start with `PreToolUse` (auto-security) and `PostToolUse` (auto-bmad-check) — these are pure shell, zero risk. Then add the Stop hook with the Node.js analyzer for auto-unblock and auto-review. + +5. **Avoids daemon complexity.** Approach C's benefits (real-time detection, stateful models) aren't needed for the current four auto-skills. The Stop hook fires frequently enough — every time Claude finishes responding — to catch patterns in time. + +6. **Builds toward Approach C.** If future auto-skills need real-time detection, the event file and analyzer module from Approach B are directly reusable as the foundation for a daemon. + +### Migration path + +- **Phase 1:** Shell hooks for PreToolUse (security) and PostToolUse (BMAD) — immediate, no Node.js +- **Phase 2:** Stop hook with Node.js analyzer for unblock and review patterns +- **Phase 3 (future):** If needed, promote the analyzer to a persistent daemon (Approach C) + +--- + +## Phase 1 Must-Haves + +Beyond hook scripts and pattern detection, Phase 1 requires user-facing controls, configuration, and clear communication about what gets installed. This section covers the five must-haves that ship alongside the hooks themselves. + +### 1. Disabling Auto-Skills + +Two trigger paths exist, and both need disable controls: + +**Description-matching path** (SKILL.md-based): Each SKILL.md gains a Step 0 that checks `autoSkills.<skill>.enabled` in sidecar config before doing anything. If the skill is disabled, it silently skips — no output, no side effects. + +**Activity monitoring path** (shell hooks): The hooks themselves check `monitoring.enabled` and per-pattern `enabled` flags in config before collecting events or making decisions. 
A disabled hook exits immediately with code 0. + +**Master kill switch:** Setting `autoSkills.enabled: false` disables both paths entirely — no SKILL.md Step 0 proceeds, no shell hook collects events or fires triggers. + +**Per-project override (future):** A `.sidecar.json` in the project root could override user-level config (e.g., disable auto-security for a trusted internal repo). Not in Phase 1, but the config namespace is designed to support it. + +**In-session:** Users can say "don't use auto-skills" in conversation. This works today — all auto-skills already ask for confirmation before proceeding, so Claude simply won't invoke them if told not to. No config change needed for this path. + +### 2. Config Namespace + +All auto-skill configuration lives under `autoSkills` in `~/.config/sidecar/config.json`: + +```json +{ + "autoSkills": { + "enabled": true, + "review": { "enabled": true }, + "unblock": { "enabled": true }, + "security": { "enabled": true }, + "bmadMethodCheck": { "enabled": true } + }, + "monitoring": { "enabled": true, "patterns": { "..." : "..." } } +} +``` + +Each SKILL.md's Step 0 reads this config via `sidecar_guide` or a dedicated config-check mechanism to determine whether to proceed. Shell hooks read the same config at the shell level (parsed with `jq` from the JSON file) to decide whether to collect events or fire triggers. + +The `monitoring` key controls the activity-monitoring path independently — it can be enabled while individual skills under `autoSkills` are disabled, or vice versa. This separation allows event collection to continue (for debugging or future analysis) even when specific skill triggers are turned off. + +### 3. 
CLI Command (`sidecar auto-skills`) + +A new CLI command provides the primary interface for managing auto-skill state: + +| Command | Effect | +|---------|--------| +| `sidecar auto-skills` | Lists status of all auto-skills (enabled/disabled) | +| `sidecar auto-skills --off` | Disables all auto-skills | +| `sidecar auto-skills --on` | Enables all auto-skills | +| `sidecar auto-skills --off review security` | Disables specific skills by name | +| `sidecar auto-skills --on unblock` | Enables a specific skill by name | + +All commands read and write `~/.config/sidecar/config.json`. The status display shows both the master switch and per-skill state, so users can see at a glance what's active. Invalid skill names produce a clear error listing valid options. + +### 4. First-Run Notice + +When `scripts/postinstall.js` registers hooks during installation, it prints a clear message to the terminal: + +- **What was installed:** Which hooks were added and where (e.g., "Registered PreToolUse and PostToolUse hooks in `~/.claude/settings.json`") +- **What they do:** One-sentence summary of the auto-skill system ("Auto-skills monitor tool usage patterns and suggest security scans, code reviews, and unblock assistance") +- **How to disable:** The exact command to turn everything off (`sidecar auto-skills --off`) +- **Where config lives:** The path to `~/.config/sidecar/config.json` + +This notice fires once — postinstall only runs on `npm install`. Users who install via global npm (`npm install -g claude-sidecar`) see it in their terminal. Users who clone and `npm install` locally see it there. + +### 5. 
Phase 1 vs Phase 2 Scope + +**Phase 1 (this PR):** The minimal viable set of hooks, config, and controls: + +- Shell hooks for `PreToolUse` (auto-security gate on git commit/push/PR creation) +- Shell hooks for `PostToolUse` (BMAD artifact trigger + event collection to session JSONL) +- `autoSkills` config namespace in `~/.config/sidecar/config.json` +- `sidecar auto-skills` CLI command for enable/disable management +- SKILL.md Step 0 checks in each auto-skill's frontmatter +- First-run notice in `postinstall.js` + +**Phase 2 (future PR):** The Node.js analyzer and complex pattern detection: + +- `Stop` hook invoking `node hooks/analyze-patterns.js` for decision-point analysis +- Auto-unblock pattern detection (edit→fail cycles, repeated errors, revert detection) +- Auto-review pattern detection (implementation completion, large change sets, test passing) +- Transcript parsing and fuzzy matching in the Node.js analyzer module +- Unit tests with mock transcripts for each pattern detector + +This split follows the recommendation in [Approach B](#approach-b-hook-collector--nodejs-analyzer-recommended): start with the fast shell hooks that carry zero risk, then layer in the sophisticated analysis once the foundation is proven. 
+ +--- + +## References + +- [Claude Code Hooks Reference](https://code.claude.com/docs/en/hooks) — official hooks API documentation +- [docs/auto-skill-invocation-proposal.md](auto-skill-invocation-proposal.md) — existing proposal (Proposals A/B/C for skill discovery) +- `skill/auto-*/SKILL.md` — current trigger descriptions and thresholds +- `scripts/postinstall.js` — hook registration would be added here +- `src/mcp-tools.js` — MCP tool definitions for context diff --git a/docs/auto-skill-invocation-proposal.md b/docs/auto-skill-invocation-proposal.md new file mode 100644 index 0000000..1c08e74 --- /dev/null +++ b/docs/auto-skill-invocation-proposal.md @@ -0,0 +1,140 @@ +# Proposal: Improving Auto-Skill Invocation Reliability + +## Context + +PR #7 (`feat/auto-sidecar-skills`) introduces four auto-skills for sidecar: `auto-review`, `auto-unblock`, `auto-security`, and `auto-bmad-method-check`. These are contextual skills that fire automatically at key workflow moments (post-implementation review, stuck debugging, pre-commit security scan, BMAD artifact checkpoints). + +During development of the PR, we researched how Claude Code discovers and invokes skills, and found a significant reliability issue with how skills are installed. This proposal documents the findings and the changes made in the PR to address them, plus suggestions for further improvements that could be made in sidecar independently. + +## Summary + +Auto-skills need to be reliably discovered and invoked by Claude Code when their trigger conditions are met. The key finding: **Claude Code only scans top-level `~/.claude/skills/*/SKILL.md`** for its available skills list. Skills nested deeper than one level are invisible. 
+ +## The Problem + +The initial version of the PR installed auto-skills as nested subdirectories under the main sidecar skill: + +```text +~/.claude/skills/sidecar/ ← discovered ✅ +~/.claude/skills/sidecar/auto-review/ ← NOT discovered ❌ +~/.claude/skills/sidecar/auto-unblock/ ← NOT discovered ❌ +~/.claude/skills/sidecar/auto-security/ ← NOT discovered ❌ +``` + +Nested skills don't appear in the system reminder Claude evaluates every turn, so Claude has no reason to invoke them unless something else prompts a skill check. + +## How Claude Code Skill Discovery Works + +Every turn, Claude sees a system reminder listing available skills with their `description` fields from SKILL.md frontmatter. This is the primary mechanism for contextual skill invocation — Claude pattern-matches descriptions against conversation state and invokes matching skills. + +The pipeline: +1. Skill tool scans `~/.claude/skills/*/SKILL.md` and plugin skill directories +2. Skill names + descriptions appear in the "available skills" system reminder +3. Claude evaluates descriptions against conversation state each turn +4. If a description matches → Claude reads the full SKILL.md and follows its procedure + +Skills not in this list are effectively invisible unless something else (like the Superpowers plugin) forces Claude to do a broader filesystem scan. + +## What's Been Addressed in PR #7 + +The following changes are included in the PR to address the discovery issue. + +### 1. Top-level installation + +Auto-skills are installed as top-level skill directories: + +```text +~/.claude/skills/sidecar-auto-review/ ← discovered ✅ +~/.claude/skills/sidecar-auto-unblock/ ← discovered ✅ +~/.claude/skills/sidecar-auto-security/ ← discovered ✅ +~/.claude/skills/sidecar-auto-bmad-method-check/ ← discovered ✅ +``` + +The `sidecar-` prefix keeps them namespaced. `postinstall.js` handles the installation and cleans up the old nested location on upgrade. + +### 2. 
Dual invocation: auto-fire + manual + +Each auto-skill now supports both: +- **Auto-fire** — Claude sees trigger descriptions in the skills list every turn and invokes when conditions match +- **Manual invocation** — users can type `/sidecar-auto-review`, `/sidecar-auto-security`, etc. to force invocation + +### 3. Cross-reference in main SKILL.md + +The main `skill/SKILL.md` now includes an "Auto-Skills" section with a table listing all four auto-skills and their trigger conditions. This gives Claude a second discovery path when reading the main sidecar skill. + +### Verified result + +After installing locally with these changes, the system reminder includes all four auto-skills: + +```text +- sidecar-auto-review: Use after completing a feature implementation, bug fix, or + significant code change — before claiming the work is done... +- sidecar-auto-unblock: Use when you have attempted 5 or more different approaches + to fix a bug, pass a test, or solve a problem and none have worked... +- sidecar-auto-security: Use when the user asks to commit changes, push code, or + create a pull request... +- sidecar-auto-bmad-method-check: Use when a BMAD-METHOD workflow has just produced + an output artifact... +``` + +## Suggestions for Further Improvement + +The following are ideas for future work in sidecar — separate from PR #7 — that could further improve auto-skill reliability. + +### Proposal A: SessionStart Hook + +**What:** Register a lightweight SessionStart hook that injects a compact auto-skills reminder into the system prompt on every session start, resume, clear, and compact. + +**Why:** This is the pattern used by the Superpowers plugin to achieve near-100% invocation reliability. Superpowers registers a synchronous SessionStart hook that injects skill-checking instructions into the system prompt. 
The skills list visibility from PR #7 is good, but a SessionStart hook would add a second independent triggering path — particularly valuable for users who don't have Superpowers installed. + +**Sketch:** A `hooks/hooks.json` registered during postinstall, with a SessionStart hook that injects a compact reminder listing the four auto-skills and their trigger conditions. The hook would fire on startup, resume, clear, and compact events. + +**Open question:** Does the Claude Code hooks API support system prompt injection for third-party packages, or only for plugins? This would affect the implementation approach. + +**Trade-off:** Adds ~200 bytes to the system prompt on every session. Minimal cost for significant reliability improvement. + +### Proposal B: Auto-Skills Config Namespace + +**What:** Add an `autoSkills` section to `~/.config/sidecar/config.json` for user-configurable enable/disable and defaults. + +**Why:** Users may want to disable specific auto-skills (e.g., skip security scans on personal projects) or set default models per skill (e.g., always use `gemini-pro` for code review). + +**Example config:** +```json +{ + "autoSkills": { + "review": { "enabled": true, "model": "gemini" }, + "unblock": { "enabled": true, "attemptThreshold": 5 }, + "security": { "enabled": true }, + "bmadMethodCheck": { "enabled": true, "artifactDir": "_bmad-output/" } + } +} +``` + +**How it would work:** Each auto-skill's SKILL.md would check config on invocation (via `sidecar_guide` or a new MCP tool) and skip if disabled. Default models from config would be used when the user doesn't specify one. + +**Trade-off:** Higher implementation effort. Probably best deferred until auto-skills have been used in practice and real user preferences emerge. + +### Proposal C: Community Auto-Skills + +**What:** Document the auto-skill convention so third parties can contribute their own. 
+ +**Why:** The infrastructure already supports it — `postinstall.js` uses `fs.readdirSync` to dynamically discover `skill/auto-*` directories, so adding a new auto-skill is just adding a directory with a `SKILL.md`. No hardcoded lists to update. + +**Convention:** +1. Create `skill/auto-<name>/SKILL.md` with standard frontmatter +2. Use `name: sidecar-auto-<name>` in frontmatter +3. Include `TRIGGER when:` clause in description +4. Postinstall automatically discovers and installs it as `~/.claude/skills/sidecar-auto-<name>/` + +**Trade-off:** Mainly a documentation effort. Could be a section in the README or a `CONTRIBUTING.md` addition. + +## Recommended Priority + +| Proposal | Effort | Impact | Suggested Timeline | +|----------|--------|--------|-------------------| +| **A: SessionStart hook** | Medium | High — guarantees awareness every session | Next PR | +| **B: Config namespace** | High | Medium — useful once patterns are established | After community feedback | +| **C: Community auto-skills** | Low | Medium — enables ecosystem growth | Anytime (docs only) | + +Happy to discuss any of these or help with implementation. The auto-skills PR (#7) is independent of these proposals — they're suggestions for future sidecar development regardless of whether the PR is merged as-is or modified. diff --git a/docs/bmad-workflow.md b/docs/bmad-workflow.md new file mode 100644 index 0000000..f6b0b02 --- /dev/null +++ b/docs/bmad-workflow.md @@ -0,0 +1,110 @@ +# BMAD-METHOD Workflow Reference + +Reference for the auto-bmad-method-check skill. Captures all workflows, artifacts, dependencies, and checkpoints. 
+ +## Phases & Workflows + +| Phase | Workflow | Agent | Input Dependencies | Output Artifact | Checkpoint | +|---|---|---|---|---|---| +| **1: Analysis** (optional) | Brainstorming | Mary (Analyst) | None | `brainstorming-report.md` | Human reviews report | +| | Market Research | Mary | None (needs web search) | `research-*.md` | Human reviews findings | +| | Domain Research | Mary | None | `research-*.md` | Human reviews findings | +| | Technical Research | Mary | None | `research-*.md` | Human reviews findings | +| | Create Product Brief | Mary | Brainstorming/research (optional) | `product-brief.md` | Human reviews brief | +| **2: Planning** (required) | Create PRD | John (PM) | Product brief (optional) | `PRD.md` | Human reviews PRD | +| | Validate PRD | John | `PRD.md` | Validation report | Human reviews findings | +| | Edit PRD | John | `PRD.md` | Updated `PRD.md` | Human reviews edits | +| | Create UX Design | Sally (UX) | `PRD.md` | `ux-design-specification.md` | Human reviews UX spec | +| **3: Solutioning** | Create Architecture | Winston (Architect) | `PRD.md` (+ UX spec optional) | `architecture.md` + ADRs | Human reviews arch decisions | +| | Create Epics & Stories | John | `PRD.md` + `architecture.md` | `epics.md` (or sharded) | Human reviews breakdown | +| | Check Implementation Readiness | Winston/John | PRD + Architecture + Epics (+ UX) | PASS / CONCERNS / FAIL | **Gate** — must pass before Phase 4 | +| **4: Implementation** | Sprint Planning | Bob (SM) | Epics files | `sprint-status.yaml` | One-time setup | +| | Sprint Status | Bob | `sprint-status.yaml` | Status summary + risk flags | Informational | +| | Create Story | Bob | Sprint status + Epics + all artifacts | `story-{e}-{s}-{slug}.md` | Human reviews story before dev | +| | Dev Story | Amelia (Dev) | Story file (status: ready-for-dev) | Working code + tests | HALTs on blockers | +| | Code Review | Amelia/Barry | Story file + git changes | Approved or Changes Requested | Human decides on 
action items | +| | Correct Course | Bob/John | PRD + Epics + sprint context | `sprint-change-proposal-*.md` | Human approves proposal | +| | Retrospective | Bob | All completed stories in epic | `epic-{N}-retro-{date}.md` | Significant Discovery Alert if assumptions shifted | +| **Quick Flow** | Quick Spec | Barry (Solo Dev) | None | `tech-spec.md` | Human reviews spec | +| | Quick Dev | Barry | `tech-spec.md` or direct instructions | Working code + tests | Self-review then human | +| **Cross-cutting** | Generate Project Context | Mary | Codebase scan | `project-context.md` | Used by 7+ workflows | +| | BMad Help | Any | Project state inspection | Next-step guidance | Runs after every workflow | + +## Dependency Chain + +```text +Brainstorming/Research ──> Product Brief ──> PRD ──> UX Design (optional) + ├──> Architecture + └──> Architecture ──> Epics/Stories + │ + Implementation Readiness <┘ (GATE) + │ + Sprint Planning (once) + │ + Create Story ──> Dev Story ──> Code Review + ^ │ + └─────── (next story) ────────┘ + │ + Epic complete ──> Retrospective ───┘ +``` + +## Artifact-to-Input Mapping + +Used by auto-bmad-method-check to determine which input documents to include in sidecar reviews. 
+ +| Output Artifact | Input Documents | +|---|---| +| `brainstorming-report.md` | None (freeform ideation) | +| `research-*.md` | None (primary research) | +| `product-brief.md` | `brainstorming-report.md`, `research-*.md` (if they exist) | +| `PRD.md` | `product-brief.md` (if exists) | +| `ux-design-specification.md` | `PRD.md` | +| `architecture.md` | `PRD.md`, `ux-design-specification.md` (if exists) | +| `epics.md` | `PRD.md`, `architecture.md` | +| Implementation Readiness | `PRD.md`, `architecture.md`, `epics.md`, `ux-design-specification.md` (if exists) | +| `sprint-status.yaml` | `epics.md` | +| `story-*.md` | `epics.md`, `PRD.md`, `architecture.md`, `sprint-status.yaml` | +| `sprint-change-proposal-*.md` | `PRD.md`, `epics.md`, affected `story-*.md` files | +| `epic-*-retro-*.md` | All `story-*.md` in that epic, previous retro (if exists) | +| `tech-spec.md` | None (Quick Flow — standalone) | + +## Agents + +| Agent | Name | Personality | Primary Workflows | +|---|---|---|---| +| Analyst | Mary | "Excited treasure hunter" | Brainstorming, Research, Product Brief, Project Context | +| Product Manager | John | "Asks WHY relentlessly like a detective" | PRD, Validate/Edit PRD, Epics, Readiness Check, Course Correction | +| Architect | Winston | "Calm, pragmatic tones" | Architecture, Readiness Check | +| Scrum Master | Bob | "Crisp, checklist-driven, zero ambiguity tolerance" | Sprint Planning/Status, Create Story, Retrospective, Course Correction | +| Developer | Amelia | "Ultra-succinct, speaks in file paths" | Dev Story, Code Review | +| UX Designer | Sally | "Paints pictures with words" | UX Design | +| Quick Flow Solo Dev | Barry | "Direct, no fluff, just results" | Quick Spec, Quick Dev, Code Review | +| Tech Writer | Paige | "Patient educator" | Document Project | + +## Key Design Principles + +- **Micro-file architecture**: Steps loaded one at a time to prevent LLM "lost in middle" issues +- **Human must approve** every step transition — no 
autonomous progression +- **Fresh conversations** per workflow to keep context clean +- **Scale-adaptive**: Quick Flow (1-15 stories), BMad Method (10-50+), Enterprise (30+) +- **`project-context.md`** acts as the "constitution" for consistent AI agent behavior + +## Standard Artifact Locations + +```text +_bmad-output/ + planning-artifacts/ + brainstorming-report.md + product-brief.md + research-*.md + PRD.md + ux-design-specification.md + architecture.md + epics.md (or epics/ directory) + sprint-change-proposal-*.md + implementation-artifacts/ + sprint-status.yaml + story-*.md + epic-*-retro-*.md + project-context.md +``` diff --git a/docs/publishing.md b/docs/publishing.md index 49dc71c..b4ad8ae 100644 --- a/docs/publishing.md +++ b/docs/publishing.md @@ -14,7 +14,7 @@ The `.github/workflows/publish.yml` workflow triggers on `v*` tags and publishes ## Publishing Setup -- **Trusted Publisher**: Configured on npm for `jrenaldi79/sidecar` + `publish.yml` (OIDC-based, no manual token management) -- **NPM_TOKEN**: Granular access token stored as GitHub secret (bypass 2FA enabled, scoped to `claude-sidecar`) +- **Trusted Publisher**: Configured on npm for `jrenaldi79/sidecar` + `publish.yml` (OIDC-based provenance attestation) +- **NPM_TOKEN**: Granular automation token stored as GitHub secret (scoped to `claude-sidecar`, required for `npm publish`) - **OIDC provenance**: `--provenance` flag adds Sigstore attestation (requires `id-token: write` permission) - **Trusted publisher config**: https://www.npmjs.com/package/claude-sidecar/access (Settings tab) diff --git a/electron/ipc-setup.js b/electron/ipc-setup.js index a59d315..f077e58 100644 --- a/electron/ipc-setup.js +++ b/electron/ipc-setup.js @@ -40,10 +40,14 @@ function registerSetupHandlers(ipcMain, getMainWindow) { const { removeApiKey } = require('../src/utils/api-key-store'); const { removeFromAuthJson } = require('../src/utils/auth-json'); const result = removeApiKey(provider); - // Always clean auth.json too 
— prevents auto-import from re-adding the key + // Clean auth.json when present — prevents auto-import from re-adding the key if (result.alsoInAuthJson) { - removeFromAuthJson(provider); - result.alsoInAuthJson = false; + try { + removeFromAuthJson(provider); + result.alsoInAuthJson = false; + } catch (authErr) { + logger.warn('Failed to remove from auth.json', { provider, error: authErr.message }); + } } return result; } catch (err) { @@ -99,21 +103,32 @@ function registerSetupHandlers(ipcMain, getMainWindow) { }); ipcMain.handle('sidecar:get-api-keys', () => { - const { readApiKeys, readApiKeyHints, saveApiKey } = require('../src/utils/api-key-store'); - const { importFromAuthJson } = require('../src/utils/auth-json'); - const status = readApiKeys(); - const hints = readApiKeyHints(); + try { + const { readApiKeys, readApiKeyHints, saveApiKey } = require('../src/utils/api-key-store'); + const { importFromAuthJson } = require('../src/utils/auth-json'); + const status = readApiKeys(); + const hints = readApiKeyHints(); - // Auto-import keys from auth.json that sidecar doesn't have yet - const { imported } = importFromAuthJson(status); - for (const entry of imported) { - saveApiKey(entry.provider, entry.key); - status[entry.provider] = true; - const visible = entry.key.slice(0, 8); - hints[entry.provider] = visible + '\u2022'.repeat(Math.max(0, Math.min(entry.key.length - 8, 12))); - } + // Auto-import keys from auth.json that sidecar doesn't have yet + const { imported } = importFromAuthJson(status); + const successfullyImported = []; + for (const entry of imported) { + const result = saveApiKey(entry.provider, entry.key); + if (result.success) { + status[entry.provider] = true; + const visible = entry.key.slice(0, 8); + hints[entry.provider] = visible + '\u2022'.repeat(Math.max(0, Math.min(entry.key.length - 8, 12))); + successfullyImported.push(entry.provider); + } else { + logger.warn('Failed to import key from auth.json', { provider: entry.provider, error: 
result.error }); + } + } - return { status, hints, imported: imported.map(e => e.provider) }; + return { status, hints, imported: successfullyImported }; + } catch (err) { + logger.error('get-api-keys handler error', { error: err.message }); + return { status: {}, hints: {}, imported: [], error: err.message }; + } }); ipcMain.handle('sidecar:fetch-models', async () => { diff --git a/hooks/hooks.json b/hooks/hooks.json new file mode 100644 index 0000000..d977780 --- /dev/null +++ b/hooks/hooks.json @@ -0,0 +1,38 @@ +{ + "description": "Sidecar activity monitoring hooks for auto-skill triggers", + "hooks": { + "PreToolUse": [ + { + "matcher": "Bash", + "hooks": [ + { + "type": "command", + "command": "__HOOKS_DIR__/pre-bash.sh" + } + ] + } + ], + "PostToolUse": [ + { + "matcher": "Edit|Write|Bash|MultiEdit", + "hooks": [ + { + "type": "command", + "command": "__HOOKS_DIR__/post-tool-use.sh" + } + ] + } + ], + "PostToolUseFailure": [ + { + "matcher": "Bash|Edit|Write|MultiEdit", + "hooks": [ + { + "type": "command", + "command": "__HOOKS_DIR__/post-failure.sh" + } + ] + } + ] + } +} diff --git a/hooks/post-failure.sh b/hooks/post-failure.sh new file mode 100755 index 0000000..c5fbfe7 --- /dev/null +++ b/hooks/post-failure.sh @@ -0,0 +1,70 @@ +#!/usr/bin/env bash +# PostToolUseFailure hook: failure event collection. +# +# Reads JSON from stdin (Claude Code hook contract). +# Appends a failure event to session JSONL for Phase 2 analysis. +# No triggers in Phase 1 — purely collecting data for the Stop hook analyzer. + +set -euo pipefail + +# Safety guard +[ -f "$0" ] || exit 0 + +# ── Config check ────────────────────────────────────────────────────── +# Note: jq's // operator treats false as falsy, so we use explicit type checks +CONFIG_PATH="${HOME}/.config/sidecar/config.json" +if [ -f "$CONFIG_PATH" ] && command -v jq >/dev/null 2>&1; then + MASTER=$(jq -r '.autoSkills.enabled | if type == "boolean" then . 
else true end' "$CONFIG_PATH" 2>/dev/null || echo "true") + MONITORING=$(jq -r '.monitoring.enabled | if type == "boolean" then . else true end' "$CONFIG_PATH" 2>/dev/null || echo "true") + if [ "$MASTER" = "false" ] || [ "$MONITORING" = "false" ]; then + exit 0 + fi +fi + +# ── Read stdin to temp file (avoid ARG_MAX on large payloads) ───────── +if ! command -v jq >/dev/null 2>&1; then + exit 0 +fi + +TMP_JSON=$(mktemp 2>/dev/null) || exit 0 +trap 'rm -f "$TMP_JSON"' EXIT +cat > "$TMP_JSON" + +SESSION_ID=$(jq -r '.session_id // ""' "$TMP_JSON" 2>/dev/null || echo "") +if [ -z "$SESSION_ID" ]; then + exit 0 +fi + +# ── Append failure event ───────────────────────────────────────────── +# Sanitize SESSION_ID to alphanumeric/hyphens only (prevent path traversal) +SAFE_SID=$(printf '%s' "$SESSION_ID" | tr -cd 'a-zA-Z0-9_-') +if [ -z "$SAFE_SID" ]; then + exit 0 +fi +EVENT_FILE="${TMPDIR:-/tmp}/sidecar-monitor-${SAFE_SID}.jsonl" + +EVENT=$(jq -rc --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" ' + { + ts: $ts, + tool: (.tool_name // "unknown"), + file: (if .tool_name == "MultiEdit" then (.tool_input.edits[0].file_path // "") + else (.tool_input.file_path // "") end), + success: false, + command: (if .tool_name == "Bash" then (.tool_input.command // "") else "" end), + errorSnippet: ((.error // .tool_response.error // .tool_response.stderr // "")[0:500]) + }' "$TMP_JSON" 2>/dev/null || echo "") + +if [ -n "$EVENT" ]; then + # Atomically create file with restrictive permissions (noclobber prevents TOCTOU) + (umask 177 && set -o noclobber && : > "$EVENT_FILE" 2>/dev/null) || true + # Verify ownership before appending + FILE_OWNER=$(stat -c '%u' "$EVENT_FILE" 2>/dev/null || stat -f '%u' "$EVENT_FILE" 2>/dev/null || echo "") + if [ "$FILE_OWNER" = "$(id -u)" ]; then + # Cap event file at 5MB to prevent unbounded growth + MAX_SIZE=5242880 + FILE_SIZE=$(wc -c < "$EVENT_FILE" 2>/dev/null || echo 0) + if [ "$FILE_SIZE" -lt "$MAX_SIZE" ]; then + echo "$EVENT" >> "$EVENT_FILE" + fi + 
#!/usr/bin/env bash
# PostToolUse hook: event collection + BMAD artifact trigger.
#
# Reads the hook payload (JSON) from stdin (Claude Code hook contract).
# 1. Appends a structured event to the session JSONL file (for Phase 2 analysis).
# 2. If a Write/Edit/MultiEdit targets _bmad-output/, injects additionalContext.
#
# Phase 1: shell-only, no Node.js dependency.
# Every bail-out path exits 0 so a monitoring problem never affects the tool call.

set -euo pipefail

# Safety guard: do nothing if this script file is no longer present at $0
[ -f "$0" ] || exit 0

# jq does all JSON handling below; without it there is nothing useful to do.
# (Checked before stdin is read, exactly as before.)
if ! command -v jq >/dev/null 2>&1; then
  exit 0
fi

# ── Config check ──────────────────────────────────────────────────────
# Note: jq's // operator treats false as falsy, so we use explicit type checks.
# A missing key or an unreadable config means "enabled".
CONFIG_PATH="${HOME}/.config/sidecar/config.json"
MONITORING_ON="true"
BMAD_ON="true"
MASTER_ON="true"

if [ -f "$CONFIG_PATH" ]; then
  MASTER_ON=$(jq -r '.autoSkills.enabled | if type == "boolean" then . else true end' "$CONFIG_PATH" 2>/dev/null || echo "true")
  BMAD_ON=$(jq -r '.autoSkills.bmadMethodCheck.enabled | if type == "boolean" then . else true end' "$CONFIG_PATH" 2>/dev/null || echo "true")
  MONITORING_ON=$(jq -r '.monitoring.enabled | if type == "boolean" then . else true end' "$CONFIG_PATH" 2>/dev/null || echo "true")
fi

# If master auto-skills switch is off, skip everything
if [ "$MASTER_ON" = "false" ]; then
  exit 0
fi

# ── Read stdin to temp file (avoid ARG_MAX on large payloads) ─────────
TMP_JSON=$(mktemp 2>/dev/null) || exit 0
trap 'rm -f "$TMP_JSON"' EXIT
cat > "$TMP_JSON"

TOOL_NAME=$(jq -r '.tool_name // ""' "$TMP_JSON" 2>/dev/null || echo "")
SESSION_ID=$(jq -r '.session_id // ""' "$TMP_JSON" 2>/dev/null || echo "")

# ── Event collection ─────────────────────────────────────────────────
# Append structured event to session-specific JSONL file (controlled by monitoring.enabled).
# Sanitize SESSION_ID to [A-Za-z0-9_-] only (prevents path traversal in the file name).
SAFE_SID=""
if [ "$MONITORING_ON" != "false" ]; then
  SAFE_SID=$(printf '%s' "$SESSION_ID" | tr -cd 'a-zA-Z0-9_-')
fi
if [ -n "$SAFE_SID" ]; then
  EVENT_FILE="${TMPDIR:-/tmp}/sidecar-monitor-${SAFE_SID}.jsonl"

  # Cap event file at 5MB to prevent unbounded growth in long sessions
  MAX_SIZE=5242880
  if [ -f "$EVENT_FILE" ] && [ "$(wc -c < "$EVENT_FILE" 2>/dev/null || echo 0)" -gt "$MAX_SIZE" ]; then
    : # Skip collection — file too large
  else
    # Event fields:
    #   file    - empty for Bash; otherwise top-level tool_input.file_path first
    #             (that is where MultiEdit carries its path), then
    #             edits[0].file_path as a fallback.
    #   success - derived from the exit code for Bash; other tools are recorded
    #             as successful because this hook only fires on success for them.
    #   command - Bash only; long base64-like tokens are redacted BEFORE the
    #             200-char truncation so a secret cut at the boundary cannot
    #             leave an unredacted fragment.
    EVENT=$(jq -c --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" '
      {
        ts: $ts,
        tool: (.tool_name // "unknown"),
        file: (if .tool_name == "Bash" then ""
               else (.tool_input.file_path // .tool_input.edits[0].file_path // "") end),
        success: (if .tool_name == "Bash" then ((.tool_response.exit_code // 0) == 0)
                  else true end),
        command: (if .tool_name == "Bash"
                  then ((.tool_input.command // "") | gsub("[A-Za-z0-9+/=]{40,}"; "[REDACTED]") | .[0:200])
                  else "" end)
      }' "$TMP_JSON" 2>/dev/null || echo "")

    if [ -n "$EVENT" ]; then
      # Atomically create file with restrictive permissions (noclobber prevents TOCTOU).
      # Fails harmlessly when the file already exists.
      (umask 177 && set -o noclobber && : > "$EVENT_FILE" 2>/dev/null) || true
      # Verify ownership before appending (GNU stat first, BSD stat as fallback)
      FILE_OWNER=$(stat -c '%u' "$EVENT_FILE" 2>/dev/null || stat -f '%u' "$EVENT_FILE" 2>/dev/null || echo "")
      if [ "$FILE_OWNER" = "$(id -u)" ]; then
        printf '%s\n' "$EVENT" >> "$EVENT_FILE"
      fi
    fi
  fi
fi

# ── BMAD artifact trigger ────────────────────────────────────────────
# Only fire if bmadMethodCheck is enabled (master switch was already checked above)
if [ "$BMAD_ON" = "false" ]; then
  exit 0
fi

# Check if a Write, Edit, or MultiEdit targeted _bmad-output/.
# Both .tool_input.file_path and any .tool_input.edits[].file_path are considered.
if [ "$TOOL_NAME" = "Write" ] || [ "$TOOL_NAME" = "Edit" ] || [ "$TOOL_NAME" = "MultiEdit" ]; then
  # One jq pass yields the first matching path, or "" when there is none.
  # The type check keeps a non-string path from aborting the filter.
  BMAD_FILE=$(jq -r '
    [.tool_input.file_path, (.tool_input.edits[]?.file_path)]
    | map(select(type == "string" and contains("_bmad-output/")))
    | first // ""' "$TMP_JSON" 2>/dev/null || echo "")

  if [ -n "$BMAD_FILE" ]; then
    # Build JSON via jq so quotes/backslashes in the filename cannot break the output.
    # NOTE(review): hookEventName added because the Claude Code hooks reference, as
    # recalled, lists it inside hookSpecificOutput — confirm against current docs.
    jq -n --arg file "$BMAD_FILE" \
      '{hookSpecificOutput:{hookEventName:"PostToolUse",additionalContext:("A BMAD-METHOD artifact was just written or updated at " + $file + ". Consider running the sidecar-auto-bmad-method-check skill to get a second-opinion review from another model before finalizing this artifact. You can invoke it with: use the Skill tool with skill \u0027sidecar-auto-bmad-method-check\u0027. If the user has already reviewed this artifact or explicitly declined a check, proceed without one.")}}'
    exit 0
  fi
fi
#!/usr/bin/env bash
# PreToolUse hook: intercept git commit/push/PR creation for auto-security.
#
# Reads the hook payload (JSON) from stdin (Claude Code hook contract).
# If the Bash command is a git commit, git push, or gh pr create,
# injects additionalContext recommending the auto-security scan.
#
# Phase 1: shell-only, no Node.js dependency.
# Every bail-out path exits 0 so the hook never blocks a git operation.

set -euo pipefail

# Safety guard: exit if this script was removed but hook still registered
[ -f "$0" ] || exit 0

# ── Config check ──────────────────────────────────────────────────────
# Note: jq's // operator treats false as falsy, so we use explicit type checks.
# A missing key or an unreadable config means "enabled".
CONFIG_PATH="${HOME}/.config/sidecar/config.json"
if [ -f "$CONFIG_PATH" ] && command -v jq >/dev/null 2>&1; then
  MASTER=$(jq -r '.autoSkills.enabled | if type == "boolean" then . else true end' "$CONFIG_PATH" 2>/dev/null || echo "true")
  SECURITY=$(jq -r '.autoSkills.security.enabled | if type == "boolean" then . else true end' "$CONFIG_PATH" 2>/dev/null || echo "true")
  if [ "$MASTER" = "false" ] || [ "$SECURITY" = "false" ]; then
    exit 0
  fi
fi

# ── Read stdin to temp file (avoid ARG_MAX on large payloads) ─────────
# If mktemp fails (disk full, permissions), allow the command through
# rather than blocking git operations with a non-zero exit.
TMP_JSON=$(mktemp 2>/dev/null) || exit 0
trap 'rm -f "$TMP_JSON"' EXIT
cat > "$TMP_JSON"

# Extract the Bash command from tool_input.command
COMMAND=""
if command -v jq >/dev/null 2>&1; then
  COMMAND=$(jq -r '.tool_input.command // ""' "$TMP_JSON" 2>/dev/null || echo "")
fi

# If jq not available or command empty, allow through
if [ -z "$COMMAND" ]; then
  exit 0
fi

# ── Pattern match ─────────────────────────────────────────────────────
# Matches `git commit`, `git push` and `gh pr create` when they start a command:
#   leading boundary  - start of a line, or one of ; & | ( , then optional
#                       whitespace (so indented lines and subshells are caught)
#   trailing boundary - whitespace, one of ; & | ) , or end of line
#                       (so `git push; echo done` and `(git commit)` are caught)
# The command is fed through a here-string rather than `printf | grep -q`:
# under `pipefail`, grep -q exiting on its first match can SIGPIPE the writer on
# a very large command and turn a real match into a non-zero pipeline status.
GIT_GATE_RE='(^|[;&|(])[[:space:]]*(git[[:space:]]+(commit|push)|gh[[:space:]]+pr[[:space:]]+create)([[:space:];&|)]|$)'
if ! grep -qE "$GIT_GATE_RE" <<< "$COMMAND"; then
  exit 0
fi

# ── Output recommendation ────────────────────────────────────────────
# Use hookSpecificOutput.additionalContext to reach Claude's context.
# NOTE(review): hookEventName added because the Claude Code hooks reference, as
# recalled, lists it inside hookSpecificOutput — confirm against current docs.
cat <<'HOOK_OUTPUT'
{
  "hookSpecificOutput": {
    "hookEventName": "PreToolUse",
    "additionalContext": "IMPORTANT: A git commit, push, or PR creation was detected. Before proceeding, consider running the sidecar-auto-security skill to scan these changes for security vulnerabilities (hardcoded secrets, injection flaws, auth bypass). You can invoke it with: use the Skill tool with skill 'sidecar-auto-security'. If the user has explicitly declined a security scan for these changes, proceed without one."
  }
}
HOOK_OUTPUT
b/scripts/postinstall.js index 2f6b7d4..7b980cf 100644 --- a/scripts/postinstall.js +++ b/scripts/postinstall.js @@ -6,6 +6,7 @@ * 1. Copies SKILL.md to ~/.claude/skills/sidecar/ * 2. Registers MCP server in Claude Code (~/.claude.json) * 3. Registers MCP server in Claude Desktop/Cowork config + * 4. Registers activity monitoring hooks in ~/.claude/settings.json */ const fs = require('fs'); @@ -13,10 +14,22 @@ const path = require('path'); const os = require('os'); const { execFileSync } = require('child_process'); -const SKILL_SOURCE = path.join(__dirname, '..', 'skill', 'SKILL.md'); +const SKILL_DIR = path.join(__dirname, '..', 'skill'); +const SKILL_SOURCE = path.join(SKILL_DIR, 'SKILL.md'); const SKILL_DEST_DIR = path.join(os.homedir(), '.claude', 'skills', 'sidecar'); const SKILL_DEST = path.join(SKILL_DEST_DIR, 'SKILL.md'); +let AUTO_SKILLS = []; +try { + AUTO_SKILLS = fs + .readdirSync(SKILL_DIR, { withFileTypes: true }) + .filter((entry) => entry.isDirectory() && entry.name.startsWith('auto-')) + .map((entry) => entry.name) + .sort(); +} catch { + // skill/ directory missing — continue with empty list +} + const MCP_CONFIG = { command: 'npx', args: ['-y', 'claude-sidecar@latest', 'mcp'] }; /** @@ -50,7 +63,7 @@ function addMcpToConfigFile(configPath, name, config) { return status; } -/** Install skill file to ~/.claude/skills/sidecar/ */ +/** Install skill files to ~/.claude/skills/ */ function installSkill() { try { fs.mkdirSync(SKILL_DEST_DIR, { recursive: true }); @@ -59,6 +72,32 @@ function installSkill() { } catch (err) { console.error(`[claude-sidecar] Warning: Could not install skill: ${err.message}`); } + + const skillsRoot = path.join(os.homedir(), '.claude', 'skills'); + for (const name of AUTO_SKILLS) { + try { + const src = path.join(SKILL_DIR, name, 'SKILL.md'); + // Install as top-level skill (e.g., ~/.claude/skills/sidecar-auto-review/) + // so Claude Code discovers it in the available skills list + const destDir = path.join(skillsRoot, 
/**
 * Merge sidecar hook matchers into a Claude settings object.
 *
 * For every matcher in `hooksConfig.hooks[event]`:
 *  - if an existing entry already has the same `matcher` string and a hook with
 *    the same full command path, nothing changes and it is not counted;
 *  - otherwise, stale copies of that hook are removed from existing entries
 *    (same full command, or same script basename with a path containing
 *    "claude-sidecar" — the upgrade case), entries emptied by that removal are
 *    dropped, and the new matcher is appended.
 *
 * User-owned data is left alone: hooks that are not sidecar hooks stay in
 * place, and malformed entries (null, missing or non-array `hooks`) are passed
 * through untouched instead of throwing, so a hand-edited settings file cannot
 * abort installation. An event whose value is not an array is skipped with a
 * warning rather than overwritten.
 *
 * @param {object} settings - Claude settings object (mutated in place)
 * @param {object} hooksConfig - Parsed hooks.json with resolved paths
 * @returns {number} Count of newly registered or changed hooks
 */
function mergeHooks(settings, hooksConfig) {
  if (!settings.hooks) { settings.hooks = {}; }
  if (typeof settings.hooks !== 'object' || Array.isArray(settings.hooks)) {
    console.error('[claude-sidecar] Warning: "hooks" in settings is not an object; skipping hook registration.');
    return 0;
  }

  // Hook list of a settings entry, or null when the entry is malformed or has none.
  const hookListOf = (entry) => (entry && Array.isArray(entry.hooks) ? entry.hooks : null);

  let registered = 0;
  const wanted = (hooksConfig && hooksConfig.hooks) || {};

  for (const [event, matchers] of Object.entries(wanted)) {
    if (!Array.isArray(matchers)) { continue; }
    if (!settings.hooks[event]) { settings.hooks[event] = []; }
    if (!Array.isArray(settings.hooks[event])) {
      console.error(`[claude-sidecar] Warning: hooks.${event} in settings is not an array; leaving it untouched.`);
      continue;
    }

    for (const matcher of matchers) {
      if (!matcher) { continue; }
      const cmd = (matcher.hooks && matcher.hooks[0] && matcher.hooks[0].command) || '';

      // Exact hook already present (same matcher string AND same command path)?
      const alreadyRegistered = settings.hooks[event].some((existing) => {
        const hooks = hookListOf(existing);
        return hooks !== null &&
          existing.matcher === matcher.matcher &&
          hooks.some((h) => Boolean(h) && h.command === cmd);
      });
      if (alreadyRegistered) { continue; }

      // A hook is stale when it is this same command registered under another
      // matcher, or an older install of the same sidecar script.
      const basename = path.basename(cmd);
      const isStale = (h) => {
        if (!h || typeof h.command !== 'string') { return false; }
        if (h.command === cmd) { return true; }
        return path.basename(h.command) === basename && h.command.includes('claude-sidecar');
      };

      const next = [];
      for (const existing of settings.hooks[event]) {
        const hooks = hookListOf(existing);
        const kept = hooks === null ? null : hooks.filter((h) => !isStale(h));
        if (kept === null || kept.length === hooks.length) {
          // Nothing of ours in this entry — keep the user's object as-is.
          next.push(existing);
        } else if (kept.length > 0) {
          next.push({ ...existing, hooks: kept });
        }
        // else: the entry held only stale sidecar hooks — drop it.
      }
      next.push(matcher);
      settings.hooks[event] = next;
      registered++;
    }
  }
  return registered;
}
/**
 * Register activity monitoring hooks in ~/.claude/settings.json.
 *
 * Reads hooks/hooks.json next to this script, substitutes the absolute hooks
 * directory for every `__HOOKS_DIR__` placeholder (resolved at install time),
 * merges the result into the user's settings via mergeHooks(), and writes the
 * file back only when something actually changed.
 *
 * This is a best-effort install step: every failure is reported as a warning
 * and the function returns, so a problem here never fails `npm install`.
 * An existing settings file that cannot be read or parsed is left untouched —
 * only a missing or empty file is treated as "start fresh" — because writing
 * over it would discard the user's other settings.
 *
 * @returns {void}
 */
function registerHooks() {
  const hooksDir = path.join(__dirname, '..', 'hooks');
  const hooksConfigPath = path.join(hooksDir, 'hooks.json');
  if (!fs.existsSync(hooksConfigPath)) { return; }

  let hooksConfig;
  try {
    const raw = fs.readFileSync(hooksConfigPath, 'utf-8');
    // Use JSON.stringify to properly escape all special chars (quotes, backslashes, etc.)
    // before splicing the directory into JSON text; slice() strips the outer quotes.
    const safeHooksDir = JSON.stringify(hooksDir).slice(1, -1);
    // Function replacer: keeps `$` sequences in the path from being read as
    // replacement patterns.
    hooksConfig = JSON.parse(raw.replace(/__HOOKS_DIR__/g, () => safeHooksDir));
  } catch (err) {
    console.error(`[claude-sidecar] Warning: Could not read hooks config: ${err.message}`);
    return;
  }

  const settingsPath = path.join(os.homedir(), '.claude', 'settings.json');
  let settings = {};
  try {
    const rawSettings = fs.readFileSync(settingsPath, 'utf-8');
    if (rawSettings.trim() !== '') {
      settings = JSON.parse(rawSettings);
    }
  } catch (err) {
    if (!err || err.code !== 'ENOENT') {
      // Unreadable or invalid JSON: do NOT overwrite the user's file.
      console.error(`[claude-sidecar] Warning: Could not read ${settingsPath}; hooks not registered: ${err.message}`);
      return;
    }
    // File doesn't exist — start fresh
  }
  if (settings === null || typeof settings !== 'object' || Array.isArray(settings)) {
    console.error(`[claude-sidecar] Warning: ${settingsPath} is not a JSON object; hooks not registered.`);
    return;
  }

  let registered;
  try {
    registered = mergeHooks(settings, hooksConfig);
    if (registered > 0) {
      // recursive mkdir is a no-op when the directory already exists
      fs.mkdirSync(path.dirname(settingsPath), { recursive: true, mode: 0o700 });
      fs.writeFileSync(settingsPath, JSON.stringify(settings, null, 2), { mode: 0o600 });
    }
  } catch (err) {
    console.error(`[claude-sidecar] Warning: Could not register hooks: ${err.message}`);
    return;
  }

  if (registered > 0) {
    console.log('');
    console.log(`[claude-sidecar] Activity monitoring hooks registered (${registered}):`);
    console.log(' - PreToolUse: auto-security gate (git commit/push/PR)');
    console.log(' - PostToolUse: BMAD artifact trigger + event collection');
    console.log(' - PostToolUseFailure: failure event collection');
    console.log(' To disable: sidecar auto-skills --off');
  }
}
// Hook script basenames registered by postinstall — used to identify sidecar hooks during removal.
const SIDECAR_HOOK_SCRIPTS = ['pre-bash.sh', 'post-tool-use.sh', 'post-failure.sh'];

/**
 * Decide whether a hook command was installed by claude-sidecar.
 *
 * Requires both a known script basename AND "claude-sidecar" somewhere in the
 * path, so a user's own script that merely shares a file name is not matched.
 *
 * @param {*} command - The `command` value of a hook entry
 * @returns {boolean} True only for non-empty strings that match both conditions
 */
function isSidecarHookCommand(command) {
  if (!command || typeof command !== 'string') { return false; }
  const basename = path.basename(command);
  return SIDECAR_HOOK_SCRIPTS.includes(basename) && command.includes('claude-sidecar');
}

/**
 * Remove sidecar hooks from ~/.claude/settings.json.
 *
 * Only the individual sidecar hook commands are removed. A matcher entry that
 * also holds user hooks keeps those hooks; an entry is dropped only when
 * removing the sidecar hooks leaves it empty, and an event key (and then the
 * `hooks` object) only when this function emptied it. Malformed entries are
 * passed through untouched. The file is rewritten only if something was
 * removed; a write failure is reported as a warning instead of failing the
 * uninstall.
 *
 * @returns {void}
 */
function removeHooks() {
  const settingsPath = path.join(os.homedir(), '.claude', 'settings.json');
  let settings;
  try {
    settings = JSON.parse(fs.readFileSync(settingsPath, 'utf-8'));
  } catch {
    return; // No settings file (or unreadable/invalid) — nothing to clean up
  }

  if (!settings || typeof settings !== 'object') { return; }
  if (!settings.hooks || typeof settings.hooks !== 'object') { return; }

  let removed = 0;
  for (const [event, matchers] of Object.entries(settings.hooks)) {
    if (!Array.isArray(matchers)) { continue; }

    const kept = [];
    let touched = false;
    for (const matcher of matchers) {
      const hooks = matcher && Array.isArray(matcher.hooks) ? matcher.hooks : null;
      if (hooks === null) { kept.push(matcher); continue; }

      const userHooks = hooks.filter((h) => !(h && isSidecarHookCommand(h.command)));
      const dropped = hooks.length - userHooks.length;
      if (dropped === 0) { kept.push(matcher); continue; }

      removed += dropped;
      touched = true;
      // Keep the entry (minus our hooks) when the user has hooks of their own in it.
      if (userHooks.length > 0) { kept.push({ ...matcher, hooks: userHooks }); }
    }

    if (!touched) { continue; }
    if (kept.length === 0) {
      delete settings.hooks[event];
    } else {
      settings.hooks[event] = kept;
    }
  }

  if (removed === 0) { return; }

  // Clean up empty hooks object
  if (Object.keys(settings.hooks).length === 0) {
    delete settings.hooks;
  }

  try {
    fs.writeFileSync(settingsPath, JSON.stringify(settings, null, 2), { mode: 0o600 });
  } catch (err) {
    console.error(`[claude-sidecar] Warning: Could not update ~/.claude/settings.json: ${err.message}`);
    return;
  }
  console.log(`[claude-sidecar] Removed ${removed} hook(s) from ~/.claude/settings.json`);
}