diff --git a/.github/workflows/recording-eval.yaml b/.github/workflows/recording-eval.yaml index eedb9b2..e547a74 100644 --- a/.github/workflows/recording-eval.yaml +++ b/.github/workflows/recording-eval.yaml @@ -80,9 +80,9 @@ jobs: with: header: recording-eval message: | - ## Recording Evaluation - - 🎬 **[View recording comparisons](${{ steps.preview.outputs.preview-url }})** + ## Evidence + ### Screenshots and Recordings + **[View Recordings & Review](${{ steps.preview.outputs.preview-url }})** - name: Upload recordings if: github.event.action != 'closed' diff --git a/CLAUDE.md b/CLAUDE.md index 197f74b..ff7987e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -32,6 +32,10 @@ npm run dev # Development with hot-reload - **PTY** - Uses `node-pty` for pseudo-terminal management - **File Server** - HTTP file server runs on port 7498 regardless of MCP transport mode (stdio or HTTP). Tool results include `download_url` which is always valid. +## MCP Tools + +Tool names, descriptions, and parameters are registered in `src/index.ts` with schemas in `src/tools/`. The README.md [MCP Tools](#mcp-tools) section documents each tool with examples. **Any changes to tool schemas, parameters, or descriptions must be reflected in README.md.** + ## Commit Format Use conventional commits: `feat:`, `fix:`, `docs:`, `chore:`, `refactor:`, `test:` diff --git a/README.md b/README.md index 4f783e7..5d1734d 100644 --- a/README.md +++ b/README.md @@ -117,6 +117,12 @@ Open [`k9s`](https://k9scli.io/) and show [Ark](https://github.com/mckinsey/agen ![Screenshot: Examples - K9S Agents](./docs/examples/k9s-agents.gif) +Take a bordered screenshot: + +> Open Claude Code. Take a screenshot with a macOS window border titled "Shellwright". + +![Screenshot: Examples - Bordered Screenshot](./docs/examples/claude-code-bordered.png) + Use [`htop`](https://github.com/htop-dev/htop): > Open htop and show the most resource intensive process. @@ -280,12 +286,13 @@ drwxr-xr-x 10 user staff 320 Dec 18 09:00 .. 
### **shell_screenshot** -Capture terminal as PNG. Also saves SVG, ANSI, and plain text versions: +Capture terminal as PNG. Also saves SVG, ANSI, and plain text versions. Pass `name` without extension (`.png` is added automatically). Optionally add a macOS-style window border (off by default): ```json { "session_id": "shell-session-a1b2c3", - "name": "my-screenshot" + "name": "my-screenshot", + "border": { "style": "macos", "title": "Terminal" } } ``` @@ -301,12 +308,13 @@ The response contains a `download_url` for curl to save the file locally: ### **shell_record_start** -Start recording frames for GIF export. Frames are captured at the specified FPS (default 10, max 30, compression occurs by deduplicating identical frames): +Start recording frames for GIF export. Frames are captured at the specified FPS (default 10, max 30, compression occurs by deduplicating identical frames). The optional `border` parameter (same as [`shell_screenshot`](#shell_screenshot)) applies window chrome to every frame: ```json { "session_id": "shell-session-a1b2c3", - "fps": 10 + "fps": 10, + "border": { "style": "macos", "title": "Terminal" } } ``` diff --git a/docs/examples/claude-code-bordered.png b/docs/examples/claude-code-bordered.png new file mode 100644 index 0000000..fc3e871 Binary files /dev/null and b/docs/examples/claude-code-bordered.png differ diff --git a/evaluations/README.md b/evaluations/README.md index 62107ee..4cc4332 100644 --- a/evaluations/README.md +++ b/evaluations/README.md @@ -1,6 +1,6 @@ # Evaluations -Automated recording evaluations using Claude API with shellwright. +Automated recording and screenshot evaluations using Claude API with shellwright. ## Usage @@ -9,24 +9,87 @@ Automated recording evaluations using Claude API with shellwright. 
```bash # Requires ANTHROPIC_API_KEY npm run eval + +# Run a single scenario +npm run eval -- screenshot-border ``` ### Generate comparison table ```bash npm run eval:compare +open scenarios/index.html ``` ## Adding a new scenario 1. Create a folder in `scenarios/` 2. Add a `prompt.md` with instructions for Claude -3. Run evaluations to generate the recording +3. Run evaluations to generate artifacts (GIFs, PNGs) + +## Baselines + +Baselines are reference artifacts committed to the repo for visual comparison. Each artifact `<name>.<ext>` can have two baselines: + +| File | Source | +|------|--------| +| `baseline-local-<name>.<ext>` | Developer machine | +| `baseline-cicd-<name>.<ext>` | CI environment | + +### Updating baselines + +**Local baseline:** Run the eval locally and copy the output: + +```bash +npm run eval -- screenshot-border +cp scenarios/screenshot-border/screenshot.png scenarios/screenshot-border/baseline-local-screenshot.png +``` + +**CI/CD baseline:** Download the artifact from the PR preview and commit it: + +```bash +curl -o scenarios/vim-session/baseline-cicd-recording.gif \ + https://dwmkerr.github.io/shellwright/pr-preview/pr-XX/vim-session/recording.gif +``` + +The comparison page auto-discovers baselines by scanning for `baseline-{local,cicd}-*` files matching each artifact. 
+ +## MCP tools available in scenarios + +Scenario prompts instruct Claude to use these shellwright MCP tools: + +| Tool | Description | +|------|-------------| +| `shell_start` | Start a new PTY session with a command | +| `shell_send` | Send input to a PTY session (use `\r` for Enter) | +| `shell_read` | Read the current terminal buffer as plain text | +| `shell_screenshot` | Capture terminal screenshot as PNG | +| `shell_record_start` | Start recording a terminal session (captures frames for GIF) | +| `shell_record_stop` | Stop recording and save GIF | +| `shell_stop` | Stop a PTY session | + +### Key parameters + +**`shell_start`** — `command`, `args`, `cols`, `rows`, `theme` (e.g., `one-dark`) + +**`shell_send`** — `input` (with escape sequences: `\r`=Enter, `\x1b`=Escape, `\x03`=Ctrl+C) + +**`shell_screenshot`** — `name` (without extension), `border: { style: "macos", title: "..." }` + +**`shell_record_start`** — `fps` (default: 10, max: 30) + +**`shell_record_stop`** — `name` (without extension, `.gif` added automatically) + +### Artifact naming + +Tools append extensions automatically — pass names **without** extensions: +- `name: "recording"` → `recording.gif` +- `name: "screenshot"` → `screenshot.png` ## CI Integration The `recording-eval.yaml` workflow runs on every PR: 1. Executes all scenarios 2. Generates comparison table -3. Uploads recordings as artifacts -4. Posts summary to PR +3. Deploys to GitHub Pages as PR preview +4. 
Uploads GIF and PNG artifacts diff --git a/evaluations/scenarios/screenshot-border/baseline-cicd-screenshot-bordered.png b/evaluations/scenarios/screenshot-border/baseline-cicd-screenshot-bordered.png new file mode 100644 index 0000000..0ad1058 Binary files /dev/null and b/evaluations/scenarios/screenshot-border/baseline-cicd-screenshot-bordered.png differ diff --git a/evaluations/scenarios/screenshot-border/baseline-cicd-screenshot.png b/evaluations/scenarios/screenshot-border/baseline-cicd-screenshot.png new file mode 100644 index 0000000..a983b4b Binary files /dev/null and b/evaluations/scenarios/screenshot-border/baseline-cicd-screenshot.png differ diff --git a/evaluations/template.html b/evaluations/template.html index e4581f4..639a1c6 100644 --- a/evaluations/template.html +++ b/evaluations/template.html @@ -3,7 +3,7 @@ - Recording Evaluation + Evidence: Screenshots and Recordings -

Recording Evaluation

+

Evidence: Screenshots and Recordings

Baselines: Local = developer machine, CI/CD = previous CI run.
To update a baseline: download the PR artifact and commit as baseline-local-<name>.ext or baseline-cicd-<name>.ext diff --git a/src/index.ts b/src/index.ts index 57f0b59..cee2658 100644 --- a/src/index.ts +++ b/src/index.ts @@ -202,7 +202,7 @@ Tips: server.tool( "shell_screenshot", - "Capture terminal screenshot as PNG. Returns a download_url - use curl to save the file locally (e.g., curl -o screenshot.png )", + "Capture terminal screenshot as PNG. Optionally add a macOS-style window border with border: { style: \"macos\", title: \"...\" } (off by default). Returns a download_url - use curl to save the file locally (e.g., curl -o screenshot.png )", shellScreenshotSchema, async (params) => shellScreenshot(params, toolContext) ); @@ -216,7 +216,7 @@ Tips: server.tool( "shell_record_start", - "Start recording a terminal session (captures frames for GIF/video export)", + "Start recording a terminal session (captures frames for GIF export). Optionally add a macOS-style window border to every frame with border: { style: \"macos\", title: \"...\" } (off by default).", shellRecordStartSchema, async (params) => shellRecordStart(params, toolContext) ); diff --git a/src/tools/shell-record-start.ts b/src/tools/shell-record-start.ts index 2525e70..0ed0551 100644 --- a/src/tools/shell-record-start.ts +++ b/src/tools/shell-record-start.ts @@ -8,13 +8,17 @@ import { ToolContext } from "./types.js"; export const shellRecordStartSchema = { session_id: z.string().describe("Session ID"), fps: z.number().optional().describe("Frames per second (default: 10, max: 30)"), + border: z.object({ + style: z.enum(["macos"]).describe("Border style"), + title: z.string().optional().describe("Title text in the title bar"), + }).optional().describe("Optional window border decoration applied to every frame"), }; export async function shellRecordStart( - params: { session_id: string; fps?: number }, + params: { session_id: string; fps?: number; border?: { style: "macos"; title?: string } }, context: 
ToolContext ) { - const { session_id, fps } = params; + const { session_id, fps, border } = params; const session = context.sessions.get(session_id); if (!session) { throw new Error(`Session not found: ${session_id}`); @@ -34,14 +38,16 @@ export async function shellRecordStart( framesDir, frameCount: 0, fps: recordingFps, + border, interval: setInterval(async () => { if (!session.recording) return; const frameNum = session.recording.frameCount++; - const svg = bufferToSvg(session.terminal, session.cols, session.rows, { - theme: session.theme, - fontSize: context.config.FONT_SIZE, - fontFamily: context.config.FONT_FAMILY + const svg = bufferToSvg(session.terminal, session.cols, session.rows, { + theme: session.theme, + fontSize: context.config.FONT_SIZE, + fontFamily: context.config.FONT_FAMILY, + border, }); const png = new Resvg(svg, context.resvgOptions).render().asPng(); const framePath = path.join(framesDir, `frame${String(frameNum).padStart(6, "0")}.png`); diff --git a/src/tools/types.ts b/src/tools/types.ts index d41a09f..4c1b721 100644 --- a/src/tools/types.ts +++ b/src/tools/types.ts @@ -8,6 +8,7 @@ export interface RecordingState { frameCount: number; interval: ReturnType; fps: number; + border?: { style: string; title?: string }; } export interface Session {