From 137da255b2abc648bc9cb68e7fce308585b40caa Mon Sep 17 00:00:00 2001 From: William Weishuhn Date: Mon, 23 Mar 2026 12:23:51 -0700 Subject: [PATCH] =?UTF-8?q?fix:=20audit=20fixes=20=E2=80=94=20MCP=20tools,?= =?UTF-8?q?=20telemetry,=20matrix=20wiring,=20docs,=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses 9 issues from codebase audit: Critical: - Expose lock_verify, get_history, ci_report as MCP server tools - Wire telemetry enrichment fields into lock/history/ci-report/scan commands - Wire matrix comment renderer into scan --format pr-comment-matrix High: - Add score dedup sync comments between src/score.ts and api/worker.ts - Update README with lock, history, ci-report, badge, score commands - Add 5 CLI integration tests for lock, history, ci-report commands Medium: - Add 10 security tests for validateArgs/validatePath - Add status note to github-app/README.md - Add rationale comments to scoring weights and performance thresholds 302/302 tests pass. Build and lint clean. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- README.md | 39 +++++++++++ action/README.md | 4 +- api/src/worker.ts | 18 ++--- github-app/README.md | 2 + src/commands/ci-report.ts | 8 +++ src/commands/history.ts | 5 ++ src/commands/lock.ts | 15 +++++ src/commands/scan.ts | 25 +++++-- src/score.ts | 15 +++-- src/server.ts | 120 +++++++++++++++++++++++++++++++++- src/telemetry.ts | 1 + tests/cli-entrypoint.test.ts | 42 ++++++++++++ tests/server-security.test.ts | 46 ++++++++++++- 13 files changed, 319 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 870342e..567dbdd 100644 --- a/README.md +++ b/README.md @@ -81,6 +81,12 @@ Or add it manually to your config: | `watch ` | Watch a server for changes, alert on regressions | | `suggest` | Detect your stack and recommend MCP servers from the registry | | `serve` | Start as an MCP server for AI agents | +| `lock` | Snapshot MCP server schemas into a lock file | +| `lock verify` | Verify live servers match the lock file | +| `history` | Show health score trends for your MCP servers | +| `ci-report` | Generate CI report for GitHub issue creation | +| `score ` | Score an MCP server's health (0-100) | +| `badge ` | Generate an SVG health score badge for README | Run with no arguments for an interactive menu: @@ -162,8 +168,41 @@ jobs: security: true ``` +Action inputs: + +| Input | Description | Default | +|-------|-------------|---------| +| `command` | Server command to test | (required if no `target`) | +| `target` | Path to target config JSON | | +| `targets` | Path to MCP config file for multi-server matrix scan | | +| `deep` | Also invoke safe tools | `false` | +| `security` | Run security analysis | `false` | +| `fail-on-regression` | Fail the action on issues | `true` | +| `comment-on-pr` | Post report as PR comment | `true` | +| `set-status` | Set a commit status check (green/red) on the HEAD SHA | `true` | +| `github-token` | Token for PR comments and commit statuses | `${{ github.token }}` | 
+ The action runs checks on every PR, comments a markdown report, and blocks merge on regressions. See [`action/README.md`](./action/README.md) for all options. +### Lock Files + +```bash +$ npx @kryptosai/mcp-observatory lock # Snapshot all server schemas +$ npx @kryptosai/mcp-observatory lock verify # Verify no drift since last lock +``` + +### Trend Tracking + +```bash +$ npx @kryptosai/mcp-observatory history # Show health trends over time +``` + +### Nightly Scans + +```bash +$ npx @kryptosai/mcp-observatory ci-report # Generate regression report for CI +``` + ## MCP Server Mode **No other testing tool is itself an MCP server.** Add Observatory as a server and your AI agent can autonomously test, diagnose, and monitor your other MCP servers. diff --git a/action/README.md b/action/README.md index e647e31..6c1e734 100644 --- a/action/README.md +++ b/action/README.md @@ -29,7 +29,9 @@ jobs: | `security` | Run security analysis | `false` | | `fail-on-regression` | Fail the action on issues | `true` | | `comment-on-pr` | Post report as PR comment | `true` | -| `github-token` | Token for PR comments | `${{ github.token }}` | +| `set-status` | Set a commit status check (green/red) on the HEAD SHA | `true` | +| `targets` | Path to MCP config file for multi-server matrix scan | | +| `github-token` | Token for PR comments and commit statuses | `${{ github.token }}` | | `node-version` | Node.js version | `22` | ## Outputs diff --git a/api/src/worker.ts b/api/src/worker.ts index 7110fe3..544c338 100644 --- a/api/src/worker.ts +++ b/api/src/worker.ts @@ -134,9 +134,9 @@ interface RunArtifact { fatalError?: string; } -// --------------------------------------------------------------------------- -// Score computation (ported from src/score.ts) -// --------------------------------------------------------------------------- +// ── Score computation (duplicated from src/score.ts) ──────────────────────── +// IMPORTANT: This logic is duplicated from src/score.ts because the 
Worker +// can't import from the main package. Keep both files in sync when making changes. const STATUS_SCORES: Record = { pass: 100, @@ -214,6 +214,8 @@ function scorePerformance( ); const p95 = sorted[p95Index] ?? 0; + // p95 latency thresholds for performance scoring + // <500ms = excellent (100), <1s = good (80), <2s = acceptable (60), <5s = slow (40), ≥5s = poor (20) let score: number; if (p95 < 500) score = 100; else if (p95 < 1000) score = 80; @@ -237,11 +239,11 @@ function computeHealthScore( performanceMetrics?: PerformanceMetrics, ): HealthScore { const w = { - protocolCompliance: 0.3, - schemaQuality: 0.2, - security: 0.2, - reliability: 0.2, - performance: 0.1, + protocolCompliance: 0.3, // Highest — spec compliance is foundational for interop + schemaQuality: 0.2, // Good schemas enable AI agents to use tools correctly + security: 0.2, // Parity with quality — both critical for production use + reliability: 0.2, // Tools/prompts/resources actually responding as expected + performance: 0.1, // Lowest — latency matters less than correctness }; const dimensions: ScoreDimension[] = [ diff --git a/github-app/README.md b/github-app/README.md index 8f783cb..8c7203f 100644 --- a/github-app/README.md +++ b/github-app/README.md @@ -1,5 +1,7 @@ # MCP Observatory GitHub App +> **Status**: Planned feature — not yet deployed. This is the future hosted Observatory GitHub App. + A GitHub App that automatically analyzes MCP server configurations in pull requests and posts health score reports as PR comments. 
## Setup diff --git a/src/commands/ci-report.ts b/src/commands/ci-report.ts index 230907f..095a9a7 100644 --- a/src/commands/ci-report.ts +++ b/src/commands/ci-report.ts @@ -2,6 +2,7 @@ import { readdir, readFile } from "node:fs/promises"; import path from "node:path"; import type { Command } from "commander"; import type { RunArtifact } from "../types.js"; +import { buildEvent, recordEvent } from "../telemetry.js"; import { validateRunArtifact } from "../validate.js"; import { defaultRunsDirectory } from "../storage.js"; @@ -96,6 +97,13 @@ export function registerCiReportCommands(program: Command): void { process.stdout.write(JSON.stringify(report, null, 2) + "\n"); } + recordEvent(buildEvent("command_complete", "ci-report", "cli", { + nightlyScan: true, + issueCreated: report.hasRegressions, + matrixServerCount: report.serverCount, + matrixFailCount: report.failCount, + })); + if (report.hasRegressions) { process.exitCode = 1; } diff --git a/src/commands/history.ts b/src/commands/history.ts index c8b35f2..ea39691 100644 --- a/src/commands/history.ts +++ b/src/commands/history.ts @@ -1,5 +1,6 @@ import type { Command } from "commander"; import { readHistory, getTrend, renderTrendLabel } from "../history.js"; +import { buildEvent, recordEvent } from "../telemetry.js"; import { ANSI, c } from "./helpers.js"; export function registerHistoryCommands(program: Command): void { @@ -58,5 +59,9 @@ export function registerHistoryCommands(program: Command): void { ` ${paddedId} ${c(gradeColor, current.grade)} (${current.healthScore}) ${label}\n`, ); } + + recordEvent(buildEvent("command_complete", "history", "cli", { + historyEntryCount: history.entries.length, + })); }); } diff --git a/src/commands/lock.ts b/src/commands/lock.ts index 55bfeb2..18207bd 100644 --- a/src/commands/lock.ts +++ b/src/commands/lock.ts @@ -1,6 +1,7 @@ import type { Command } from "commander"; import { scanForTargets } from "../discovery.js"; +import { buildEvent, recordEvent } from 
"../telemetry.js"; import { readLockFile, writeLockFile, @@ -82,6 +83,11 @@ export function registerLockCommands(program: Command): void { process.stdout.write( `\n ${c(ANSI.green, "✓")} Locked ${entries.length} server${entries.length === 1 ? "" : "s"} to ${lockPath}\n\n`, ); + + recordEvent(buildEvent("command_complete", "lock", "cli", { + lockFileExists: true, + lockServerCount: entries.length, + })); }); lockCmd @@ -109,6 +115,7 @@ export function registerLockCommands(program: Command): void { ); let anyFailed = false; + let totalDriftCount = 0; for (const t of targets) { const lockEntry = lockMap.get(t.config.targetId); @@ -129,6 +136,7 @@ export function registerLockCommands(program: Command): void { process.stdout.write(` ${c(ANSI.green, "✓")} ${t.config.targetId}\n`); } else { anyFailed = true; + totalDriftCount += result.drift.length; process.stdout.write(` ${c(ANSI.red, "✗")} ${t.config.targetId}\n`); for (const d of result.drift) { process.stdout.write( @@ -145,6 +153,13 @@ export function registerLockCommands(program: Command): void { process.stdout.write("\n"); + recordEvent(buildEvent("command_complete", "lock-verify", "cli", { + lockFileExists: true, + lockServerCount: lock.servers.length, + lockDriftDetected: anyFailed, + lockDriftCount: totalDriftCount, + })); + if (anyFailed) { process.exitCode = 1; } diff --git a/src/commands/scan.ts b/src/commands/scan.ts index 713909a..df8c634 100644 --- a/src/commands/scan.ts +++ b/src/commands/scan.ts @@ -7,12 +7,13 @@ import { } from "../index.js"; import { appendHistory, buildHistoryEntry } from "../history.js"; import { buildEvent, recordEvent } from "../telemetry.js"; +import type { RunArtifact } from "../types.js"; import { TOOL_VERSION } from "../version.js"; import { ANSI, LOGO, c, useColor } from "./helpers.js"; // ── Scan implementation ───────────────────────────────────────────────────── -async function runScan(bin: string, configPath: string | undefined, invokeTools: boolean, securityCheck?: 
boolean): Promise { +async function runScan(bin: string, configPath: string | undefined, invokeTools: boolean, securityCheck?: boolean, format?: string): Promise { const t0 = Date.now(); process.stdout.write(useColor() ? c(ANSI.cyan, LOGO) + ` ${c(ANSI.dim, `v${TOOL_VERSION}`)}\n\n` : LOGO + ` v${TOOL_VERSION}\n\n`); @@ -53,6 +54,7 @@ async function runScan(bin: string, configPath: string | undefined, invokeTools: } const results: ScanRow[] = []; + const artifacts: RunArtifact[] = []; const checkStatusMap: Record = {}; let passCount = 0; let failCount = 0; @@ -64,6 +66,7 @@ async function runScan(bin: string, configPath: string | undefined, invokeTools: process.stdout.write(` ${c(ANSI.dim, "⟳")} Checking ${c(ANSI.bold, t.config.targetId)}...`); try { const artifact = await runTarget(t.config, { invokeTools, securityCheck }); + artifacts.push(artifact); const toolsCheck = artifact.checks.find((ch) => ch.id === "tools"); const promptsCheck = artifact.checks.find((ch) => ch.id === "prompts"); const resourcesCheck = artifact.checks.find((ch) => ch.id === "resources"); @@ -164,6 +167,12 @@ async function runScan(bin: string, configPath: string | undefined, invokeTools: } process.stdout.write("\n"); + if (format === "pr-comment-matrix" && artifacts.length > 0) { + const { renderMatrixComment } = await import("../reporters/pr-comment-matrix.js"); + const rows = artifacts.map(a => ({ artifact: a })); + process.stdout.write(renderMatrixComment(rows) + "\n"); + } + recordEvent(buildEvent("command_complete", "scan", "cli", { serversScanned: results.length, toolsFound: totalTools, @@ -178,6 +187,9 @@ async function runScan(bin: string, configPath: string | undefined, invokeTools: t.config.adapter === "http" ? 
(t.config as { url: string }).url : `${(t.config as { command: string }).command} ${t.config.args.join(" ")}`, ), checkStatuses: checkStatusMap, + matrixServerCount: results.length, + matrixPassCount: passCount, + matrixFailCount: failCount, })); if (failCount > 0) { @@ -193,11 +205,12 @@ export function registerScanCommands(program: Command, bin: string): void { .description("Check all MCP servers in your Claude configs.") .option("--config ", "Path to a specific MCP config file.") .option("--security", "Run deep security scan (credential patterns, response analysis). Lightweight security is always included.") + .option("--format ", "Output format: terminal or pr-comment-matrix.", "terminal") .option("--no-color", "Disable colored output."); // `scan` with no subcommand — basic scan - scanCmd.action(async (options: { config?: string; security?: boolean }) => { - await runScan(bin, options.config, false, options.security); + scanCmd.action(async (options: { config?: string; security?: boolean; format: string }) => { + await runScan(bin, options.config, false, options.security, options.format); }); // `scan deep` — scan + invoke tools @@ -206,10 +219,12 @@ export function registerScanCommands(program: Command, bin: string): void { .description("Scan and also invoke safe tools to verify they execute.") .option("--config ", "Path to a specific MCP config file.") .option("--security", "Run deep security scan (credential patterns, response analysis). Lightweight security is always included.") - .action(async (options: { config?: string; security?: boolean }) => { + .option("--format ", "Output format: terminal or pr-comment-matrix.", "terminal") + .action(async (options: { config?: string; security?: boolean; format: string }) => { // Inherit parent config option if set const parentConfig = scanCmd.opts().config as string | undefined; const parentSecurity = scanCmd.opts().security as boolean | undefined; - await runScan(bin, options.config ?? 
parentConfig, true, options.security ?? parentSecurity ?? true); + const parentFormat = scanCmd.opts().format as string; + await runScan(bin, options.config ?? parentConfig, true, options.security ?? parentSecurity ?? true, options.format ?? parentFormat); }); } diff --git a/src/score.ts b/src/score.ts index 9256b7f..801306f 100644 --- a/src/score.ts +++ b/src/score.ts @@ -1,3 +1,6 @@ +// IMPORTANT: Scoring logic is duplicated in api/src/worker.ts for the Cloudflare Worker +// deployment (which can't import from src/). Keep both files in sync when making changes. + import type { CheckResult, HealthGrade, HealthScore, PerformanceMetrics, ScoreDimension } from "./types.js"; export interface ScoreWeights { @@ -9,11 +12,11 @@ export interface ScoreWeights { } export const DEFAULT_WEIGHTS: ScoreWeights = { - protocolCompliance: 0.30, - schemaQuality: 0.20, - security: 0.20, - reliability: 0.20, - performance: 0.10, + protocolCompliance: 0.30, // Highest — spec compliance is foundational for interop + schemaQuality: 0.20, // Good schemas enable AI agents to use tools correctly + security: 0.20, // Parity with quality — both critical for production use + reliability: 0.20, // Tools/prompts/resources actually responding as expected + performance: 0.10, // Lowest — latency matters less than correctness }; const STATUS_SCORES: Record = { @@ -80,6 +83,8 @@ function scorePerformance( const p95Index = Math.min(Math.ceil(sorted.length * 0.95) - 1, sorted.length - 1); const p95 = sorted[p95Index] ?? 
0; + // p95 latency thresholds for performance scoring + // <500ms = excellent (100), <1s = good (80), <2s = acceptable (60), <5s = slow (40), ≥5s = poor (20) let score: number; if (p95 < 500) score = 100; else if (p95 < 1000) score = 80; diff --git a/src/server.ts b/src/server.ts index 9af2742..407256c 100644 --- a/src/server.ts +++ b/src/server.ts @@ -21,6 +21,9 @@ import type { RunArtifact } from "./types.js"; import { compareResponses } from "./verify.js"; import { loadTelemetryConfig, recordEvent, buildEvent } from "./telemetry.js"; import { TOOL_VERSION } from "./version.js"; +import { readLockFile, verifyAgainstLock } from "./lockfile.js"; +import { readHistory, getTrend, renderTrendLabel } from "./history.js"; +import { buildCiReport } from "./commands/ci-report.js"; // ── Security: Command Allowlist ──────────────────────────────────────────── // MCP server mode is invoked by an LLM, not an operator. Arbitrary command @@ -41,7 +44,7 @@ const ALLOWED_COMMANDS = new Set([ // Reject args containing shell metacharacters that could enable injection. const DANGEROUS_ARG_PATTERN = /[;|`]|\$\(|&&|\|\|/; -function validateArgs(args: string[]): void { +export function validateArgs(args: string[]): void { for (const arg of args) { if (DANGEROUS_ARG_PATTERN.test(arg)) { throw new Error( @@ -633,6 +636,121 @@ export async function startServer(): Promise { }, ); + server.tool( + "lock_verify", + "Verify that live MCP servers still match a previously saved lock file. 
Detects schema drift, added/removed tools, and breaking changes.", + { + config: z.string().optional().describe("Path to MCP config file."), + }, + async ({ config }) => { + const startMs = Date.now(); + try { + const lockFile = await readLockFile(); + const targets = await scanForTargets(config); + const results: string[] = []; + let anyFailed = false; + + for (const t of targets) { + const lockEntry = lockFile.servers.find(s => s.targetId === t.config.targetId); + if (!lockEntry) continue; + + const artifact = await runTarget(t.config); + const result = verifyAgainstLock(lockEntry, artifact); + if (result.passed) { + results.push(`✓ ${t.config.targetId}: no drift`); + } else { + anyFailed = true; + results.push(`✗ ${t.config.targetId}: ${result.drift.length} changes`); + for (const d of result.drift) { + results.push(` - ${d.category}: ${d.name} — ${d.change}`); + } + } + } + + if (results.length === 0) { + results.push("No servers in lock file match discovered targets."); + } + + logRequest("lock_verify", startMs, anyFailed); + return { content: [{ type: "text", text: results.join("\n") }] }; + } catch (error) { + const msg = error instanceof Error ? error.message : String(error); + logRequest("lock_verify", startMs, true); + return { content: [{ type: "text", text: `Lock verify failed: ${msg}` }], isError: true }; + } + }, + ); + + server.tool( + "get_history", + "Get health score trends for MCP servers from run history.", + { + target: z.string().optional().describe("Filter to a specific target ID."), + }, + async ({ target }) => { + const startMs = Date.now(); + try { + const history = await readHistory(); + let targetIds = [...new Set(history.entries.map(e => e.targetId))]; + if (target) targetIds = targetIds.filter(id => id === target); + + if (targetIds.length === 0) { + logRequest("get_history", startMs); + return { content: [{ type: "text", text: "No history found. Run a scan or test first." 
}] }; + } + + const lines: string[] = []; + for (const id of targetIds) { + const trend = getTrend(id, history); + if (!trend) continue; + const { current } = trend; + const label = renderTrendLabel(trend); + lines.push(`${id}: ${current.grade} (${current.healthScore}) ${label}`); + } + + logRequest("get_history", startMs); + return { content: [{ type: "text", text: lines.join("\n") }] }; + } catch (error) { + const msg = error instanceof Error ? error.message : String(error); + logRequest("get_history", startMs, true); + return { content: [{ type: "text", text: `History failed: ${msg}` }], isError: true }; + } + }, + ); + + server.tool( + "ci_report", + "Generate a CI regression report from run artifacts.", + { + artifactsDir: z.string().optional().describe("Directory containing run artifacts. Defaults to .mcp-observatory/runs/"), + }, + async ({ artifactsDir }) => { + const startMs = Date.now(); + try { + const { readdir, readFile } = await import("node:fs/promises"); + const dir = artifactsDir ?? path.join(process.cwd(), ".mcp-observatory", "runs"); + const files = await readdir(dir); + const artifacts: RunArtifact[] = []; + for (const f of files) { + if (!f.endsWith(".json")) continue; + try { + const raw = await readFile(path.join(dir, f), "utf8"); + const parsed = JSON.parse(raw) as Record; + if (parsed["artifactType"] === "run") artifacts.push(parsed as unknown as RunArtifact); + } catch { /* skip invalid */ } + } + + const report = buildCiReport(artifacts); + logRequest("ci_report", startMs); + return { content: [{ type: "text", text: JSON.stringify(report, null, 2) }] }; + } catch (error) { + const msg = error instanceof Error ? 
error.message : String(error); + logRequest("ci_report", startMs, true); + return { content: [{ type: "text", text: `CI report failed: ${msg}` }], isError: true }; + } + }, + ); + const transport = new StdioServerTransport(); await server.connect(transport); } diff --git a/src/telemetry.ts b/src/telemetry.ts index 51b5023..36e46a6 100644 --- a/src/telemetry.ts +++ b/src/telemetry.ts @@ -192,6 +192,7 @@ export function recordEvent(event: TelemetryEvent): void { method: "POST", headers: { "Content-Type": "application/json" }, body, + // 3s timeout — telemetry is fire-and-forget to avoid blocking user workflows signal: AbortSignal.timeout(3_000), }).catch(() => { // Silently ignore — telemetry must never block or fail visibly diff --git a/tests/cli-entrypoint.test.ts b/tests/cli-entrypoint.test.ts index 8a869c7..3b533ba 100644 --- a/tests/cli-entrypoint.test.ts +++ b/tests/cli-entrypoint.test.ts @@ -1,5 +1,7 @@ import { describe, expect, it } from "vitest"; import { execFileSync } from "node:child_process"; +import os from "node:os"; +import fs from "node:fs"; import path from "node:path"; const CLI = path.resolve("src/cli.ts"); @@ -151,4 +153,44 @@ describe("CLI entrypoint", () => { const { exitCode } = runCli(["nonexistent-command"]); expect(exitCode).not.toBe(0); }); + + // ── Lock commands ─────────────────────────────────────────────── + it("lock subcommand shows help", () => { + const { stdout, exitCode } = runCli(["lock", "--help"]); + expect(exitCode).toBe(0); + expect(stdout).toContain("lock"); + }); + + // ── History commands ──────────────────────────────────────────── + it("history subcommand shows help", () => { + const { stdout, exitCode } = runCli(["history", "--help"]); + expect(exitCode).toBe(0); + expect(stdout).toContain("history"); + }); + + it("history with no data shows empty message", () => { + const tmpDir = path.join(os.tmpdir(), `obs-test-${Date.now()}`); + fs.mkdirSync(tmpDir, { recursive: true }); + const { stdout, exitCode } = 
runCli(["history"], { cwd: tmpDir }); + expect(exitCode).toBe(0); + expect(stdout).toContain("No history"); + fs.rmSync(tmpDir, { recursive: true, force: true }); + }); + + // ── CI Report commands ────────────────────────────────────────── + it("ci-report subcommand shows help", () => { + const { stdout, exitCode } = runCli(["ci-report", "--help"]); + expect(exitCode).toBe(0); + expect(stdout).toContain("ci-report"); + }); + + it("ci-report with empty dir outputs valid JSON", () => { + const tmpDir = path.join(os.tmpdir(), `obs-test-${Date.now()}`); + fs.mkdirSync(tmpDir, { recursive: true }); + const { stdout, exitCode } = runCli(["ci-report", "--artifacts-dir", tmpDir]); + expect(exitCode).toBe(0); + const parsed = JSON.parse(stdout) as Record; + expect(parsed).toHaveProperty("hasRegressions", false); + fs.rmSync(tmpDir, { recursive: true, force: true }); + }); }); diff --git a/tests/server-security.test.ts b/tests/server-security.test.ts index 4cda481..bab7cee 100644 --- a/tests/server-security.test.ts +++ b/tests/server-security.test.ts @@ -1,6 +1,6 @@ import { describe, expect, it } from "vitest"; -import { validateCommand, validatePath } from "../src/server.js"; +import { validateArgs, validateCommand, validatePath } from "../src/server.js"; describe("MCP Server Command Allowlist", () => { it("allows npx commands", () => { @@ -79,3 +79,47 @@ describe("Path Validation", () => { expect(() => validatePath("/tmp/runs-evil/file.json", "/tmp/runs")).toThrow(/resolves outside/); }); }); + +describe("validateArgs", () => { + it("rejects semicolon injection", () => { + expect(() => validateArgs(["; rm -rf /"])).toThrow(); + }); + + it("rejects backtick injection", () => { + expect(() => validateArgs(["`whoami`"])).toThrow(); + }); + + it("rejects command substitution", () => { + expect(() => validateArgs(["$(cat /etc/passwd)"])).toThrow(); + }); + + it("rejects && chaining", () => { + expect(() => validateArgs(["foo && bar"])).toThrow(); + }); + + it("rejects || 
chaining", () => { + expect(() => validateArgs(["foo || bar"])).toThrow(); + }); + + it("rejects pipe", () => { + expect(() => validateArgs(["foo | bar"])).toThrow(); + }); + + it("accepts normal arguments", () => { + expect(() => validateArgs(["--verbose", "/path/to/file", "hello world"])).not.toThrow(); + }); +}); + +describe("validatePath", () => { + it("rejects path traversal", () => { + expect(() => validatePath("../../../etc/passwd", "/home/user")).toThrow(); + }); + + it("rejects absolute escape", () => { + expect(() => validatePath("/etc/passwd", "/home/user")).toThrow(); + }); + + it("accepts paths within allowed root", () => { + expect(() => validatePath("/home/user/subdir/file.txt", "/home/user")).not.toThrow(); + }); +});