Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions src/checks/security.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,66 @@ function scanResponsesForCredentials(
return findings;
}

/**
 * Runs the static security rules over a supplied tool list and condenses the
 * findings into a single `security-lite` check result.
 *
 * Status mapping: "fail" when any finding has severity "high", "partial" when
 * there is no "high" finding but at least one "medium", and "pass" otherwise
 * (so "low" findings alone still pass).
 *
 * @param tools - Tool definitions to evaluate; each is converted with `toolToInfo`.
 * @param target - Target configuration, evaluated by the `no-auth-http` rule.
 * @returns The check result wrapped in an `ObservedCheck`.
 */
export function runLightweightSecurityCheck(
  tools: Tool[],
  target: TargetConfig,
): ObservedCheck {
  const startedAt = performance.now();

  // Target-level rule (no-auth-http) goes first so its finding leads the list.
  const targetFinding = checkNoAuthHttp(target);
  const findings: SecurityFinding[] = targetFinding ? [targetFinding] : [];

  // Tool-level rules: every rule is applied to every tool, tool by tool.
  for (const info of tools.map(toolToInfo)) {
    for (const rule of SECURITY_RULES) {
      const hit = rule.match(info);
      if (hit) findings.push(hit);
    }
  }

  const countWithSeverity = (level: SecurityFinding["severity"]): number =>
    findings.filter(f => f.severity === level).length;
  const highCount = countWithSeverity("high");
  const mediumCount = countWithSeverity("medium");
  const lowCount = countWithSeverity("low");

  // The worst severity present decides the overall status.
  const status: "pass" | "partial" | "fail" =
    highCount > 0 ? "fail" : mediumCount > 0 ? "partial" : "pass";

  let message: string;
  if (findings.length === 0) {
    message = "No security issues detected (lightweight scan).";
  } else {
    message = `Found ${findings.length} security finding(s): ${highCount} high, ${mediumCount} medium, ${lowCount} low.`;
  }

  const diagnostics = findings.map(f => `[${f.severity}] ${f.message}`);
  // De-duplicate tool names while keeping first-seen order.
  const toolNames = [...new Set(findings.map(f => f.toolName))];

  const evidence: EvidenceSummary = {
    endpoint: "security/scan-lite",
    advertised: true,
    responded: true,
    minimalShapePresent: true,
    itemCount: findings.length,
    // Empty lists are reported as undefined rather than [].
    identifiers: toolNames.length > 0 ? toolNames : undefined,
    diagnostics: diagnostics.length > 0 ? diagnostics : undefined,
  };

  const elapsedMs = performance.now() - startedAt;
  return {
    result: makeCheckResult("security-lite", status, elapsedMs, message, [evidence]),
  };
}

export async function runSecurityCheck(
context: CheckContext,
previousChecks: CheckResult[],
Expand Down
4 changes: 3 additions & 1 deletion src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,9 @@ const MENU_GROUPS: MenuGroup[] = [
],
},
{
heading: "More",
heading: "CI / Regression Testing",
items: [
{ command: ["watch"], label: "watch", outcome: "Run a check, diff against previous, alert on regressions" },
{ command: ["record"], label: "record", outcome: "Capture a session for offline replay or CI" },
{ command: ["diff"], label: "diff", outcome: "Compare two runs for regressions" },
{ command: ["test"], label: "test", outcome: "Test a single server by command" },
Expand Down Expand Up @@ -221,6 +222,7 @@ async function main(): Promise<void> {
"",
` ${c(ANSI.bold, "CI / Regression Testing")}`,
"",
` ${c(ANSI.dim, "$")} ${c(ANSI.cyan, `${bin} watch`)} ${c(ANSI.dim, "<cmd>")} Run check, diff against previous, alert regressions`,
` ${c(ANSI.dim, "$")} ${c(ANSI.cyan, `${bin} record`)} ${c(ANSI.dim, "<cmd>")} Capture a session for offline replay`,
` ${c(ANSI.dim, "$")} ${c(ANSI.cyan, `${bin} diff`)} ${c(ANSI.dim, "<a> <b>")} Compare two runs for regressions`,
` ${c(ANSI.dim, "$")} ${c(ANSI.cyan, `${bin} badge`)} ${c(ANSI.dim, "<cmd>")} Generate a health badge for README`,
Expand Down
4 changes: 2 additions & 2 deletions src/commands/scan.ts
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ export function registerScanCommands(program: Command, bin: string): void {
.command("scan")
.description("Check all MCP servers in your Claude configs.")
.option("--config <path>", "Path to a specific MCP config file.")
.option("--security", "Run security analysis on tool schemas.")
.option("--security", "Run deep security scan (credential patterns, response analysis). Lightweight security is always included.")
.option("--no-color", "Disable colored output.");

// `scan` with no subcommand — basic scan
Expand All @@ -190,7 +190,7 @@ export function registerScanCommands(program: Command, bin: string): void {
.command("deep")
.description("Scan and also invoke safe tools to verify they execute.")
.option("--config <path>", "Path to a specific MCP config file.")
.option("--security", "Run security analysis on tool schemas.")
.option("--security", "Run deep security scan (credential patterns, response analysis). Lightweight security is always included.")
.action(async (options: { config?: string; security?: boolean }) => {
// Inherit parent config option if set
const parentConfig = scanCmd.opts().config as string | undefined;
Expand Down
2 changes: 1 addition & 1 deletion src/commands/test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ export function registerTestCommands(program: Command): void {
.passThroughOptions()
.description("Test a specific server by command.")
.argument("<command...>", "Server command and arguments to run.")
.option("--security", "Run security analysis on tool schemas.")
.option("--security", "Run deep security scan (credential patterns, response analysis). Lightweight security is always included.")
.option("--no-color", "Disable colored output.")
.action(async (commandArgs: string[], options: { security?: boolean }) => {
const t0 = Date.now();
Expand Down
73 changes: 63 additions & 10 deletions src/commands/watch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,50 @@ import {
writeRunArtifact,
type TargetConfig,
} from "../index.js";
import { readTargetConfig } from "./helpers.js";
import { isCI } from "../ci.js";
import { defaultRunsDirectory, findLatestArtifact, readArtifact } from "../storage.js";
import { ANSI, c, formatOutput, targetFromCommand } from "./helpers.js";

// ── Watch mode implementation ───────────────────────────────────────────────
// ── One-shot mode ────────────────────────────────────────────────────────────

/**
 * Runs the target once, stores the resulting artifact, and prints either a
 * diff against the most recent earlier run or, when no earlier run artifact
 * exists, the run itself.
 *
 * Exit code: set to 1 when a diff was produced, `failOnRegression` is set and
 * the diff reports regressions; on a first run, set to 1 when the run's gate
 * is "fail".
 *
 * @param target - Target configuration passed to `runTarget`.
 * @param outDir - Directory where artifacts are written and looked up.
 * @param options - `format` is handed to `formatOutput`; `failOnRegression`
 *   controls whether regressions in the diff produce a failing exit code.
 */
async function runWatchOneShot(
  target: TargetConfig,
  outDir: string,
  options: { format: string; failOnRegression: boolean },
): Promise<void> {
  const { diffArtifacts: diff } = await import("../diff.js");
  const format = options.format as "terminal" | "json";

  // Resolve the previous artifact BEFORE the new one is written. Looking it up
  // afterwards returns the file that was just written, so the diff would be
  // skipped on every run (or the run would be compared with itself).
  const previousPath = await findLatestArtifact(outDir, target.targetId);

  const artifact = await runTarget(target);
  const outPath = await writeRunArtifact(artifact, outDir);

  // The path comparison guards against the new artifact having overwritten
  // the previous file, in which case there is nothing older left to diff.
  if (previousPath !== null && previousPath !== outPath) {
    const previous = await readArtifact(previousPath);
    if (previous.artifactType === "run") {
      const diffResult = diff(previous, artifact);

      process.stdout.write(formatOutput(diffResult, format) + "\n");
      process.stdout.write(`${c(ANSI.dim, `Artifact: ${outPath}`)}\n`);

      if (options.failOnRegression && diffResult.summary.regressions > 0) {
        process.exitCode = 1;
      }
      return;
    }
  }

  // First run — no previous run artifact to diff against
  process.stdout.write(formatOutput(artifact, format) + "\n");
  process.stdout.write(`${c(ANSI.dim, `Artifact: ${outPath}`)}\n`);

  if (artifact.gate === "fail") {
    process.exitCode = 1;
  }
}

// ── Continuous polling mode ──────────────────────────────────────────────────

async function runWatchMode(target: TargetConfig, outDir: string, intervalSeconds: number): Promise<void> {
const { diffArtifacts: diff } = await import("../diff.js");
Expand Down Expand Up @@ -48,18 +89,30 @@ async function runWatchMode(target: TargetConfig, outDir: string, intervalSecond

// ── Register ────────────────────────────────────────────────────────────────

export { runWatchMode };
export { runWatchMode, runWatchOneShot };

export function registerWatchCommands(program: Command): void {
program
.command("watch")
.description("Watch a server for changes, alert on regressions.")
.argument("<config>", "Path to a target config JSON file.")
.option("--interval <seconds>", "Check interval in seconds.", "30")
.passThroughOptions()
.description("Run a server check, diff against previous run, alert on regressions.")
.argument("<command...>", "Server command and arguments to run.")
.option("--interval <seconds>", "Continuous polling interval in seconds (omit for one-shot).")
.option("--format <format>", "Output format: terminal or json.", "terminal")
.option("--fail-on-regression", "Exit with code 1 on regressions.", isCI)
.option("--no-fail-on-regression", "Do not exit with code 1 on regressions.")
.option("--no-color", "Disable colored output.")
.action(async (configPath: string, options: { interval: string }) => {
const target = await readTargetConfig(configPath);
const outDir = (await import("../storage.js")).defaultRunsDirectory(process.cwd());
await runWatchMode(target, outDir, parseInt(options.interval, 10) || 30);
.action(async (commandArgs: string[], options: { interval?: string; format: string; failOnRegression: boolean }) => {
const target = targetFromCommand(commandArgs);
const outDir = defaultRunsDirectory(process.cwd());

if (options.interval) {
await runWatchMode(target, outDir, parseInt(options.interval, 10) || 30);
} else {
await runWatchOneShot(target, outDir, {
format: options.format,
failOnRegression: options.failOnRegression,
});
}
});
}
3 changes: 2 additions & 1 deletion src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ export {
} from "./cassette.js";
export { runConformanceCheck } from "./checks/conformance.js";
export { runSchemaQualityCheck } from "./checks/schema-quality.js";
export { runSecurityCheck } from "./checks/security.js";
export { runLightweightSecurityCheck, runSecurityCheck } from "./checks/security.js";
export { SECURITY_RULES, type SecurityFinding, type SecurityRule, type ToolInfo } from "./checks/security-rules.js";
export { diffArtifacts } from "./diff.js";
export { scanForTargets } from "./discovery.js";
Expand All @@ -21,6 +21,7 @@ export { runTarget, runTargetRecording, type RunOptions, type RunResult } from "
export { computeHealthScore, type ScoreWeights, DEFAULT_WEIGHTS } from "./score.js";
export {
defaultRunsDirectory,
findLatestArtifact,
readArtifact,
writeRunArtifact
} from "./storage.js";
Expand Down
11 changes: 10 additions & 1 deletion src/runner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import { runPromptsCheck } from "./checks/prompts.js";
import { runResourcesCheck } from "./checks/resources.js";
import { runSchemaQualityCheck } from "./checks/schema-quality.js";
import { runToolsCheck } from "./checks/tools.js";
import { runSecurityCheck } from "./checks/security.js";
import { runLightweightSecurityCheck, runSecurityCheck } from "./checks/security.js";
import { runToolsInvokeCheck } from "./checks/tools-invoke.js";
import { computeHealthScore } from "./score.js";
import { errorMessage } from "./utils/errors.js";
Expand Down Expand Up @@ -128,6 +128,15 @@ async function runTargetWithRecording(target: TargetConfig, options?: RunOptions
resourcesCheck.result
];

// Lightweight security check: re-lists the tools (one additional listTools call) and runs the static rules on the result
try {
const toolsResp = await session.client.listTools(undefined, { timeout: checkContext.timeoutMs });
const liteSecCheck = runLightweightSecurityCheck(toolsResp.tools, target);
checks.push(liteSecCheck.result);
} catch {
// If listing tools fails, skip lightweight security (tools check already reports the error)
}

if (options?.invokeTools && !target.skipInvoke) {
const invokeCheck = await runToolsInvokeCheck(checkContext);
checks.push(invokeCheck.result);
Expand Down
18 changes: 17 additions & 1 deletion src/storage.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { mkdir, readFile, writeFile } from "node:fs/promises";
import { mkdir, readdir, readFile, writeFile } from "node:fs/promises";
import path from "node:path";

import type { DiffArtifact, RunArtifact } from "./types.js";
Expand Down Expand Up @@ -28,6 +28,22 @@ export async function writeRunArtifact(
return filePath;
}

/**
 * Finds the artifact file for a target whose file name sorts last.
 *
 * Candidates are the entries of `outDir` whose names end with
 * `--<slugified targetId>.json`; the one with the greatest name (plain string
 * ordering) wins.
 * NOTE(review): presumably file names begin with a sortable timestamp, which
 * would make "sorts last" mean "most recent" — confirm against writeRunArtifact.
 *
 * @param outDir - Directory to search.
 * @param targetId - Target identifier; slugified before matching.
 * @returns The joined path of the winning file, or `null` when nothing matches
 *   or the directory cannot be read.
 */
export async function findLatestArtifact(outDir: string, targetId: string): Promise<string | null> {
  const suffix = `--${slugify(targetId)}.json`;

  let fileNames: string[];
  try {
    fileNames = await readdir(outDir);
  } catch {
    // Unreadable or missing directory is treated the same as "no artifact".
    return null;
  }

  // Single pass keeping the greatest matching name; string comparison orders
  // names the same way the default Array.prototype.sort does.
  let newest: string | null = null;
  for (const fileName of fileNames) {
    if (!fileName.endsWith(suffix)) continue;
    if (newest === null || fileName > newest) newest = fileName;
  }

  return newest === null ? null : path.join(outDir, newest);
}

export async function readArtifact(filePath: string): Promise<Artifact> {
const content = await readFile(filePath, "utf8");
const data: unknown = JSON.parse(content);
Expand Down
2 changes: 1 addition & 1 deletion src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ export type CheckStatus =
| "unsupported"
| "flaky"
| "skipped";
export type CheckId = "tools" | "prompts" | "resources" | "tools-invoke" | "security" | "conformance" | "schema-quality";
export type CheckId = "tools" | "prompts" | "resources" | "tools-invoke" | "security" | "security-lite" | "conformance" | "schema-quality";

export const STATUS_RANK: Record<CheckStatus, number> = {
pass: 6, partial: 5, flaky: 4, unsupported: 3, skipped: 2, fail: 1
Expand Down
3 changes: 2 additions & 1 deletion tests/runner.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,13 @@ describe("runTarget", () => {
expect(artifact.artifactType).toBe("run");
expect(artifact.schemaVersion).toBe("1.0.0");
expect(artifact.gate).toBe("pass");
expect(artifact.summary.total).toBe(5);
expect(artifact.summary.total).toBe(6);
expect(artifact.summary.fail).toBe(0);
expect(artifact.checks.map((check) => check.id)).toEqual([
"tools",
"prompts",
"resources",
"security-lite",
"conformance",
"schema-quality",
]);
Expand Down
Loading