diff --git a/action/action.yml b/action/action.yml index 1a2772e..a404bb7 100644 --- a/action/action.yml +++ b/action/action.yml @@ -113,9 +113,9 @@ runs: echo "artifact_path=" >> "$GITHUB_OUTPUT" fi - # Generate markdown report + # Generate PR comment report if [ -n "$ARTIFACT_PATH" ]; then - mcp-observatory report "$ARTIFACT_PATH" --format markdown --no-color > "${ARTIFACT_DIR}/report.md" 2>/dev/null || true + mcp-observatory report "$ARTIFACT_PATH" --format pr-comment --no-color > "${ARTIFACT_DIR}/report.md" 2>/dev/null || true fi - name: Verify against baseline @@ -161,16 +161,9 @@ runs: exit 0 fi - # Build comment body file to avoid shell expansion issues + # pr-comment format already includes header and footer COMMENT_FILE="${RUNNER_TEMP}/observatory/comment.md" - { - echo "## MCP Observatory Report" - echo "" - cat "$REPORT_FILE" - echo "" - echo "---" - echo "Generated by MCP Observatory action" - } > "$COMMENT_FILE" + cp "$REPORT_FILE" "$COMMENT_FILE" # Use --body-file to avoid shell injection via report content gh pr comment "$PR_NUMBER" \ diff --git a/src/commands/diff.ts b/src/commands/diff.ts index 73b6956..cda55cc 100644 --- a/src/commands/diff.ts +++ b/src/commands/diff.ts @@ -12,14 +12,14 @@ export function registerDiffCommands(program: Command): void { .description("Compare two runs and show regressions and schema drift.") .argument("", "Base run artifact JSON file.") .argument("", "Head run artifact JSON file.") - .option("--format ", "terminal, json, markdown, html, junit, or sarif", "terminal") + .option("--format ", "terminal, json, markdown, pr-comment, html, junit, or sarif", "terminal") .option("--output ", "Write to file instead of stdout.") .option("--no-color", "Disable colored output.") .option("--fail-on-regression", "Exit with code 1 when regressions are present.", false) .action( async (base: string, head: string, options: { failOnRegression?: boolean; - format: "html" | "json" | "junit" | "markdown" | "sarif" | "terminal"; + format: "html" | "json" | "junit" | "markdown" | "pr-comment" | "sarif" | "terminal"; output?: string; }) => { const baseArtifact = await readArtifact(base); diff --git a/src/commands/helpers.ts b/src/commands/helpers.ts index 856fa17..9b16f13 100644 --- a/src/commands/helpers.ts +++ b/src/commands/helpers.ts @@ -8,6 +8,7 @@ import { type TargetConfig, } from "../index.js"; import { renderJUnit } from "../reporters/junit.js"; +import { renderPrComment } from "../reporters/pr-comment.js"; import { renderSarif } from "../reporters/sarif.js"; import { validateTargetConfig } from "../validate.js"; @@ -121,10 +122,11 @@ export async function resolveTarget(options: { target?: string }): Promise[0], - format: "html" | "json" | "junit" | "markdown" | "sarif" | "terminal", + format: "html" | "json" | "junit" | "markdown" | "pr-comment" | "sarif" | "terminal", ): string { if (format === "json") return JSON.stringify(artifact, null, 2); if (format === "markdown") return renderMarkdown(artifact); + if (format === "pr-comment") return renderPrComment(artifact); if (format === "html") return renderHtml(artifact); if (format === "junit" && artifact.artifactType === "run") return renderJUnit(artifact); if (format === "sarif" && artifact.artifactType === "run") return renderSarif(artifact); diff --git a/src/commands/legacy.ts b/src/commands/legacy.ts index 5fd1613..9944b0b 100644 --- a/src/commands/legacy.ts +++ b/src/commands/legacy.ts @@ -72,12 +72,12 @@ export function registerLegacyCommands(program: Command): void { .command("report", { hidden: true }) .description("Render a run artifact.") .requiredOption("--run ", "Run artifact JSON.") - .option("--format ", "terminal, markdown, json, or html", "terminal") + .option("--format ", "terminal, markdown, pr-comment, json, or html", "terminal") .option("--output ", "Write to file instead of stdout.") .option("--no-color", "Disable colored output.") .action( async (options: { - format: "html" | "json" | "junit" | "markdown" | "sarif" | "terminal"; + format: "html" | "json" | "junit" | "markdown" | "pr-comment" | "sarif" | "terminal"; output?: string; run: string; }) => { diff --git a/src/index.ts b/src/index.ts index 6f4e710..ace09c2 100644 --- a/src/index.ts +++ b/src/index.ts @@ -15,6 +15,7 @@ export { scanForTargets } from "./discovery.js"; export { renderHtml } from "./reporters/html.js"; export { renderJUnit } from "./reporters/junit.js"; export { renderMarkdown } from "./reporters/markdown.js"; +export { renderPrComment } from "./reporters/pr-comment.js"; export { renderSarif } from "./reporters/sarif.js"; export { renderTerminal, renderWatchFirstRun, renderWatchNoChanges, renderWatchChanges } from "./reporters/terminal.js"; export { runTarget, runTargetRecording, type RunOptions, type RunResult } from "./runner.js"; diff --git a/src/reporters/pr-comment.ts b/src/reporters/pr-comment.ts new file mode 100644 index 0000000..6fc69cc --- /dev/null +++ b/src/reporters/pr-comment.ts @@ -0,0 +1,239 @@ +import type { CheckResult, DiffArtifact, RunArtifact, SchemaDriftEntry } from "../types.js"; +import { findChecksByStatus } from "./common.js"; + +// ── Types ─────────────────────────────────────────────────────────────────── + +interface ParsedFinding { + severity: string; + message: string; +} + +// ── Constants ─────────────────────────────────────────────────────────────── + +const MAX_ITEMS_PER_SECTION = 5; +const REPO_URL = "https://github.com/KryptosAI/mcp-observatory"; + +// ── Extraction helpers ────────────────────────────────────────────────────── + +function extractSecurityFindings(checks: CheckResult[]): ParsedFinding[] { + const securityChecks = checks.filter(c => c.id === "security" || c.id === "security-lite"); + const findings: ParsedFinding[] = []; + for (const check of securityChecks) { + for (const ev of check.evidence) { + for (const diag of ev.diagnostics ?? []) { + const match = diag.match(/^\[(high|medium|low)]\s+(.+)$/); + if (match) { + findings.push({ severity: match[1]!, message: match[2]! }); + } + } + } + } + return findings; +} + +function extractQualityFindings(checks: CheckResult[]): ParsedFinding[] { + const qualityChecks = checks.filter(c => c.id === "schema-quality"); + const findings: ParsedFinding[] = []; + for (const check of qualityChecks) { + for (const ev of check.evidence) { + for (const diag of ev.diagnostics ?? []) { + const match = diag.match(/^\[(warning|info)]\s+(.+)$/i); + if (match) { + findings.push({ severity: match[1]!, message: match[2]! }); + } + } + } + } + return findings; +} + +function countCapabilities(checks: CheckResult[]): { tools: number; prompts: number; resources: number } { + const get = (id: string) => { + const check = checks.find(c => c.id === id); + return check?.evidence[0]?.itemCount ?? 0; + }; + return { tools: get("tools"), prompts: get("prompts"), resources: get("resources") }; +} + +function conformanceSummary(checks: CheckResult[]): string | undefined { + const check = checks.find(c => c.id === "conformance"); + if (!check || check.status === "pass") return undefined; + return check.message; +} + +// ── Formatting helpers ────────────────────────────────────────────────────── + +function blockquoteList(items: string[], max = MAX_ITEMS_PER_SECTION): string { + const shown = items.slice(0, max); + const lines = shown.map(item => `> ${item}`); + const remaining = items.length - max; + if (remaining > 0) { + lines.push(`> ...and ${remaining} more`); + } + return lines.join("\n"); +} + +function footer(): string { + return [ + "", + "---", + `🔭 MCP Observatory — test your MCP servers for breaking changes · ⭐ Star`, + ].join("\n"); +} + +// ── Run artifact rendering ────────────────────────────────────────────────── + +function renderRunComment(artifact: RunArtifact): string { + const sections: string[] = []; + const security = extractSecurityFindings(artifact.checks); + const quality = extractQualityFindings(artifact.checks); + const failingChecks = findChecksByStatus(artifact.checks, "fail") + .filter(c => c.id !== "security" && c.id !== "security-lite"); + const conformance = conformanceSummary(artifact.checks); + + const highMedSecurity = security.filter(f => f.severity === "high" || f.severity === "medium"); + const issueCount = highMedSecurity.length + failingChecks.length + quality.length + (conformance ? 1 : 0); + + // Header + if (issueCount === 0) { + sections.push("## 🔭 MCP Observatory — All clear ✅"); + sections.push(""); + sections.push("All checks passed. No security issues, no schema quality warnings."); + } else { + sections.push(`## 🔭 MCP Observatory — ${issueCount} issue${issueCount === 1 ? "" : "s"} found`); + } + + // Security (red) + if (highMedSecurity.length > 0) { + const highCount = highMedSecurity.filter(f => f.severity === "high").length; + const medCount = highMedSecurity.filter(f => f.severity === "medium").length; + const parts: string[] = []; + if (highCount > 0) parts.push(`${highCount} high`); + if (medCount > 0) parts.push(`${medCount} medium`); + sections.push(""); + sections.push(`### 🔴 SECURITY (${parts.join(", ")})`); + sections.push(blockquoteList(highMedSecurity.map(f => `\`${f.severity}\` ${f.message}`))); + } + + // Failing checks (red) + if (failingChecks.length > 0) { + sections.push(""); + sections.push(`### 🔴 FAILING (${failingChecks.length})`); + sections.push(blockquoteList(failingChecks.map(c => `**${c.id}**: ${c.message}`))); + } + + // Quality warnings (yellow) + const qualityItems: string[] = []; + for (const f of quality) { + qualityItems.push(f.message); + } + if (conformance) { + qualityItems.push(`Conformance: ${conformance}`); + } + if (qualityItems.length > 0) { + sections.push(""); + sections.push(`### ⚠️ QUALITY (${qualityItems.length} warning${qualityItems.length === 1 ? "" : "s"})`); + sections.push(blockquoteList(qualityItems)); + } + + // Summary stats + const caps = countCapabilities(artifact.checks); + const statsLine = [ + artifact.healthScore + ? `Health: **${artifact.healthScore.grade}** (${artifact.healthScore.overall})` + : `Gate: **${artifact.gate}**`, + `${caps.tools} tools`, + `${caps.prompts} prompts`, + `${caps.resources} resources`, + ].join(" · "); + + sections.push(""); + sections.push(`### 📊 Summary`); + sections.push(`> ${statsLine}`); + + sections.push(footer()); + return sections.join("\n"); +} + +// ── Diff artifact rendering ───────────────────────────────────────────────── + +function renderDiffComment(artifact: DiffArtifact): string { + const sections: string[] = []; + const { regressions, recoveries, schemaDrift } = artifact; + const driftCount = schemaDrift?.length ?? 0; + const totalIssues = regressions.length + driftCount; + + // Header + if (totalIssues === 0) { + sections.push("## 🔭 MCP Observatory — No regressions ✅"); + sections.push(""); + sections.push("All checks stable. No regressions, no schema drift."); + } else { + const parts: string[] = []; + if (regressions.length > 0) parts.push(`${regressions.length} regression${regressions.length === 1 ? "" : "s"}`); + if (driftCount > 0) parts.push(`${driftCount} schema change${driftCount === 1 ? "" : "s"}`); + sections.push(`## 🔭 MCP Observatory — ${parts.join(", ")} detected`); + } + + // Regressions (red) + if (regressions.length > 0) { + sections.push(""); + sections.push(`### 🔴 REGRESSIONS (${regressions.length})`); + sections.push(blockquoteList(regressions.map(r => { + const transition = r.fromStatus && r.toStatus ? `${r.fromStatus} → ${r.toStatus}` : ""; + return `**${r.id}**: ${transition}${transition ? " — " : ""}${r.message}`; + }))); + } + + // Schema drift (red) + if (schemaDrift && schemaDrift.length > 0) { + sections.push(""); + sections.push(`### 🔴 SCHEMA DRIFT (${schemaDrift.length})`); + const driftLines = flattenDrift(schemaDrift); + sections.push(blockquoteList(driftLines)); + } + + // Recoveries (green) + if (recoveries.length > 0) { + sections.push(""); + sections.push(`### ✅ RECOVERED (${recoveries.length})`); + sections.push(blockquoteList(recoveries.map(r => { + const transition = r.fromStatus && r.toStatus ? `${r.fromStatus} → ${r.toStatus}` : ""; + return `**${r.id}**: ${transition}`; + }))); + } + + // Summary + const { summary } = artifact; + const statsLine = [ + `Gate: **${artifact.gate}**`, + `Regressions: ${summary.regressions}`, + `Recoveries: ${summary.recoveries}`, + `Unchanged: ${summary.unchanged}`, + ].join(" · "); + + sections.push(""); + sections.push(`### 📊 Summary`); + sections.push(`> ${statsLine}`); + + sections.push(footer()); + return sections.join("\n"); +} + +function flattenDrift(drift: SchemaDriftEntry[]): string[] { + const lines: string[] = []; + for (const entry of drift) { + for (const change of entry.changes) { + lines.push(`**${entry.name}**: ${change}`); + } + } + return lines; +} + +// ── Public API ────────────────────────────────────────────────────────────── + +export function renderPrComment(artifact: RunArtifact | DiffArtifact): string { + return artifact.artifactType === "run" + ? renderRunComment(artifact) + : renderDiffComment(artifact); +} diff --git a/tests/pr-comment.test.ts b/tests/pr-comment.test.ts new file mode 100644 index 0000000..0774ef6 --- /dev/null +++ b/tests/pr-comment.test.ts @@ -0,0 +1,153 @@ +import { describe, expect, it } from "vitest"; + +import { renderPrComment } from "../src/reporters/pr-comment.js"; +import type { DiffArtifact } from "../src/types.js"; +import { makeArtifact } from "./fixtures/test-helpers.js"; + +// ── Run artifact tests ────────────────────────────────────────────────────── + +describe("renderPrComment (RunArtifact)", () => { + it("shows all-clear when no issues", () => { + const out = renderPrComment(makeArtifact([ + { id: "tools", capability: "tools", status: "pass", durationMs: 100, message: "OK", evidence: [{ endpoint: "tools/list", advertised: true, responded: true, minimalShapePresent: true, itemCount: 5, identifiers: ["a", "b", "c", "d", "e"] }] }, + ])); + expect(out).toContain("All clear"); + expect(out).not.toContain("🔴"); + }); + + it("shows security findings", () => { + const out = renderPrComment(makeArtifact([ + { id: "security", capability: "security", status: "fail", durationMs: 50, message: "1 finding", evidence: [{ endpoint: "security/scan", advertised: true, responded: true, minimalShapePresent: true, diagnostics: ['[high] Tool "execute_command" has parameter "command" which may allow arbitrary command execution.'] }] }, + ])); + expect(out).toContain("SECURITY"); + expect(out).toContain("high"); + expect(out).toContain("execute_command"); + }); + + it("shows failing checks", () => { + const out = renderPrComment(makeArtifact([ + { id: "tools", capability: "tools", status: "fail", durationMs: 50, message: "Tools endpoint returned error", evidence: [] }, + ])); + expect(out).toContain("FAILING"); + expect(out).toContain("tools"); + expect(out).toContain("Tools endpoint returned error"); + }); + + it("shows quality warnings from schema-quality check", () => { + const out = renderPrComment(makeArtifact([ + { id: "schema-quality", capability: "schema-quality", status: "partial", durationMs: 30, message: "2 issues", evidence: [{ endpoint: "schema-quality", advertised: true, responded: true, minimalShapePresent: true, diagnostics: ['[warning] tool "list_repos": Missing description', '[warning] tool "search": Property \'query\' missing description'] }] }, + ])); + expect(out).toContain("QUALITY"); + expect(out).toContain("list_repos"); + }); + + it("renders health score when present", () => { + const artifact = makeArtifact([ + { id: "tools", capability: "tools", status: "pass", durationMs: 100, message: "OK", evidence: [{ endpoint: "tools/list", advertised: true, responded: true, minimalShapePresent: true, itemCount: 24 }] }, + ]); + artifact.healthScore = { overall: 95, grade: "A", dimensions: [] }; + const out = renderPrComment(artifact); + expect(out).toContain("**A** (95)"); + }); + + it("shows capability counts", () => { + const out = renderPrComment(makeArtifact([ + { id: "tools", capability: "tools", status: "pass", durationMs: 100, message: "OK", evidence: [{ endpoint: "tools/list", advertised: true, responded: true, minimalShapePresent: true, itemCount: 24 }] }, + { id: "prompts", capability: "prompts", status: "pass", durationMs: 50, message: "OK", evidence: [{ endpoint: "prompts/list", advertised: true, responded: true, minimalShapePresent: true, itemCount: 3 }] }, + { id: "resources", capability: "resources", status: "pass", durationMs: 50, message: "OK", evidence: [{ endpoint: "resources/list", advertised: true, responded: true, minimalShapePresent: true, itemCount: 5 }] }, + ])); + expect(out).toContain("24 tools"); + expect(out).toContain("3 prompts"); + expect(out).toContain("5 resources"); + }); + + it("truncates long lists at 5 items", () => { + const diagnostics = Array.from({ length: 8 }, (_, i) => `[warning] tool "tool_${i}": Missing description`); + const out = renderPrComment(makeArtifact([ + { id: "schema-quality", capability: "schema-quality", status: "partial", durationMs: 30, message: "8 issues", evidence: [{ endpoint: "schema-quality", advertised: true, responded: true, minimalShapePresent: true, diagnostics }] }, + ])); + expect(out).toContain("and 3 more"); + }); + + it("always includes footer with MCP Observatory link", () => { + const out = renderPrComment(makeArtifact([])); + expect(out).toContain("MCP Observatory"); + expect(out).toContain("KryptosAI/mcp-observatory"); + }); + + it("does not count security checks as FAILING", () => { + const out = renderPrComment(makeArtifact([ + { id: "security", capability: "security", status: "fail", durationMs: 50, message: "1 finding", evidence: [{ endpoint: "security/scan", advertised: true, responded: true, minimalShapePresent: true, diagnostics: ["[high] Bad stuff"] }] }, + ])); + expect(out).toContain("SECURITY"); + expect(out).not.toContain("FAILING"); + }); +}); + +// ── Diff artifact tests ───────────────────────────────────────────────────── + +function makeDiff(overrides: Partial = {}): DiffArtifact { + return { + artifactType: "diff", + schemaVersion: "1.0.0", + gate: "pass", + baseRunId: "base-run", + headRunId: "head-run", + createdAt: "2026-03-21T00:00:00Z", + summary: { regressions: 0, recoveries: 0, unchanged: 0, added: 0, removed: 0, gate: "pass" }, + regressions: [], + recoveries: [], + unchanged: [], + added: [], + removed: [], + ...overrides, + }; +} + +describe("renderPrComment (DiffArtifact)", () => { + it("shows no-regressions message when clean", () => { + const out = renderPrComment(makeDiff({ unchanged: [{ id: "tools", capability: "tools", message: "OK" }] })); + expect(out).toContain("No regressions"); + expect(out).not.toContain("🔴"); + }); + + it("shows regressions with status transitions", () => { + const out = renderPrComment(makeDiff({ + gate: "fail", + summary: { regressions: 1, recoveries: 0, unchanged: 2, added: 0, removed: 0, gate: "fail" }, + regressions: [{ id: "tools-invoke", capability: "tools-invoke", fromStatus: "pass", toStatus: "fail", message: "search_code invocation error" }], + })); + expect(out).toContain("REGRESSIONS (1)"); + expect(out).toContain("pass → fail"); + expect(out).toContain("search_code"); + }); + + it("shows schema drift entries", () => { + const out = renderPrComment(makeDiff({ + gate: "fail", + summary: { regressions: 0, recoveries: 0, unchanged: 2, added: 0, removed: 0, schemaDriftCount: 2, gate: "fail" }, + schemaDrift: [ + { capability: "tools", name: "create_issue", changes: ["added required property 'type'"] }, + { capability: "tools", name: "search_code", changes: ["removed property 'language'"] }, + ], + })); + expect(out).toContain("SCHEMA DRIFT (2)"); + expect(out).toContain("create_issue"); + expect(out).toContain("added required property"); + }); + + it("shows recoveries", () => { + const out = renderPrComment(makeDiff({ + summary: { regressions: 0, recoveries: 1, unchanged: 2, added: 0, removed: 0, gate: "pass" }, + recoveries: [{ id: "prompts", capability: "prompts", fromStatus: "fail", toStatus: "pass", message: "Recovered" }], + })); + expect(out).toContain("RECOVERED (1)"); + expect(out).toContain("fail → pass"); + }); + + it("always includes footer", () => { + const out = renderPrComment(makeDiff()); + expect(out).toContain("MCP Observatory"); + expect(out).toContain("KryptosAI/mcp-observatory"); + }); +});