From f7351e48e5d7aa9e1e8c9a1b6d64d766ba7d8569 Mon Sep 17 00:00:00 2001
From: William Weishuhn <william.weishuhn3@gmail.com>
Date: Sun, 22 Mar 2026 10:08:42 -0700
Subject: [PATCH 1/2] feat: add watch one-shot mode and always-on lightweight
 security
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`observatory watch <cmd>` runs a server, auto-diffs against the last
run, and exits 1 on regression when --fail-on-regression is in effect
(its default comes from `isCI`) — replacing the manual run+diff workflow
that accounts for 88% of usage. Continuous polling is still available
via --interval.

Every run now includes a security-lite check (static rule matching on
tool schemas; it re-lists tools with one extra listTools request and
invokes no tools). The --security flag remains for deep scanning with
credential pattern analysis.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/checks/security.ts | 60 ++++++++++++++++++++++++++++++++
 src/cli.ts             |  4 ++-
 src/commands/scan.ts   |  4 +--
 src/commands/test.ts   |  2 +-
 src/commands/watch.ts  | 77 ++++++++++++++++++++++++++++++++++++------
 src/index.ts           |  3 +-
 src/runner.ts          | 11 +++++-
 src/storage.ts         | 18 +++++++++-
 src/types.ts           |  2 +-
 tests/runner.test.ts   |  3 +-
 10 files changed, 165 insertions(+), 19 deletions(-)
diff --git a/src/checks/security.ts b/src/checks/security.ts
index 3203171..f73afee 100644
--- a/src/checks/security.ts
+++ b/src/checks/security.ts
@@ -58,6 +58,66 @@ function scanResponsesForCredentials(
   return findings;
 }
 
+/**
+ * Lightweight security check: evaluates static rules against the supplied tool list.
+ *
+ * @param tools - Tools converted with `toolToInfo` and matched against every entry of `SECURITY_RULES`.
+ * @param target - Target config evaluated by the target-level `no-auth-http` rule.
+ * @returns An `ObservedCheck` whose result carries the id `security-lite`.
+ */
+export function runLightweightSecurityCheck(
+  tools: Tool[],
+  target: TargetConfig,
+): ObservedCheck {
+  const t0 = performance.now();
+
+  // Target-level rule first, then each static rule against each tool, in order.
+  const targetFinding = checkNoAuthHttp(target);
+  const found: SecurityFinding[] = targetFinding ? [targetFinding] : [];
+  for (const info of tools.map(toolToInfo)) {
+    for (const rule of SECURITY_RULES) {
+      const hit = rule.match(info);
+      if (hit) found.push(hit);
+    }
+  }
+
+  // Tally severities once; the counts drive both the status and the summary message.
+  let high = 0;
+  let medium = 0;
+  let low = 0;
+  for (const { severity } of found) {
+    if (severity === "high") high += 1;
+    else if (severity === "medium") medium += 1;
+    else if (severity === "low") low += 1;
+  }
+
+  // Any high finding fails the check, medium-only is partial, otherwise it passes.
+  const status = high > 0 ? "fail" : medium > 0 ? "partial" : "pass";
+
+  const message =
+    found.length > 0
+      ? `Found ${found.length} security finding(s): ${high} high, ${medium} medium, ${low} low.`
+      : "No security issues detected (lightweight scan).";
+
+  // Evidence: de-duplicated tool names plus one diagnostic line per finding.
+  const uniqueToolNames = Array.from(new Set(found.map((f) => f.toolName)));
+  const diagnosticLines = found.map((f) => `[${f.severity}] ${f.message}`);
+  const evidence: EvidenceSummary = {
+    endpoint: "security/scan-lite",
+    advertised: true,
+    responded: true,
+    minimalShapePresent: true,
+    itemCount: found.length,
+    identifiers: uniqueToolNames.length > 0 ? uniqueToolNames : undefined,
+    diagnostics: diagnosticLines.length > 0 ? diagnosticLines : undefined,
+  };
+
+  const elapsedMs = performance.now() - t0;
+  return {
+    result: makeCheckResult("security-lite", status, elapsedMs, message, [evidence]),
+  };
+}
+
 export async function runSecurityCheck(
   context: CheckContext,
   previousChecks: CheckResult[],
diff --git a/src/cli.ts b/src/cli.ts
index b03d174..0665bcd 100644
--- a/src/cli.ts
+++ b/src/cli.ts
@@ -45,8 +45,9 @@ const MENU_GROUPS: MenuGroup[] = [
     ],
   },
   {
-    heading: "More",
+    heading: "CI / Regression Testing",
     items: [
+      { command: ["watch"],   label: "watch",   outcome: "Run a check, diff against previous, alert on regressions" },
       { command: ["record"],  label: "record",  outcome: "Capture a session for offline replay or CI" },
       { command: ["diff"],    label: "diff",    outcome: "Compare two runs for regressions" },
       { command: ["test"],    label: "test",    outcome: "Test a single server by command" },
@@ -221,6 +222,7 @@ async function main(): Promise<void> {
         "",
         `  ${c(ANSI.bold, "CI / Regression Testing")}`,
         "",
+        `  ${c(ANSI.dim, "$")} ${c(ANSI.cyan, `${bin} watch`)} ${c(ANSI.dim, "<cmd>")}       Run check, diff against previous, alert regressions`,
         `  ${c(ANSI.dim, "$")} ${c(ANSI.cyan, `${bin} record`)} ${c(ANSI.dim, "<cmd>")}      Capture a session for offline replay`,
         `  ${c(ANSI.dim, "$")} ${c(ANSI.cyan, `${bin} diff`)} ${c(ANSI.dim, "<a> <b>")}      Compare two runs for regressions`,
         `  ${c(ANSI.dim, "$")} ${c(ANSI.cyan, `${bin} badge`)} ${c(ANSI.dim, "<cmd>")}       Generate a health badge for README`,
diff --git a/src/commands/scan.ts b/src/commands/scan.ts
index c4ff152..2d0a73e 100644
--- a/src/commands/scan.ts
+++ b/src/commands/scan.ts
@@ -177,7 +177,7 @@ export function registerScanCommands(program: Command, bin: string): void {
     .command("scan")
     .description("Check all MCP servers in your Claude configs.")
     .option("--config <path>", "Path to a specific MCP config file.")
-    .option("--security", "Run security analysis on tool schemas.")
+    .option("--security", "Run deep security scan (credential patterns, response analysis). Lightweight security is always included.")
     .option("--no-color", "Disable colored output.");
 
   // `scan` with no subcommand — basic scan
@@ -190,7 +190,7 @@ export function registerScanCommands(program: Command, bin: string): void {
     .command("deep")
     .description("Scan and also invoke safe tools to verify they execute.")
     .option("--config <path>", "Path to a specific MCP config file.")
-    .option("--security", "Run security analysis on tool schemas.")
+    .option("--security", "Run deep security scan (credential patterns, response analysis). Lightweight security is always included.")
     .action(async (options: { config?: string; security?: boolean }) => {
       // Inherit parent config option if set
       const parentConfig = scanCmd.opts().config as string | undefined;
diff --git a/src/commands/test.ts b/src/commands/test.ts
index c14832c..ac816f1 100644
--- a/src/commands/test.ts
+++ b/src/commands/test.ts
@@ -14,7 +14,7 @@ export function registerTestCommands(program: Command): void {
     .passThroughOptions()
     .description("Test a specific server by command.")
     .argument("<command...>", "Server command and arguments to run.")
-    .option("--security", "Run security analysis on tool schemas.")
+    .option("--security", "Run deep security scan (credential patterns, response analysis). Lightweight security is always included.")
     .option("--no-color", "Disable colored output.")
     .action(async (commandArgs: string[], options: { security?: boolean }) => {
       const t0 = Date.now();
diff --git a/src/commands/watch.ts b/src/commands/watch.ts
index e18b141..831028a 100644
--- a/src/commands/watch.ts
+++ b/src/commands/watch.ts
@@ -6,9 +6,54 @@ import {
   writeRunArtifact,
   type TargetConfig,
 } from "../index.js";
-import { readTargetConfig } from "./helpers.js";
+import { isCI } from "../ci.js";
+import { defaultRunsDirectory, findLatestArtifact, readArtifact } from "../storage.js";
+import type { RunArtifact } from "../types.js";
+import { ANSI, c, formatOutput, targetFromCommand } from "./helpers.js";
 
-// ── Watch mode implementation ───────────────────────────────────────────────
+// ── One-shot mode ────────────────────────────────────────────────────────────
+
+async function runWatchOneShot(
+  target: TargetConfig,
+  outDir: string, // directory the artifact is written to and searched for earlier runs
+  options: { format: string; failOnRegression: boolean }, // format goes to formatOutput; failOnRegression gates the regression exit code
+): Promise<void> { // One-shot: run the target once, store the artifact, then print a diff or the run itself
+  const { diffArtifacts: diff } = await import("../diff.js");
+
+  const artifact = await runTarget(target);
+  const outPath = await writeRunArtifact(artifact, outDir);
+
+  // Find the PREVIOUS run for this target (excluding the one just written)
+  const latestPath = await findLatestArtifact(outDir, target.targetId);
+  let hasPreviousRun = false;
+
+  if (latestPath && latestPath !== outPath) {
+    hasPreviousRun = true;
+    const previousRaw = await readArtifact(latestPath);
+    if (previousRaw.artifactType === "run") {
+      const previous = previousRaw as RunArtifact;
+      const diffResult = diff(previous, artifact);
+
+      process.stdout.write(formatOutput(diffResult, options.format as "terminal" | "json") + "\n");
+      process.stdout.write(`${c(ANSI.dim, `Artifact: ${outPath}`)}\n`);
+
+      if (options.failOnRegression && diffResult.summary.regressions > 0) {
+        process.exitCode = 1; // only when regressions were counted and failOnRegression is set
+      }
+      return; // diff printed; skip the plain-run output below
+    }
+  }
+
+  // No usable previous run (none found, it is the file just written, or it is not a "run" artifact): print this run
+  process.stdout.write(formatOutput(artifact, options.format as "terminal" | "json") + "\n");
+  process.stdout.write(`${c(ANSI.dim, `Artifact: ${outPath}`)}\n`);
+
+  if (artifact.gate === "fail") { // no diff was produced, so the run's own gate decides the exit code
+    process.exitCode = 1;
+  }
+}
+
+// ── Continuous polling mode ──────────────────────────────────────────────────
 
 async function runWatchMode(target: TargetConfig, outDir: string, intervalSeconds: number): Promise<void> {
   const { diffArtifacts: diff } = await import("../diff.js");
@@ -48,18 +93,30 @@ async function runWatchMode(target: TargetConfig, outDir: string, intervalSecond
 
 // ── Register ────────────────────────────────────────────────────────────────
 
-export { runWatchMode };
+export { runWatchMode, runWatchOneShot };
 
 export function registerWatchCommands(program: Command): void {
   program
     .command("watch")
-    .description("Watch a server for changes, alert on regressions.")
-    .argument("<config>", "Path to a target config JSON file.")
-    .option("--interval <seconds>", "Check interval in seconds.", "30")
+    .passThroughOptions()
+    .description("Run a server check, diff against previous run, alert on regressions.")
+    .argument("<command...>", "Server command and arguments to run.")
+    .option("--interval <seconds>", "Continuous polling interval in seconds (omit for one-shot).") // no default: absence selects one-shot mode below
+    .option("--format <format>", "Output format: terminal or json.", "terminal") // NOTE(review): the value is passed through without validation here
+    .option("--fail-on-regression", "Exit with code 1 on regressions.", isCI) // NOTE(review): isCI is passed as the default; confirm it is a boolean — commander treats a function in this position as an argument parser
+    .option("--no-fail-on-regression", "Do not exit with code 1 on regressions.")
     .option("--no-color", "Disable colored output.")
-    .action(async (configPath: string, options: { interval: string }) => {
-      const target = await readTargetConfig(configPath);
-      const outDir = (await import("../storage.js")).defaultRunsDirectory(process.cwd());
-      await runWatchMode(target, outDir, parseInt(options.interval, 10) || 30);
+    .action(async (commandArgs: string[], options: { interval?: string; format: string; failOnRegression: boolean }) => {
+      const target = targetFromCommand(commandArgs);
+      const outDir = defaultRunsDirectory(process.cwd()); // runs directory derived from the current working directory
+
+      if (options.interval) { // --interval given: continuous polling
+        await runWatchMode(target, outDir, parseInt(options.interval, 10) || 30); // NaN or 0 falls back to 30 seconds
+      } else { // otherwise a single run + diff
+        await runWatchOneShot(target, outDir, {
+          format: options.format,
+          failOnRegression: options.failOnRegression,
+        });
+      }
     });
 }
diff --git a/src/index.ts b/src/index.ts
index 6441038..ec304a9 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -8,7 +8,7 @@ export {
 } from "./cassette.js";
 export { runConformanceCheck } from "./checks/conformance.js";
 export { runSchemaQualityCheck } from "./checks/schema-quality.js";
-export { runSecurityCheck } from "./checks/security.js";
+export { runLightweightSecurityCheck, runSecurityCheck } from "./checks/security.js";
 export { SECURITY_RULES, type SecurityFinding, type SecurityRule, type ToolInfo } from "./checks/security-rules.js";
 export { diffArtifacts } from "./diff.js";
 export { scanForTargets } from "./discovery.js";
@@ -21,6 +21,7 @@ export { runTarget, runTargetRecording, type RunOptions, type RunResult } from "
 export { computeHealthScore, type ScoreWeights, DEFAULT_WEIGHTS } from "./score.js";
 export {
   defaultRunsDirectory,
+  findLatestArtifact,
   readArtifact,
   writeRunArtifact
 } from "./storage.js";
diff --git a/src/runner.ts b/src/runner.ts
index e02dfe8..da6293b 100644
--- a/src/runner.ts
+++ b/src/runner.ts
@@ -10,7 +10,7 @@ import { runPromptsCheck } from "./checks/prompts.js";
 import { runResourcesCheck } from "./checks/resources.js";
 import { runSchemaQualityCheck } from "./checks/schema-quality.js";
 import { runToolsCheck } from "./checks/tools.js";
-import { runSecurityCheck } from "./checks/security.js";
+import { runLightweightSecurityCheck, runSecurityCheck } from "./checks/security.js";
 import { runToolsInvokeCheck } from "./checks/tools-invoke.js";
 import { computeHealthScore } from "./score.js";
 import { errorMessage } from "./utils/errors.js";
@@ -128,6 +128,15 @@ async function runTargetWithRecording(target: TargetConfig, options?: RunOptions
         resourcesCheck.result
       ];
 
+      // Lightweight security check: static rules over tool schemas. Re-lists tools with one extra listTools call; no tools are invoked.
+      try {
+        const toolsResp = await session.client.listTools(undefined, { timeout: checkContext.timeoutMs });
+        const liteSecCheck = runLightweightSecurityCheck(toolsResp.tools, target);
+        checks.push(liteSecCheck.result);
+      } catch {
+        // If listing tools fails, skip lightweight security (tools check already reports the error)
+      }
+
       if (options?.invokeTools && !target.skipInvoke) {
         const invokeCheck = await runToolsInvokeCheck(checkContext);
         checks.push(invokeCheck.result);
diff --git a/src/storage.ts b/src/storage.ts
index 5294a53..9211cb4 100644
--- a/src/storage.ts
+++ b/src/storage.ts
@@ -1,4 +1,4 @@
-import { mkdir, readFile, writeFile } from "node:fs/promises";
+import { mkdir, readdir, readFile, writeFile } from "node:fs/promises";
 import path from "node:path";
 
 import type { DiffArtifact, RunArtifact } from "./types.js";
@@ -28,6 +28,22 @@ export async function writeRunArtifact(
   return filePath;
 }
 
+/** Path of the entry in `outDir` whose name ends with `--<slug>.json` and sorts last, or null when none exists. */
+export async function findLatestArtifact(outDir: string, targetId: string): Promise<string | null> {
+  const suffix = `--${slugify(targetId)}.json`;
+  let newest: string | null = null;
+  try {
+    // Plain string comparison, i.e. the same ordering as sort() without a comparator.
+    for (const name of await readdir(outDir)) {
+      if (name.endsWith(suffix) && (newest === null || name > newest)) newest = name;
+    }
+  } catch {
+    // A directory that cannot be listed is reported as "no artifact".
+    return null;
+  }
+  return newest === null ? null : path.join(outDir, newest);
+}
+
 export async function readArtifact(filePath: string): Promise<Artifact> {
   const content = await readFile(filePath, "utf8");
   const data: unknown = JSON.parse(content);
diff --git a/src/types.ts b/src/types.ts
index a812a80..d06510f 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -9,7 +9,7 @@ export type CheckStatus =
   | "unsupported"
   | "flaky"
   | "skipped";
-export type CheckId = "tools" | "prompts" | "resources" | "tools-invoke" | "security" | "conformance" | "schema-quality";
+export type CheckId = "tools" | "prompts" | "resources" | "tools-invoke" | "security" | "security-lite" | "conformance" | "schema-quality";
 
 export const STATUS_RANK: Record<CheckStatus, number> = {
   pass: 6, partial: 5, flaky: 4, unsupported: 3, skipped: 2, fail: 1
diff --git a/tests/runner.test.ts b/tests/runner.test.ts
index 489dce2..0bc9753 100644
--- a/tests/runner.test.ts
+++ b/tests/runner.test.ts
@@ -18,12 +18,13 @@ describe("runTarget", () => {
     expect(artifact.artifactType).toBe("run");
     expect(artifact.schemaVersion).toBe("1.0.0");
     expect(artifact.gate).toBe("pass");
-    expect(artifact.summary.total).toBe(5);
+    expect(artifact.summary.total).toBe(6);
     expect(artifact.summary.fail).toBe(0);
     expect(artifact.checks.map((check) => check.id)).toEqual([
       "tools",
       "prompts",
       "resources",
+      "security-lite",
       "conformance",
       "schema-quality",
     ]);

From a700b1233d19d7dcc68d0cdbf3f92e59aa896596 Mon Sep 17 00:00:00 2001
From: William Weishuhn <william.weishuhn3@gmail.com>
Date: Sun, 22 Mar 2026 10:11:14 -0700
Subject: [PATCH 2/2] fix: diff against previous run, remove unused variable and
 import in watch.ts

runWatchOneShot looked up the latest artifact only after writing the new
one, so the lookup could return the file just written and the diff
branch was skipped. Look up the previous run first, then write.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/commands/watch.ts | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/commands/watch.ts b/src/commands/watch.ts
index 831028a..43c14a8 100644
--- a/src/commands/watch.ts
+++ b/src/commands/watch.ts
@@ -8,7 +8,6 @@ import {
 } from "../index.js";
 import { isCI } from "../ci.js";
 import { defaultRunsDirectory, findLatestArtifact, readArtifact } from "../storage.js";
-import type { RunArtifact } from "../types.js";
 import { ANSI, c, formatOutput, targetFromCommand } from "./helpers.js";
 
 // ── One-shot mode ────────────────────────────────────────────────────────────
@@ -21,17 +20,16 @@ async function runWatchOneShot(
   const { diffArtifacts: diff } = await import("../diff.js");
 
   const artifact = await runTarget(target);
-  const outPath = await writeRunArtifact(artifact, outDir);
 
-  // Find the PREVIOUS run for this target (excluding the one just written)
+  // Look up the PREVIOUS run before writing the new artifact; once written,
+  // the new file would itself be the latest match and nothing would be diffed.
   const latestPath = await findLatestArtifact(outDir, target.targetId);
-  let hasPreviousRun = false;
+  const outPath = await writeRunArtifact(artifact, outDir);
 
   if (latestPath && latestPath !== outPath) {
-    hasPreviousRun = true;
     const previousRaw = await readArtifact(latestPath);
     if (previousRaw.artifactType === "run") {
-      const previous = previousRaw as RunArtifact;
+      const previous = previousRaw;
       const diffResult = diff(previous, artifact);
 
       process.stdout.write(formatOutput(diffResult, options.format as "terminal" | "json") + "\n");