diff --git a/.changeset/mcp-registry-skill-finder.md b/.changeset/mcp-registry-skill-finder.md new file mode 100644 index 00000000..d4d0b876 --- /dev/null +++ b/.changeset/mcp-registry-skill-finder.md @@ -0,0 +1,8 @@ +--- +"@perstack/create-expert-skill": patch +"create-expert": patch +--- + +feat: add MCP registry search tools and skill-finder expert + +Add `searchMcpRegistry` and `getMcpServerDetail` tools to `@perstack/create-expert-skill` that search the official MCP registry for MCP servers matching expert skill requirements. Add `@create-expert/skill-finder` expert that uses these tools to investigate registry entries and produce skill-report.md with ready-to-use TOML configuration snippets. Update coordinator, planner, and definition-writer instructions to integrate skill findings into generated expert definitions. diff --git a/apps/create-expert-skill/src/index.ts b/apps/create-expert-skill/src/index.ts index 2f44b6df..f1c0d416 100644 --- a/apps/create-expert-skill/src/index.ts +++ b/apps/create-expert-skill/src/index.ts @@ -4,4 +4,12 @@ export { SKILL_NAME, SKILL_VERSION, } from "./server.js" +export { + getMcpServerDetail, + registerGetMcpServerDetail, +} from "./tools/get-mcp-server-detail.js" export { registerRunExpert, runExpert } from "./tools/run-expert.js" +export { + registerSearchMcpRegistry, + searchMcpRegistry, +} from "./tools/search-mcp-registry.js" diff --git a/apps/create-expert-skill/src/lib/mcp-registry-client.test.ts b/apps/create-expert-skill/src/lib/mcp-registry-client.test.ts new file mode 100644 index 00000000..b78f1dab --- /dev/null +++ b/apps/create-expert-skill/src/lib/mcp-registry-client.test.ts @@ -0,0 +1,237 @@ +import { afterEach, describe, expect, it, mock } from "bun:test" +import { clearCache, fetchAllServers, fetchServerDetail } from "./mcp-registry-client.js" + +const originalFetch = globalThis.fetch + +afterEach(() => { + globalThis.fetch = originalFetch + clearCache() +}) + +describe("fetchAllServers", () => { + 
it("fetches servers from a single page", async () => { + globalThis.fetch = mock(() => + Promise.resolve( + new Response( + JSON.stringify({ + servers: [ + { + name: "server-a", + description: "A server", + version_detail: { version: "1.0.0", status: "active" }, + }, + { + name: "server-b", + description: "B server", + version_detail: { version: "2.0.0", status: "active" }, + }, + ], + }), + { status: 200 }, + ), + ), + ) as typeof fetch + + const servers = await fetchAllServers() + expect(servers).toHaveLength(2) + expect(servers[0].name).toBe("server-a") + expect(servers[1].name).toBe("server-b") + }) + + it("paginates through multiple pages", async () => { + let callCount = 0 + globalThis.fetch = mock(() => { + callCount++ + if (callCount === 1) { + return Promise.resolve( + new Response( + JSON.stringify({ + servers: [ + { + name: "page1-server", + description: "first", + version_detail: { version: "1.0.0", status: "active" }, + }, + ], + next_cursor: "cursor-abc", + }), + { status: 200 }, + ), + ) + } + return Promise.resolve( + new Response( + JSON.stringify({ + servers: [ + { + name: "page2-server", + description: "second", + version_detail: { version: "1.0.0", status: "active" }, + }, + ], + }), + { status: 200 }, + ), + ) + }) as typeof fetch + + const servers = await fetchAllServers() + expect(servers).toHaveLength(2) + expect(servers[0].name).toBe("page1-server") + expect(servers[1].name).toBe("page2-server") + expect(callCount).toBe(2) + }) + + it("includes cursor query parameter for pagination", async () => { + const urls: string[] = [] + let callCount = 0 + globalThis.fetch = mock((input: string | URL | Request) => { + urls.push(typeof input === "string" ? 
input : input.toString()) + callCount++ + if (callCount === 1) { + return Promise.resolve( + new Response( + JSON.stringify({ + servers: [ + { + name: "s1", + description: "", + version_detail: { version: "1.0.0", status: "active" }, + }, + ], + next_cursor: "my-cursor", + }), + { status: 200 }, + ), + ) + } + return Promise.resolve(new Response(JSON.stringify({ servers: [] }), { status: 200 })) + }) as typeof fetch + + await fetchAllServers() + + expect(urls[0]).not.toContain("cursor=") + expect(urls[1]).toContain("cursor=my-cursor") + }) + + it("returns cached data on second call", async () => { + let callCount = 0 + globalThis.fetch = mock(() => { + callCount++ + return Promise.resolve( + new Response( + JSON.stringify({ + servers: [ + { + name: "cached", + description: "", + version_detail: { version: "1.0.0", status: "active" }, + }, + ], + }), + { status: 200 }, + ), + ) + }) as typeof fetch + + const first = await fetchAllServers() + const second = await fetchAllServers() + + expect(first).toEqual(second) + expect(callCount).toBe(1) + }) + + it("throws on HTTP error", async () => { + globalThis.fetch = mock(() => + Promise.resolve(new Response("", { status: 500, statusText: "Internal Server Error" })), + ) as typeof fetch + + await expect(fetchAllServers()).rejects.toThrow("Registry API error: 500") + }) + + it("throws on network error", async () => { + globalThis.fetch = mock(() => Promise.reject(new Error("Network failure"))) as typeof fetch + + await expect(fetchAllServers()).rejects.toThrow("Network failure") + }) +}) + +describe("fetchServerDetail", () => { + it("fetches server detail", async () => { + globalThis.fetch = mock(() => + Promise.resolve( + new Response( + JSON.stringify({ + name: "my/server", + description: "Test server", + version: "1.0.0", + packages: [], + remotes: [], + status: "active", + }), + { status: 200 }, + ), + ), + ) as typeof fetch + + const detail = await fetchServerDetail("my/server") + expect(detail.name).toBe("my/server") 
+ expect(detail.description).toBe("Test server") + }) + + it("encodes server name with slashes", async () => { + let calledUrl = "" + globalThis.fetch = mock((input: string | URL | Request) => { + calledUrl = typeof input === "string" ? input : input.toString() + return Promise.resolve( + new Response( + JSON.stringify({ + name: "org/repo", + description: "", + version: "1.0.0", + packages: [], + remotes: [], + status: "active", + }), + { status: 200 }, + ), + ) + }) as typeof fetch + + await fetchServerDetail("org/repo", "latest") + expect(calledUrl).toContain("org%2Frepo") + }) + + it("caches server detail", async () => { + let callCount = 0 + globalThis.fetch = mock(() => { + callCount++ + return Promise.resolve( + new Response( + JSON.stringify({ + name: "test", + description: "", + version: "1.0.0", + packages: [], + remotes: [], + status: "active", + }), + { status: 200 }, + ), + ) + }) as typeof fetch + + await fetchServerDetail("test", "latest") + await fetchServerDetail("test", "latest") + + expect(callCount).toBe(1) + }) + + it("throws on 404", async () => { + globalThis.fetch = mock(() => + Promise.resolve(new Response("", { status: 404, statusText: "Not Found" })), + ) as typeof fetch + + await expect(fetchServerDetail("nonexistent")).rejects.toThrow("Registry API error: 404") + }) +}) diff --git a/apps/create-expert-skill/src/lib/mcp-registry-client.ts b/apps/create-expert-skill/src/lib/mcp-registry-client.ts new file mode 100644 index 00000000..967ea36a --- /dev/null +++ b/apps/create-expert-skill/src/lib/mcp-registry-client.ts @@ -0,0 +1,170 @@ +import { z } from "zod/v4" + +const REGISTRY_BASE_URL = "https://registry.modelcontextprotocol.io" +const PAGE_LIMIT = 100 +const MAX_PAGES = 10 +const REQUEST_TIMEOUT_MS = 15_000 +const CACHE_TTL_MS = 10 * 60 * 1000 + +// --- Zod Schemas --- + +const ServerListItemSchema = z.object({ + name: z.string(), + description: z.string().optional().default(""), + version_detail: z + .object({ + version: 
// --- Cache ---

/** A cached payload plus the epoch-millisecond instant after which it is stale. */
interface CacheEntry<T> {
  data: T
  expiresAt: number
}

/**
 * Module-wide store shared by every registry lookup.
 * Payloads are held as `unknown`; each reader names the type it expects
 * through the type argument of `getCached`.
 */
const cache = new Map<string, CacheEntry<unknown>>()

/**
 * Return the live value stored under `key`.
 *
 * An entry whose `expiresAt` is in the past is deleted on access, so stale
 * data is never handed out twice.
 *
 * @param key - Cache key used when the value was stored.
 * @returns The stored value, or `undefined` when the key is absent or expired.
 */
function getCached<T>(key: string): T | undefined {
  const entry = cache.get(key)
  if (entry === undefined) return undefined

  if (Date.now() > entry.expiresAt) {
    cache.delete(key)
    return undefined
  }

  // The map is type-erased; the caller asserts the type it stored under this key.
  return entry.data as T
}

/**
 * Store `data` under `key`, replacing any previous entry.
 *
 * @param key - Cache key.
 * @param data - Value to remember.
 * @param ttlMs - Lifetime in milliseconds; defaults to the module-wide CACHE_TTL_MS.
 */
function setCache<T>(key: string, data: T, ttlMs: number = CACHE_TTL_MS): void {
  cache.set(key, { data, expiresAt: Date.now() + ttlMs })
}
/** @public Used in test files for cache isolation */
export function clearCache(): void {
  cache.clear()
}

// --- Fetch with Timeout ---

/**
 * GET `url` and decode its JSON body under a single REQUEST_TIMEOUT_MS deadline.
 *
 * The abort timer stays armed until the body has been fully read and parsed.
 * Clearing it as soon as the headers arrive would let a stalled body hang the
 * caller indefinitely.
 *
 * @param url - Absolute registry URL.
 * @returns The decoded JSON payload, not yet validated.
 * @throws Error `Registry API error: <status> <statusText>` on a non-2xx response;
 *   whatever `fetch` / `Response.json` reject with on network failure, abort, or
 *   malformed JSON.
 */
async function fetchRegistryJson(url: string): Promise<unknown> {
  const controller = new AbortController()
  const timer = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS)
  try {
    const response = await fetch(url, { signal: controller.signal })
    if (!response.ok) {
      throw new Error(`Registry API error: ${response.status} ${response.statusText}`)
    }
    return await response.json()
  } finally {
    clearTimeout(timer)
  }
}

// --- API Functions ---

/**
 * Fetch the registry's server list, following `next_cursor` pagination.
 *
 * At most MAX_PAGES pages of PAGE_LIMIT entries are requested; if the registry
 * still reports a cursor after that, the remainder is not fetched. The combined
 * list is cached under a fixed key, so later calls within the TTL make no requests.
 *
 * @returns Every server entry collected across the fetched pages.
 * @throws Error on a non-2xx response, network failure, timeout, or a payload
 *   that fails `ServerListResponseSchema`.
 */
export async function fetchAllServers(): Promise<ServerListItem[]> {
  const cacheKey = "allServers"
  const cached = getCached<ServerListItem[]>(cacheKey)
  if (cached) return cached

  const allServers: ServerListItem[] = []
  let cursor: string | undefined

  for (let page = 0; page < MAX_PAGES; page++) {
    const url = new URL("/v0.1/servers", REGISTRY_BASE_URL)
    url.searchParams.set("limit", String(PAGE_LIMIT))
    if (cursor) {
      url.searchParams.set("cursor", cursor)
    }

    const parsed = ServerListResponseSchema.parse(await fetchRegistryJson(url.toString()))
    allServers.push(...parsed.servers)

    if (!parsed.next_cursor) break
    cursor = parsed.next_cursor
  }

  setCache(cacheKey, allServers)
  return allServers
}

/**
 * Fetch one server's detail record for a given version.
 *
 * @param serverName - Registry name; may contain `/`, which is sent as `%2F`.
 * @param version - Version identifier, `"latest"` by default.
 * @returns The validated detail record (cached per name and version).
 * @throws Error on a non-2xx response, network failure, timeout, or a payload
 *   that fails `ServerDetailSchema`.
 */
export async function fetchServerDetail(
  serverName: string,
  version = "latest",
): Promise<ServerDetail> {
  const cacheKey = `detail:${serverName}:${version}`
  const cached = getCached<ServerDetail>(cacheKey)
  if (cached) return cached

  // encodeURIComponent already turns "/" into "%2F", so the name stays one path segment.
  const encodedName = encodeURIComponent(serverName)
  const url = `${REGISTRY_BASE_URL}/v0.1/servers/${encodedName}/versions/${encodeURIComponent(version)}`

  const parsed = ServerDetailSchema.parse(await fetchRegistryJson(url))

  setCache(cacheKey, parsed)
  return parsed
}
"1.2.0", + packages: [ + { + registry_name: "npm", + name: "@example/mcp-server", + version: "1.2.0", + package_arguments: [], + environment_variables: [ + { name: "API_KEY", description: "API key", required: true, isSecret: true }, + { name: "DEBUG", description: "Debug mode", required: false, isSecret: false }, + ], + }, + ], + remotes: [], + status: "active", + }) + + const result = await getMcpServerDetail({ serverName: "example/npm-server", version: "latest" }) + + expect(result.perstackSkillMapping.type).toBe("mcpStdioSkill") + expect(result.perstackSkillMapping.command).toBe("npx") + expect(result.perstackSkillMapping.packageName).toBe("@example/mcp-server") + expect(result.perstackSkillMapping.requiredEnv).toEqual(["API_KEY"]) + }) + + it("maps SSE remote to mcpSseSkill", async () => { + mockServerDetail({ + name: "example/sse-server", + description: "An SSE-based MCP server", + version: "1.0.0", + packages: [], + remotes: [{ transport_type: "sse", url: "https://api.example.com/mcp" }], + status: "active", + }) + + const result = await getMcpServerDetail({ serverName: "example/sse-server", version: "latest" }) + + expect(result.perstackSkillMapping.type).toBe("mcpSseSkill") + expect(result.perstackSkillMapping.endpoint).toBe("https://api.example.com/mcp") + }) + + it("maps streamable-http remote to mcpSseSkill", async () => { + mockServerDetail({ + name: "example/http-server", + description: "A streamable HTTP MCP server", + version: "1.0.0", + packages: [], + remotes: [{ transport_type: "streamable-http", url: "https://api.example.com/stream" }], + status: "active", + }) + + const result = await getMcpServerDetail({ + serverName: "example/http-server", + version: "latest", + }) + + expect(result.perstackSkillMapping.type).toBe("mcpSseSkill") + expect(result.perstackSkillMapping.endpoint).toBe("https://api.example.com/stream") + }) + + it("prioritizes npm over SSE when both exist", async () => { + mockServerDetail({ + name: "example/dual-server", + 
description: "Has both npm and SSE", + version: "1.0.0", + packages: [ + { + registry_name: "npm", + name: "@example/dual-server", + version: "1.0.0", + package_arguments: [], + environment_variables: [], + }, + ], + remotes: [{ transport_type: "sse", url: "https://api.example.com/sse" }], + status: "active", + }) + + const result = await getMcpServerDetail({ + serverName: "example/dual-server", + version: "latest", + }) + + expect(result.perstackSkillMapping.type).toBe("mcpStdioSkill") + expect(result.perstackSkillMapping.command).toBe("npx") + }) + + it("marks OCI packages as unsupported", async () => { + mockServerDetail({ + name: "example/oci-server", + description: "An OCI-based server", + version: "1.0.0", + packages: [ + { + registry_name: "oci", + name: "example/oci-image", + version: "1.0.0", + package_arguments: [], + environment_variables: [], + }, + ], + remotes: [], + status: "active", + }) + + const result = await getMcpServerDetail({ serverName: "example/oci-server", version: "latest" }) + + expect(result.perstackSkillMapping.type).toBe("unsupported") + expect(result.perstackSkillMapping.note).toContain("Docker") + }) + + it("marks servers with no supported transport as unsupported", async () => { + mockServerDetail({ + name: "example/bare-server", + description: "No packages or remotes", + version: "1.0.0", + packages: [], + remotes: [], + status: "active", + }) + + const result = await getMcpServerDetail({ + serverName: "example/bare-server", + version: "latest", + }) + + expect(result.perstackSkillMapping.type).toBe("unsupported") + expect(result.perstackSkillMapping.note).toContain("No supported transport") + }) + + it("only includes isSecret env vars in requiredEnv", async () => { + mockServerDetail({ + name: "example/env-server", + description: "Server with env vars", + version: "1.0.0", + packages: [ + { + registry_name: "npm", + name: "@example/env-server", + version: "1.0.0", + package_arguments: [], + environment_variables: [ + { name: 
"SECRET_KEY", description: "Secret", required: true, isSecret: true }, + { name: "PUBLIC_VAR", description: "Public", required: true, isSecret: false }, + { name: "ANOTHER_SECRET", description: "Another", required: false, isSecret: true }, + ], + }, + ], + remotes: [], + status: "active", + }) + + const result = await getMcpServerDetail({ serverName: "example/env-server", version: "latest" }) + + expect(result.perstackSkillMapping.requiredEnv).toEqual(["SECRET_KEY", "ANOTHER_SECRET"]) + }) + + it("omits requiredEnv when no secret env vars exist", async () => { + mockServerDetail({ + name: "example/no-secrets", + description: "No secrets", + version: "1.0.0", + packages: [ + { + registry_name: "npm", + name: "@example/no-secrets", + version: "1.0.0", + package_arguments: [], + environment_variables: [ + { name: "LOG_LEVEL", description: "Log level", required: false, isSecret: false }, + ], + }, + ], + remotes: [], + status: "active", + }) + + const result = await getMcpServerDetail({ serverName: "example/no-secrets", version: "latest" }) + + expect(result.perstackSkillMapping.type).toBe("mcpStdioSkill") + expect(result.perstackSkillMapping.requiredEnv).toBeUndefined() + }) + + it("includes repository URL when available", async () => { + mockServerDetail({ + name: "example/with-repo", + description: "Has repo", + version: "1.0.0", + repository: { url: "https://github.com/example/repo" }, + packages: [], + remotes: [], + status: "active", + }) + + const result = await getMcpServerDetail({ serverName: "example/with-repo", version: "latest" }) + expect(result.repository).toBe("https://github.com/example/repo") + }) + + it("throws on 404 server not found", async () => { + globalThis.fetch = mock(() => + Promise.resolve(new Response("", { status: 404, statusText: "Not Found" })), + ) as typeof fetch + + await expect( + getMcpServerDetail({ serverName: "nonexistent/server", version: "latest" }), + ).rejects.toThrow("Registry API error: 404") + }) + + it("rejects non-HTTPS 
/** Arguments accepted by `getMcpServerDetail`. */
interface GetDetailInput {
  serverName: string
  version: string
}

/**
 * How a registry entry translates into a perstack skill definition.
 * `command`/`packageName`/`requiredEnv` are set for `mcpStdioSkill`,
 * `endpoint` for `mcpSseSkill`, and `note` for `unsupported`.
 */
interface PerstackSkillMapping {
  type: "mcpStdioSkill" | "mcpSseSkill" | "unsupported"
  command?: string
  packageName?: string
  endpoint?: string
  requiredEnv?: string[]
  note?: string
}

/** Camel-cased view of a registry detail record plus the derived skill mapping. */
interface GetDetailOutput {
  name: string
  description: string
  version: string
  repository?: string
  packages: Array<{
    registryName: string
    name: string
    version: string
    arguments: string[]
    environmentVariables: Array<{
      name: string
      description: string
      required: boolean
      isSecret: boolean
    }>
  }>
  remotes: Array<{
    transportType: string
    url: string
  }>
  status: string
  perstackSkillMapping: PerstackSkillMapping
}

/**
 * First npm package that actually names something to run.
 * Entries whose name is empty (the schema default) are skipped: a stdio mapping
 * without a package name cannot be launched and must not hide a usable remote.
 */
function findNpmPackage(packages: Package[]): Package | undefined {
  return packages.find((p) => p.registry_name === "npm" && p.name.trim().length > 0)
}

/** True when `candidate` parses as a URL whose scheme is HTTPS (scheme case is irrelevant). */
function isHttpsUrl(candidate: string): boolean {
  try {
    return new URL(candidate).protocol === "https:"
  } catch {
    // Not parseable as an absolute URL, so it cannot be used as an endpoint.
    return false
  }
}

/** First remote reachable over SSE or streamable-http on a well-formed HTTPS URL. */
function findSseRemote(remotes: Remote[]): Remote | undefined {
  return remotes.find(
    (r) =>
      (r.transport_type === "sse" || r.transport_type === "streamable-http") && isHttpsUrl(r.url),
  )
}

/**
 * Names of the environment variables flagged `isSecret`.
 * The `required` flag is intentionally not consulted here.
 */
function extractRequiredEnv(envVars: EnvironmentVariable[]): string[] {
  return envVars.filter((v) => v.isSecret).map((v) => v.name)
}

/**
 * Choose the perstack skill type for a registry entry.
 *
 * Priority: runnable npm package (stdio via `npx`), then an HTTPS SSE or
 * streamable-http remote, otherwise `unsupported` with an explanatory note.
 *
 * @param detail - Validated registry detail record.
 * @returns The mapping; optional fields that do not apply are `undefined`.
 */
function buildPerstackSkillMapping(detail: ServerDetail): PerstackSkillMapping {
  // Priority 1: npm + stdio
  const npmPkg = findNpmPackage(detail.packages)
  if (npmPkg) {
    const requiredEnv = extractRequiredEnv(npmPkg.environment_variables)
    return {
      type: "mcpStdioSkill",
      command: "npx",
      packageName: npmPkg.name,
      requiredEnv: requiredEnv.length > 0 ? requiredEnv : undefined,
    }
  }

  // Priority 2: SSE / streamable-http remote
  const sseRemote = findSseRemote(detail.remotes)
  if (sseRemote) {
    return {
      type: "mcpSseSkill",
      endpoint: sseRemote.url,
    }
  }

  // OCI or no supported transport
  const hasOci = detail.packages.some((p) => p.registry_name === "oci")
  if (hasOci) {
    return {
      type: "unsupported",
      note: "OCI package — requires manual Docker setup. Not directly supported as a perstack skill.",
    }
  }

  return {
    type: "unsupported",
    note: "No supported transport found (npm stdio or SSE/streamable-http remote).",
  }
}

/**
 * Fetch a server's registry record and reshape it for tool output.
 *
 * @param input - Server name and version to look up.
 * @returns Camel-cased metadata, packages, remotes, and the derived skill mapping.
 * @throws Whatever `fetchServerDetail` rejects with (e.g. `Registry API error: 404 ...`).
 */
export async function getMcpServerDetail(input: GetDetailInput): Promise<GetDetailOutput> {
  const detail = await fetchServerDetail(input.serverName, input.version)

  return {
    name: detail.name,
    description: detail.description,
    version: detail.version,
    // An empty repository URL is reported as absent.
    repository: detail.repository?.url || undefined,
    packages: detail.packages.map((p) => ({
      registryName: p.registry_name,
      name: p.name,
      version: p.version,
      arguments: p.package_arguments,
      environmentVariables: p.environment_variables.map((e) => ({
        name: e.name,
        description: e.description,
        required: e.required,
        isSecret: e.isSecret,
      })),
    })),
    remotes: detail.remotes.map((r) => ({
      transportType: r.transport_type,
      url: r.url,
    })),
    status: detail.status,
    perstackSkillMapping: buildPerstackSkillMapping(detail),
  }
}

/** Register the `getMcpServerDetail` tool on `server`; `Error`s become error tool results. */
export function registerGetMcpServerDetail(server: McpServer) {
  server.registerTool(
    "getMcpServerDetail",
    {
      title: "Get MCP Server Detail",
      description: dedent`
        Get detailed information about a specific MCP server from the registry.
        Returns server metadata, packages, remotes, and a perstackSkillMapping
        that maps the server to the appropriate perstack skill type
        (mcpStdioSkill for npm packages, mcpSseSkill for SSE/streamable-http remotes).
      `,
      inputSchema: {
        serverName: z
          .string()
          .describe("Server name from the registry (e.g. 'github/github-mcp-server')"),
        version: z
          .string()
          .optional()
          .default("latest")
          .describe("Server version (default: 'latest')"),
      },
    },
    async (input: GetDetailInput) => {
      try {
        return successToolResult(await getMcpServerDetail(input))
      } catch (e) {
        if (e instanceof Error) return errorToolResult(e)
        throw e
      }
    },
  )
}
"active" }, + })), + }), + { status: 200 }, + ), + ), + ) as typeof fetch +} + +afterEach(() => { + globalThis.fetch = originalFetch + clearCache() +}) + +describe("searchMcpRegistry", () => { + it("returns servers matching all keywords (AND match)", async () => { + mockRegistryWithServers([ + { name: "github-mcp", description: "GitHub integration for MCP" }, + { name: "slack-mcp", description: "Slack integration for MCP" }, + { name: "github-slack", description: "GitHub and Slack bridge" }, + ]) + + const result = await searchMcpRegistry({ query: "github", maxResults: 20 }) + expect(result.servers).toHaveLength(2) + expect(result.servers.map((s) => s.name)).toContain("github-mcp") + expect(result.servers.map((s) => s.name)).toContain("github-slack") + }) + + it("performs case-insensitive matching", async () => { + mockRegistryWithServers([{ name: "GitHub-Server", description: "A GitHub MCP server" }]) + + const result = await searchMcpRegistry({ query: "GITHUB", maxResults: 20 }) + expect(result.servers).toHaveLength(1) + expect(result.servers[0].name).toBe("GitHub-Server") + }) + + it("ranks name matches higher than description-only matches", async () => { + mockRegistryWithServers([ + { name: "plain-server", description: "Provides slack integration" }, + { name: "slack-mcp", description: "A messaging tool" }, + ]) + + const result = await searchMcpRegistry({ query: "slack", maxResults: 20 }) + expect(result.servers).toHaveLength(2) + expect(result.servers[0].name).toBe("slack-mcp") + expect(result.servers[1].name).toBe("plain-server") + }) + + it("excludes deprecated servers", async () => { + mockRegistryWithServers([ + { name: "active-server", description: "Active tool", status: "active" }, + { name: "old-server", description: "Active but old tool", status: "deprecated" }, + ]) + + const result = await searchMcpRegistry({ query: "tool", maxResults: 20 }) + expect(result.servers).toHaveLength(1) + expect(result.servers[0].name).toBe("active-server") + }) + + 
it("returns empty results for empty query", async () => { + mockRegistryWithServers([{ name: "server", description: "A server" }]) + + const result = await searchMcpRegistry({ query: " ", maxResults: 20 }) + expect(result.servers).toHaveLength(0) + expect(result.matchCount).toBe(0) + }) + + it("limits results to maxResults", async () => { + mockRegistryWithServers([ + { name: "a-tool", description: "tool alpha" }, + { name: "b-tool", description: "tool beta" }, + { name: "c-tool", description: "tool gamma" }, + ]) + + const result = await searchMcpRegistry({ query: "tool", maxResults: 2 }) + expect(result.servers).toHaveLength(2) + expect(result.matchCount).toBe(3) + }) + + it("returns totalFetched count", async () => { + mockRegistryWithServers([ + { name: "server-a", description: "A" }, + { name: "server-b", description: "B" }, + ]) + + const result = await searchMcpRegistry({ query: "server-a", maxResults: 20 }) + expect(result.totalFetched).toBe(2) + expect(result.servers).toHaveLength(1) + }) + + it("handles registry error gracefully", async () => { + globalThis.fetch = mock(() => + Promise.resolve(new Response("", { status: 503, statusText: "Service Unavailable" })), + ) as typeof fetch + + await expect(searchMcpRegistry({ query: "test", maxResults: 20 })).rejects.toThrow( + "Registry API error", + ) + }) + + it("applies AND matching with multiple keywords", async () => { + mockRegistryWithServers([ + { name: "file-system", description: "Read and write files" }, + { name: "file-search", description: "Search through documents" }, + { name: "system-monitor", description: "Monitor system health" }, + ]) + + const result = await searchMcpRegistry({ query: "file system", maxResults: 20 }) + expect(result.servers).toHaveLength(1) + expect(result.servers[0].name).toBe("file-system") + }) +}) diff --git a/apps/create-expert-skill/src/tools/search-mcp-registry.ts b/apps/create-expert-skill/src/tools/search-mcp-registry.ts new file mode 100644 index 00000000..36905976 
const MAX_RESULTS_LIMIT = 50
const DEFAULT_MAX_RESULTS = 20

/** Arguments accepted by `searchMcpRegistry`. */
interface SearchInput {
  query: string
  maxResults: number
}

/** One search hit as returned to the caller. */
interface SearchResultItem {
  name: string
  description: string
  version: string
  status: string
}

/** Search response: the returned page plus counts before and after filtering. */
interface SearchOutput {
  servers: SearchResultItem[]
  totalFetched: number
  matchCount: number
}

/**
 * True when every keyword occurs in `text`, ignoring the case of `text`.
 * Keywords are expected to be lower-cased already.
 */
function matchesKeywords(text: string, keywords: string[]): boolean {
  const lower = text.toLowerCase()
  return keywords.every((kw) => lower.includes(kw))
}

/** Relevance score: the number of keywords that occur in the server's name. */
function scoreMatch(server: ServerListItem, keywords: string[]): number {
  const nameLower = server.name.toLowerCase()
  return keywords.filter((kw) => nameLower.includes(kw)).length
}

/**
 * Turn a requested result count into a safe `slice` bound.
 *
 * A negative end index makes `Array.prototype.slice` count from the end (so -1
 * returns "all but the last"); negatives are therefore floored to 0. Fractions
 * are rounded down, and a missing or NaN value falls back to DEFAULT_MAX_RESULTS.
 *
 * @param requested - Caller-supplied count.
 * @param upperBound - Largest count to allow (may be `Infinity`).
 */
function normalizeMaxResults(requested: number, upperBound: number): number {
  if (typeof requested !== "number" || Number.isNaN(requested)) {
    return Math.min(DEFAULT_MAX_RESULTS, upperBound)
  }
  return Math.min(upperBound, Math.max(0, Math.floor(requested)))
}

/**
 * Search the registry's server list by space-separated keywords.
 *
 * Every keyword must occur (case-insensitively) in the server's name or
 * description. Servers whose status is `"deprecated"` are excluded. Hits are
 * ordered by how many keywords occur in the name; ties keep registry order.
 *
 * @param input - Query string and maximum number of hits to return. This
 *   function applies no upper cap of its own; the registered tool does.
 * @returns Up to `maxResults` hits, the total number of servers examined, and the
 *   number of hits before truncation. A blank query yields no hits.
 * @throws Whatever `fetchAllServers` rejects with (e.g. `Registry API error: ...`).
 */
export async function searchMcpRegistry(input: SearchInput): Promise<SearchOutput> {
  const allServers = await fetchAllServers()

  const keywords = input.query
    .toLowerCase()
    .split(/\s+/)
    .filter((k) => k.length > 0)

  if (keywords.length === 0) {
    return { servers: [], totalFetched: allServers.length, matchCount: 0 }
  }

  // Score each hit once up front instead of on every comparison inside sort().
  const ranked = allServers
    .filter(
      (s) =>
        s.version_detail?.status !== "deprecated" &&
        matchesKeywords(`${s.name} ${s.description}`, keywords),
    )
    .map((server) => ({ server, score: scoreMatch(server, keywords) }))
    .sort((a, b) => b.score - a.score)

  const limit = normalizeMaxResults(input.maxResults, Number.POSITIVE_INFINITY)

  return {
    servers: ranked.slice(0, limit).map(({ server }) => ({
      name: server.name,
      description: server.description,
      version: server.version_detail?.version ?? "",
      status: server.version_detail?.status ?? "",
    })),
    totalFetched: allServers.length,
    matchCount: ranked.length,
  }
}

/**
 * Register the `searchMcpRegistry` tool on `server`.
 * `maxResults` is confined to 0..MAX_RESULTS_LIMIT before searching; `Error`s
 * become error tool results.
 */
export function registerSearchMcpRegistry(server: McpServer) {
  server.registerTool(
    "searchMcpRegistry",
    {
      title: "Search MCP Registry",
      description: dedent`
        Search the official MCP registry for MCP servers by keywords.
        Uses space-separated AND matching against server name and description.
        Returns matching servers sorted by relevance (name matches ranked higher).
        Deprecated servers are excluded from results.
      `,
      inputSchema: {
        query: z.string().describe("Space-separated keywords to search for (AND matching)"),
        maxResults: z
          .number()
          .optional()
          .default(DEFAULT_MAX_RESULTS)
          .describe(
            `Maximum results to return (default ${DEFAULT_MAX_RESULTS}, max ${MAX_RESULTS_LIMIT})`,
          ),
      },
    },
    async (input: SearchInput) => {
      try {
        const clamped = {
          ...input,
          maxResults: normalizeMaxResults(input.maxResults, MAX_RESULTS_LIMIT),
        }
        return successToolResult(await searchMcpRegistry(clamped))
      } catch (e) {
        if (e instanceof Error) return errorToolResult(e)
        throw e
      }
    },
  )
}
+ const skillServerPath = fileURLToPath( + new URL("../../create-expert-skill/dist/bin/server.js", tomlPath), + ) + const skillFinderSkill = + config.experts?.["@create-expert/skill-finder"]?.skills?.["@perstack/create-expert-skill"] + if (skillFinderSkill && skillFinderSkill.type === "mcpStdioSkill") { + skillFinderSkill.args = [skillServerPath] + } + const lockfilePath = findLockfile() const lockfile = lockfilePath ? (loadLockfile(lockfilePath) ?? undefined) : undefined const additionalEnv = (env: Record) => { diff --git a/apps/create-expert/perstack.toml b/apps/create-expert/perstack.toml index c8e87ef3..fd8ed00e 100644 --- a/apps/create-expert/perstack.toml +++ b/apps/create-expert/perstack.toml @@ -14,6 +14,7 @@ Delegate to your specialists and pass file paths between them. Do not read or in ## Delegates - @create-expert/planner — designs expert architectures, writes plan to plan.md +- @create-expert/skill-finder — searches MCP registry for relevant skills, writes skill-report.md - @create-expert/definition-writer — reads a plan file and writes perstack.toml - @create-expert/expert-tester — tests a perstack.toml by exercising each expert @@ -21,12 +22,14 @@ Delegate to your specialists and pass file paths between them. Do not read or in 1. If a perstack.toml already exists in the workspace, note its path 2. Delegate to planner: pass the user's request and the perstack.toml path if one exists -3. Delegate to definition-writer: tell it to read plan.md and write perstack.toml -4. Delegate to expert-tester: tell it to test perstack.toml -5. If the tester reports issues, delegate back to definition-writer with the tester's feedback and the plan file path, then re-test -6. attemptCompletion with a summary of what was created +3. If the plan includes MCP skill requirements (external API/service integrations), delegate to skill-finder: pass the plan.md path so it can search the MCP registry and write skill-report.md +4. 
Delegate to definition-writer: tell it to read plan.md AND skill-report.md (if step 3 ran) and write perstack.toml. IMPORTANT: explicitly include the skill-report.md path in the delegation message so the definition-writer knows to incorporate MCP skills from it. +5. Delegate to expert-tester: tell it to test perstack.toml +6. If the tester reports issues, delegate back to definition-writer with the tester's feedback and the plan file path, then re-test +7. If skill-report.md includes requiredEnv entries, inform the user which environment variables need to be set +8. attemptCompletion with a summary of what was created """ -delegates = ["@create-expert/planner", "@create-expert/definition-writer", "@create-expert/expert-tester"] +delegates = ["@create-expert/planner", "@create-expert/skill-finder", "@create-expert/definition-writer", "@create-expert/expert-tester"] [experts."create-expert".skills."@perstack/base"] type = "mcpStdioSkill" @@ -99,6 +102,13 @@ Experts must produce real, usable output — not ceremony. A programming expert 5. Consider what tools each expert needs (minimal set) 6. Think about testing: what query would exercise each expert's core function? +## Skill Requirements + +When the expert needs to interact with external APIs or services (e.g., GitHub, Slack, databases, cloud providers), include a "Skill Requirements" section in the plan: +- List the external integrations needed +- Suggest search keywords for the MCP registry (try multiple variations) +- Include fallback approaches if no suitable MCP server is found (e.g., using exec with CLI tools, direct API calls) + ## Output Write your design to plan.md with the following sections: @@ -109,6 +119,7 @@ Write your design to plan.md with the following sections: 4. **Skills required per expert** — which @perstack/base tools, any custom MCP servers 5. **Delegation structure** — who delegates to whom, with rationale 6. 
**Test scenario for each expert** — a concrete, realistic query that exercises the expert's core function +7. **MCP skill requirements** (if applicable) — external integrations needed, search keywords, fallback approaches After writing the file, attemptCompletion with the file path. """ @@ -119,11 +130,74 @@ command = "npx" packageName = "@perstack/base" pick = ["readTextFile", "writeTextFile", "exec", "todo", "attemptCompletion"] +[experts."@create-expert/skill-finder"] +version = "1.0.0" +description = """ +Searches the MCP registry for MCP servers that match expert skill requirements. Provide: path to plan.md \ +containing a "Skill Requirements" section. Writes findings to skill-report.md with TOML configuration snippets. +""" +instruction = """ +You are an MCP skill researcher. Your job is to find and evaluate MCP servers from the official registry that can serve as skills for Perstack experts. + +## Investigation Process + +1. Read the plan.md file to identify the "Skill Requirements" section +2. For each required integration, search the MCP registry using multiple keyword variations (e.g., for GitHub: "github", "git", "github api") +3. For promising candidates, get detailed server information +4. Verify npm package availability using exec: `npm info <package-name> --json` — check that the package exists, note version and weekly downloads +5.
Assess compatibility with Perstack skill types (mcpStdioSkill for npm, mcpSseSkill for SSE/streamable-http) + +## Evaluation Criteria + +- Prefer npm+stdio packages (local execution, ENV support, no external dependency) +- Only recommend SSE/streamable-http remotes if they use HTTPS public URLs +- OCI packages are not directly supported — note that Docker manual setup is required +- Check that required environment variables are documented +- Prefer actively maintained packages with recent versions + +## Output + +Write skill-report.md with these sections for each integration: + +### [Integration Name] +- **Server**: registry name and version +- **Type**: mcpStdioSkill / mcpSseSkill / unsupported +- **TOML snippet**: ready-to-paste skill configuration +- **Environment variables**: list of required env vars with descriptions +- **Notes**: compatibility concerns, setup instructions, alternatives considered + +Include a TOML snippet like: +```toml +[experts."expert-name".skills."skill-key"] +type = "mcpStdioSkill" +command = "npx" +packageName = "@scope/package-name" +requiredEnv = ["API_KEY"] +``` + +If no suitable MCP server is found, document the fallback approach (e.g., using exec with CLI tools). + +After writing skill-report.md, attemptCompletion with the file path. +""" + +[experts."@create-expert/skill-finder".skills."@perstack/base"] +type = "mcpStdioSkill" +command = "npx" +packageName = "@perstack/base" +pick = ["readTextFile", "writeTextFile", "exec", "todo", "attemptCompletion"] + +# When published to npm, use: command = "npx", packageName = "@perstack/create-expert-skill" +[experts."@create-expert/skill-finder".skills."@perstack/create-expert-skill"] +type = "mcpStdioSkill" +command = "bun" +args = ["./apps/create-expert-skill/dist/bin/server.js"] +pick = ["searchMcpRegistry", "getMcpServerDetail"] + [experts."@create-expert/definition-writer"] version = "1.0.0" description = """ Writes Perstack expert definitions in perstack.toml from a design plan. 
Provide: path to the plan file (e.g. plan.md). \ -Optionally include feedback from a previous test round to address. +Optionally include: (1) feedback from a previous test round to address, (2) path to skill-report.md with MCP registry findings. """ instruction = """ You are a Perstack definition writer. Read a design plan file and write the corresponding perstack.toml. @@ -187,13 +261,27 @@ pick = ["tool1", "tool2"] # optional, include specific tools - Always include version, description, instruction for each expert - Produce valid TOML — no syntax errors +## MCP Registry Skills + +If a skill-report.md path is provided in the query, read it for MCP registry findings. The report contains: +- Recommended MCP servers with TOML configuration snippets +- Environment variables that need to be set (requiredEnv) +- Compatibility notes and fallback suggestions + +To incorporate MCP skills into the expert definition: +- Copy the TOML skill configuration snippets from the report into the appropriate expert's skills section +- Use a descriptive skill key (e.g., `"@github/github-mcp-server"`) +- Include any requiredEnv from the report +- If the report recommends a fallback (exec-based), use that instead + ## Process 1. Read the plan file specified in the query -2. If a perstack.toml already exists, read it first. You MUST preserve ALL existing expert definitions exactly as they are — only add or modify experts described in the plan. -3. Write the perstack.toml with both the preserved existing experts AND the new expert definitions from the plan -4. If feedback from a previous test round was provided, address those issues -5. attemptCompletion when the perstack.toml has been written +2. If a skill-report.md path was provided, read it for MCP skill configurations +3. If a perstack.toml already exists, read it first. You MUST preserve ALL existing expert definitions exactly as they are — only add or modify experts described in the plan. +4. 
Write the perstack.toml with both the preserved existing experts AND the new expert definitions from the plan and skill report +5. If feedback from a previous test round was provided, address those issues +6. attemptCompletion when the perstack.toml has been written """ [experts."@create-expert/definition-writer".skills."@perstack/base"] diff --git a/bun.lock b/bun.lock index af1764bf..210956ac 100644 --- a/bun.lock +++ b/bun.lock @@ -17,7 +17,7 @@ }, "apps/base": { "name": "@perstack/base", - "version": "0.0.64", + "version": "0.0.65", "dependencies": { "@modelcontextprotocol/sdk": "^1.26.0", "@perstack/core": "workspace:*", @@ -32,7 +32,7 @@ }, "apps/create-expert": { "name": "create-expert", - "version": "0.0.40", + "version": "0.0.41", "bin": { "create-expert": "bin/cli.ts", }, @@ -67,7 +67,7 @@ }, "apps/perstack": { "name": "perstack", - "version": "0.0.93", + "version": "0.0.94", "dependencies": { "commander": "^14.0.3", }, @@ -110,7 +110,7 @@ }, "packages/installer": { "name": "@perstack/installer", - "version": "0.0.16", + "version": "0.0.17", "dependencies": { "@perstack/api-client": "^0.0.56", "@perstack/core": "workspace:*", @@ -306,7 +306,7 @@ }, "packages/runtime": { "name": "@perstack/runtime", - "version": "0.0.113", + "version": "0.0.114", "dependencies": { "@ai-sdk/amazon-bedrock": "^4.0.60", "@ai-sdk/anthropic": "^3.0.44", @@ -342,7 +342,7 @@ }, "packages/skill-manager": { "name": "@perstack/skill-manager", - "version": "0.0.10", + "version": "0.0.11", "dependencies": { "@modelcontextprotocol/sdk": "^1.26.0", "@paralleldrive/cuid2": "^3.3.0", @@ -627,17 +627,17 @@ "@pkgjs/parseargs": ["@pkgjs/parseargs@0.11.0", "", {}, "sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg=="], - "@smithy/eventstream-codec": ["@smithy/eventstream-codec@4.2.8", "", { "dependencies": { "@aws-crypto/crc32": "5.2.0", "@smithy/types": "^4.12.0", "@smithy/util-hex-encoding": "^4.2.0", "tslib": "^2.6.2" } }, 
"sha512-jS/O5Q14UsufqoGhov7dHLOPCzkYJl9QDzusI2Psh4wyYx/izhzvX9P4D69aTxcdfVhEPhjK+wYyn/PzLjKbbw=="], + "@smithy/eventstream-codec": ["@smithy/eventstream-codec@4.2.9", "", { "dependencies": { "@aws-crypto/crc32": "5.2.0", "@smithy/types": "^4.12.1", "@smithy/util-hex-encoding": "^4.2.1", "tslib": "^2.6.2" } }, "sha512-8/wOb1wm/joXCj6SNHRFnfcNBR4xmumw869UnM+RrjoWeliNcTnOTw2WZXBWoKfszbL/v/AxdijIilqRMst+vA=="], - "@smithy/is-array-buffer": ["@smithy/is-array-buffer@4.2.0", "", { "dependencies": { "tslib": "^2.6.2" } }, "sha512-DZZZBvC7sjcYh4MazJSGiWMI2L7E0oCiRHREDzIxi/M2LY79/21iXt6aPLHge82wi5LsuRF5A06Ds3+0mlh6CQ=="], + "@smithy/is-array-buffer": ["@smithy/is-array-buffer@4.2.1", "", { "dependencies": { "tslib": "^2.6.2" } }, "sha512-Yfu664Qbf1B4IYIsYgKoABt010daZjkaCRvdU/sPnZG6TtHOB0md0RjNdLGzxe5UIdn9js4ftPICzmkRa9RJ4Q=="], - "@smithy/types": ["@smithy/types@4.12.0", "", { "dependencies": { "tslib": "^2.6.2" } }, "sha512-9YcuJVTOBDjg9LWo23Qp0lTQ3D7fQsQtwle0jVfpbUHy9qBwCEgKuVH4FqFB3VYu0nwdHKiEMA+oXz7oV8X1kw=="], + "@smithy/types": ["@smithy/types@4.12.1", "", { "dependencies": { "tslib": "^2.6.2" } }, "sha512-ow30Ze/DD02KH2p0eMyIF2+qJzGyNb0kFrnTRtPpuOkQ4hrgvLdaU4YC6r/K8aOrCML4FH0Cmm0aI4503L1Hwg=="], - "@smithy/util-buffer-from": ["@smithy/util-buffer-from@4.2.0", "", { "dependencies": { "@smithy/is-array-buffer": "^4.2.0", "tslib": "^2.6.2" } }, "sha512-kAY9hTKulTNevM2nlRtxAG2FQ3B2OR6QIrPY3zE5LqJy1oxzmgBGsHLWTcNhWXKchgA0WHW+mZkQrng/pgcCew=="], + "@smithy/util-buffer-from": ["@smithy/util-buffer-from@4.2.1", "", { "dependencies": { "@smithy/is-array-buffer": "^4.2.1", "tslib": "^2.6.2" } }, "sha512-/swhmt1qTiVkaejlmMPPDgZhEaWb/HWMGRBheaxwuVkusp/z+ErJyQxO6kaXumOciZSWlmq6Z5mNylCd33X7Ig=="], - "@smithy/util-hex-encoding": ["@smithy/util-hex-encoding@4.2.0", "", { "dependencies": { "tslib": "^2.6.2" } }, "sha512-CCQBwJIvXMLKxVbO88IukazJD9a4kQ9ZN7/UMGBjBcJYvatpWk+9g870El4cB8/EJxfe+k+y0GmR9CAzkF+Nbw=="], + "@smithy/util-hex-encoding": ["@smithy/util-hex-encoding@4.2.1", "", { 
"dependencies": { "tslib": "^2.6.2" } }, "sha512-c1hHtkgAWmE35/50gmdKajgGAKV3ePJ7t6UtEmpfCWJmQE9BQAQPz0URUVI89eSkcDqCtzqllxzG28IQoZPvwA=="], - "@smithy/util-utf8": ["@smithy/util-utf8@4.2.0", "", { "dependencies": { "@smithy/util-buffer-from": "^4.2.0", "tslib": "^2.6.2" } }, "sha512-zBPfuzoI8xyBtR2P6WQj63Rz8i3AmfAaJLuNG8dWsfvPe8lO4aCPYLn879mEgHndZH1zQ2oXmG8O1GGzzaoZiw=="], + "@smithy/util-utf8": ["@smithy/util-utf8@4.2.1", "", { "dependencies": { "@smithy/util-buffer-from": "^4.2.1", "tslib": "^2.6.2" } }, "sha512-DSIwNaWtmzrNQHv8g7DBGR9mulSit65KSj5ymGEIAknmIN8IpbZefEep10LaMG/P/xquwbmJ1h9ectz8z6mV6g=="], "@standard-schema/spec": ["@standard-schema/spec@1.1.0", "", {}, "sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w=="], @@ -883,7 +883,7 @@ "hasown": ["hasown@2.0.2", "", { "dependencies": { "function-bind": "^1.1.2" } }, "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ=="], - "hono": ["hono@4.12.1", "", {}, "sha512-hi9afu8g0lfJVLolxElAZGANCTTl6bewIdsRNhaywfP9K8BPf++F2z6OLrYGIinUwpRKzbZHMhPwvc0ZEpAwGw=="], + "hono": ["hono@4.12.2", "", {}, "sha512-gJnaDHXKDayjt8ue0n8Gs0A007yKXj4Xzb8+cNjZeYsSzzwKc0Lr+OZgYwVfB0pHfUs17EPoLvrOsEaJ9mj+Tg=="], "http-errors": ["http-errors@2.0.1", "", { "dependencies": { "depd": "~2.0.0", "inherits": "~2.0.4", "setprototypeof": "~1.2.0", "statuses": "~2.0.2", "toidentifier": "~1.0.1" } }, "sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ=="], diff --git a/e2e/create-expert/create-expert.test.ts b/e2e/create-expert/create-expert.test.ts index 0919b761..b92a72f0 100644 --- a/e2e/create-expert/create-expert.test.ts +++ b/e2e/create-expert/create-expert.test.ts @@ -3,6 +3,8 @@ * * Tests the create-expert agent that creates/modifies perstack.toml files: * - Creates new expert definitions via planner + definition-writer + expert-tester delegates + * - Investigates MCP registry skills via skill-finder delegate 
when external integrations needed + * - Adds discovered MCP skills to generated perstack.toml expert definitions * - Tests experts via addDelegateFromConfig after writing perstack.toml * - Preserves existing experts when modifying perstack.toml * @@ -23,9 +25,9 @@ const PROJECT_ROOT = path.resolve(process.cwd()) const CLI_PATH = path.join(PROJECT_ROOT, "apps/create-expert/dist/bin/cli.js") // LLM API calls require extended timeout; delegation adds extra LLM round-trips. // The create-expert workflow involves multiple delegation round-trips (planner → -// definition-writer → expert-tester, with possible retries) which can exceed -// 7 minutes in CI environments. -const LLM_TIMEOUT = 600_000 +// skill-finder → definition-writer → expert-tester, with possible retries) which +// can exceed 10 minutes in CI environments. +const LLM_TIMEOUT = 900_000 function runCreateExpert(query: string, cwd: string, timeout = LLM_TIMEOUT): Promise { const args = injectProviderArgs(["--headless", query]) @@ -85,12 +87,13 @@ function diagnostics(result: RunResult): string { describe("create-expert", () => { it( - "should create a new perstack.toml", + "should create a new perstack.toml with MCP skill integration", async () => { const tempDir = createTempDir() + // Request an expert that requires external API integration to trigger skill-finder const result = await runCreateExpert( - "Create a simple hello-world expert that greets the user", + "Create a GitHub repository analyzer expert that reads GitHub issues and pull requests via the GitHub API to generate project status reports", tempDir, ) @@ -114,14 +117,26 @@ describe("create-expert", () => { completeEvents.some((e) => (e as { expertKey: string }).expertKey === "create-expert"), ).toBe(true) - // Verify delegation: at least 2 completeRun (delegate + coordinator) - expect(completeEvents.length).toBeGreaterThanOrEqual(2) + // Verify delegation: at least 3 completeRun (planner + skill-finder + definition-writer/tester + coordinator) 
+ expect(completeEvents.length).toBeGreaterThanOrEqual(3) // Verify definition-writer writes TOML and expert-tester tests via addDelegateFromConfig const toolNames = getAllCalledToolNames(result) expect(toolNames).toContain("writeTextFile") expect(toolNames).toContain("addDelegateFromConfig") + // Verify skill investigation: skill-finder searched the MCP registry + expect(toolNames, "searchMcpRegistry should be called by skill-finder").toContain( + "searchMcpRegistry", + ) + + // Verify skill-report.md was created (skill-finder output) + const skillReportPath = path.join(tempDir, "skill-report.md") + expect( + fs.existsSync(skillReportPath), + "skill-report.md should be created by skill-finder", + ).toBe(true) + // Verify perstack.toml was created with at least one expert definition const tomlPath = path.join(tempDir, "perstack.toml") expect(fs.existsSync(tomlPath)).toBe(true) @@ -129,6 +144,17 @@ describe("create-expert", () => { const expertMatches = tomlContent.match(/\[experts\."[^"]+"\]/g) expect(expertMatches).not.toBeNull() expect(expertMatches!.length).toBeGreaterThanOrEqual(1) + + // Verify skill addition: at least one expert has a non-base skill (MCP integration). + // This depends on the LLM correctly forwarding skill-report.md through the delegation + // chain (coordinator → definition-writer), which can be non-deterministic with smaller models. + const nonBaseSkillMatches = tomlContent.match( + /\[experts\."[^"]+".skills\."(?!@perstack\/base")[^"]+"\]/g, + ) + expect( + nonBaseSkillMatches && nonBaseSkillMatches.length > 0, + "at least one expert should have a non-base MCP skill", + ).toBe(true) }, LLM_TIMEOUT, )