From 7462fab7b14890a8af8f0d11dd43e881cdef06b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Beteg=C3=B3n?= Date: Tue, 7 Apr 2026 17:29:50 +0200 Subject: [PATCH] refactor(eval): replace OpenAI with Anthropic SDK in init-eval judge Standardizes on a single LLM provider across all evals. The skill-eval already used Anthropic; now init-eval does too, removing the openai dependency entirely. Co-Authored-By: Claude Opus 4.6 (1M context) --- bun.lock | 3 --- package.json | 1 - test/init-eval/helpers/judge.ts | 19 ++++++++++--------- 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/bun.lock b/bun.lock index 12f4b2751..0dd76ba4f 100644 --- a/bun.lock +++ b/bun.lock @@ -28,7 +28,6 @@ "http-cache-semantics": "^4.2.0", "ignore": "^7.0.5", "marked": "^15", - "openai": "^6.22.0", "p-limit": "^7.2.0", "picomatch": "^4.0.3", "pretty-ms": "^9.3.0", @@ -502,8 +501,6 @@ "once": ["once@1.4.0", "", { "dependencies": { "wrappy": "1" } }, "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w=="], - "openai": ["openai@6.25.0", "", { "peerDependencies": { "ws": "^8.18.0", "zod": "^3.25 || ^4.0" }, "optionalPeers": ["ws", "zod"], "bin": { "openai": "bin/cli" } }, "sha512-mEh6VZ2ds2AGGokWARo18aPISI1OhlgdEIC1ewhkZr8pSIT31dec0ecr9Nhxx0JlybyOgoAT1sWeKtwPZzJyww=="], - "openapi-types": ["openapi-types@12.1.3", "", {}, "sha512-N4YtSYJqghVu4iek2ZUvcN/0aqH1kRDuNqzcycDxhOUpg7GdvLa2F3DgS6yBNhInhv2r/6I0Flkn7CqL8+nIcw=="], "p-limit": ["p-limit@7.2.0", "", { "dependencies": { "yocto-queue": "^1.2.1" } }, "sha512-ATHLtwoTNDloHRFFxFJdHnG6n2WUeFjaR8XQMFdKIv0xkXjrER8/iG9iu265jOM95zXHAfv9oTkqhrfbIzosrQ=="], diff --git a/package.json b/package.json index a18d73c4a..12faa5ae4 100644 --- a/package.json +++ b/package.json @@ -30,7 +30,6 @@ "http-cache-semantics": "^4.2.0", "ignore": "^7.0.5", "marked": "^15", - "openai": "^6.22.0", "p-limit": "^7.2.0", "picomatch": "^4.0.3", "pretty-ms": "^9.3.0", diff --git a/test/init-eval/helpers/judge.ts 
b/test/init-eval/helpers/judge.ts index 4b8ee863d..103839021 100644 --- a/test/init-eval/helpers/judge.ts +++ b/test/init-eval/helpers/judge.ts @@ -17,7 +17,7 @@ export type JudgeVerdict = { /** * Use an LLM judge to evaluate whether a **single feature** was correctly set - * up by the wizard. Returns null if OPENAI_API_KEY is not set. + * up by the wizard. Returns null if ANTHROPIC_API_KEY is not set. * * `docsContent` is the pre-fetched plain-text documentation to include as * ground truth in the prompt. @@ -28,16 +28,16 @@ export async function judgeFeature( feature: FeatureDoc, docsContent: string ): Promise<JudgeVerdict | null> { - const apiKey = process.env.OPENAI_API_KEY; + const apiKey = process.env.ANTHROPIC_API_KEY; if (!apiKey) { console.log( - ` [judge:${feature.feature}] Skipping LLM judge (no OPENAI_API_KEY set)` + ` [judge:${feature.feature}] Skipping LLM judge (no ANTHROPIC_API_KEY set)` ); return null; } // Restore real fetch — test preload mocks it to catch accidental network - // calls, but we need real HTTP for the OpenAI API. + // calls, but we need real HTTP for the Anthropic API. 
const realFetch = (globalThis as { __originalFetch?: typeof fetch }) .__originalFetch; if (realFetch) { @@ -45,8 +45,8 @@ export async function judgeFeature( } // Dynamic import so we don't fail when the package isn't installed - const { default: OpenAI } = await import("openai"); - const client = new OpenAI({ apiKey }); + const { default: Anthropic } = await import("@anthropic-ai/sdk"); + const client = new Anthropic({ apiKey }); const newFilesSection = Object.entries(result.newFiles) .map(([path, content]) => `### ${path}\n\`\`\`\n${content}\n\`\`\``) @@ -86,13 +86,14 @@ Return ONLY valid JSON with this structure: "summary": "Brief overall assessment of ${feature.feature} setup" }`; - const response = await client.chat.completions.create({ - model: "gpt-4o", + const response = await client.messages.create({ + model: "claude-sonnet-4-6", max_tokens: 1024, messages: [{ role: "user", content: prompt }], }); - const text = response.choices[0]?.message?.content ?? ""; + const textBlock = response.content.find((b) => b.type === "text"); + const text = textBlock?.text ?? ""; // Extract JSON from response (handle markdown code blocks) const jsonMatch = text.match(/\{[\s\S]*\}/);