From fa2d38cd167c8c35ec6d911403b50f09b43bde7e Mon Sep 17 00:00:00 2001 From: oluwatooki-GA Date: Tue, 18 Nov 2025 11:44:20 +0100 Subject: [PATCH 1/2] Fix technical accuracy evaluator not running - Add evaluator: technical-accuracy field to hallucination-detector prompt - Fix shallow copy bug in TechnicalAccuracyEvaluator that prevented verification results from persisting - Verification results now properly show [supported/unsupported/unverifiable] status with justifications and source URLs --- prompts/hallucination-detector.md | 1 + src/evaluators/technical-accuracy-evaluator.ts | 9 ++++----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/prompts/hallucination-detector.md b/prompts/hallucination-detector.md index 7160548..d9222e1 100644 --- a/prompts/hallucination-detector.md +++ b/prompts/hallucination-detector.md @@ -1,5 +1,6 @@ --- specVersion: 1.0.0 +evaluator: technical-accuracy threshold: 12 severity: error name: Hallucination Detector diff --git a/src/evaluators/technical-accuracy-evaluator.ts b/src/evaluators/technical-accuracy-evaluator.ts index 48a64b9..8585090 100644 --- a/src/evaluators/technical-accuracy-evaluator.ts +++ b/src/evaluators/technical-accuracy-evaluator.ts @@ -36,16 +36,15 @@ export class TechnicalAccuracyEvaluator extends BaseEvaluator { async evaluate(_file: string, content: string): Promise { // Step 1: Run base LLM evaluation const schema = buildCriteriaJsonSchema(); - const baseResult = await this.llmProvider.runPromptStructured( + const result = await this.llmProvider.runPromptStructured( content, this.prompt.body, schema ); // Step 2: Verify each violation with web search - const verifiedResult = { ...baseResult }; - - for (const criterion of verifiedResult.criteria) { + // Mutate in place since we're returning this result anyway + for (const criterion of result.criteria) { for (const violation of criterion.violations) { if (!violation.analysis || violation.analysis.trim().length < MIN_CLAIM_LENGTH) { continue; // 
Skip non-factual violations @@ -61,7 +60,7 @@ export class TechnicalAccuracyEvaluator extends BaseEvaluator { } } - return verifiedResult; + return result; } private async verifyFact(claim: string): Promise { From fa58eb7f8e6fbeaaa81f00df9911552a1a8a009e Mon Sep 17 00:00:00 2001 From: Ayomide Date: Tue, 25 Nov 2025 11:06:57 +0100 Subject: [PATCH 2/2] fix: Shorten TechnicalAccuracyEvaluator verification justification limit from 25 to 10 words. --- src/evaluators/technical-accuracy-evaluator.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/evaluators/technical-accuracy-evaluator.ts b/src/evaluators/technical-accuracy-evaluator.ts index 7c02732..dd3b19a 100644 --- a/src/evaluators/technical-accuracy-evaluator.ts +++ b/src/evaluators/technical-accuracy-evaluator.ts @@ -166,7 +166,7 @@ ${snippets.map((s, i) => `[${i + 1}] ${s.snippet} (${s.url})`).join('\n')} Respond ONLY in JSON: { "status": "supported|unsupported|unverifiable", - "justification": "brief reason (max 25 words)", + "justification": "brief reason (max 10 words)", "link": "most relevant supporting or contradicting source if available" } `;