Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions prompts/hallucination-detector.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
specVersion: 1.0.0
evaluator: technical-accuracy
threshold: 12
severity: error
name: Hallucination Detector
Expand Down
11 changes: 5 additions & 6 deletions src/evaluators/technical-accuracy-evaluator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,16 +36,15 @@ export class TechnicalAccuracyEvaluator extends BaseEvaluator {
async evaluate(_file: string, content: string): Promise<CriteriaResult> {
// Step 1: Run base LLM evaluation
const schema = buildCriteriaJsonSchema();
const baseResult = await this.llmProvider.runPromptStructured<CriteriaResult>(
const result = await this.llmProvider.runPromptStructured<CriteriaResult>(
content,
this.prompt.body,
schema
);

// Step 2: Verify each violation with web search
const verifiedResult = { ...baseResult };

for (const criterion of verifiedResult.criteria) {
// Mutate in place since we're returning this result anyway
for (const criterion of result.criteria) {
for (const violation of criterion.violations) {
if (!violation.analysis || violation.analysis.trim().length < MIN_CLAIM_LENGTH) {
continue; // Skip non-factual violations
Expand All @@ -61,7 +60,7 @@ export class TechnicalAccuracyEvaluator extends BaseEvaluator {
}
}

return verifiedResult;
return result;
}

private async verifyFact(claim: string): Promise<VerificationResult> {
Expand Down Expand Up @@ -167,7 +166,7 @@ ${snippets.map((s, i) => `[${i + 1}] ${s.snippet} (${s.url})`).join('\n')}
Respond ONLY in JSON:
{
"status": "supported|unsupported|unverifiable",
"justification": "brief reason (max 25 words)",
"justification": "brief reason (max 10 words)",
"link": "most relevant supporting or contradicting source if available"
}
`;
Expand Down