From 057cdc8458532174ae1e52de655ed38610784aeb Mon Sep 17 00:00:00 2001 From: kahboom Date: Sat, 28 Feb 2026 14:22:16 +0000 Subject: [PATCH 1/2] feat: add explainable risk score model with weighted field and claim penalties (F040) Co-Authored-By: Claude Opus 4.6 --- PRD.json | 3 +- progress.txt | 12 ++ src/rules/scoring.test.ts | 312 ++++++++++++++++++++++++++++++++++++++ src/rules/scoring.ts | 90 +++++++++++ src/types/index.ts | 3 + src/types/scan.test.ts | 6 + src/types/scan.ts | 23 +++ 7 files changed, 448 insertions(+), 1 deletion(-) create mode 100644 src/rules/scoring.test.ts create mode 100644 src/rules/scoring.ts diff --git a/PRD.json b/PRD.json index a74625f..1354d78 100644 --- a/PRD.json +++ b/PRD.json @@ -172,7 +172,8 @@ "id": "F040", "phase": 1, "name": "Risk Score Model", - "description": "Explainable scoring based on missing disclosures and risky claims" + "description": "Explainable scoring based on missing disclosures and risky claims", + "status": "passes" }, { "id": "F050", diff --git a/progress.txt b/progress.txt index ae09219..03d169b 100644 --- a/progress.txt +++ b/progress.txt @@ -35,3 +35,15 @@ F030 - Rule Engine v1 [PASSES] - FIELD_SEARCH_PATTERNS: regex map for all 12 fields (product_name, brand, materials, warnings, etc.) 
- META_KEY_MAP: Open Graph / structured data key lookups for meta-based detection - 20 new unit tests for engine module (59 total passing) + +F040 - Risk Score Model [PASSES] +- scoring.ts: calculateRiskScore() produces explainable breakdown from fields and claims +- calculateFieldPenalties: required fields penalized at 10 pts, optional at 3 pts when missing +- calculateClaimPenalties: high=8, medium=5, low=2 pts per unique unsubstantiated claim (deduplicated) +- calculateMaxFieldScore: computes theoretical max from field definitions +- RiskScoreBreakdown type with fieldPenalties and claimPenalties arrays for explainability +- Updated ScanResult type with riskBreakdown field +- New types: FieldPenalty, ClaimPenalty, RiskScoreBreakdown exported from types/index.ts +- 18 new unit tests for scoring module (78 total passing) + +Next task: F050 - Evidence Clipper (Phase 2) diff --git a/src/rules/scoring.test.ts b/src/rules/scoring.test.ts new file mode 100644 index 0000000..92cf140 --- /dev/null +++ b/src/rules/scoring.test.ts @@ -0,0 +1,312 @@ +import { describe, it, expect } from "vitest"; +import type { FieldResult, ClaimFlag } from "../types/scan.js"; +import { + calculateFieldPenalties, + calculateClaimPenalties, + calculateMaxFieldScore, + calculateRiskScore, + REQUIRED_FIELD_WEIGHT, + OPTIONAL_FIELD_WEIGHT, + CLAIM_RISK_WEIGHTS, +} from "./scoring.js"; + +function makeField(overrides: Partial = {}): FieldResult { + return { + key: "test_field", + group: "Test Group", + required: false, + status: "found", + confidence: 0.9, + ...overrides, + }; +} + +function makeClaim(overrides: Partial = {}): ClaimFlag { + return { + claim: "eco-friendly", + riskLevel: "high", + evidenceRequired: "Third-party certification", + source: "...eco-friendly product...", + ...overrides, + }; +} + +describe("calculateFieldPenalties", () => { + it("returns no penalties when all fields are found", () => { + const fields = [ + makeField({ key: "product_name", required: true, status: 
"found" }), + makeField({ key: "brand", required: true, status: "found" }), + makeField({ key: "materials", required: false, status: "found" }), + ]; + expect(calculateFieldPenalties(fields)).toEqual([]); + }); + + it("penalizes missing required fields at higher weight", () => { + const fields = [ + makeField({ key: "product_name", required: true, status: "missing" }), + ]; + const penalties = calculateFieldPenalties(fields); + expect(penalties).toHaveLength(1); + expect(penalties[0].penalty).toBe(REQUIRED_FIELD_WEIGHT); + expect(penalties[0].required).toBe(true); + expect(penalties[0].reason).toContain("Required"); + }); + + it("penalizes missing optional fields at lower weight", () => { + const fields = [ + makeField({ key: "materials", required: false, status: "missing" }), + ]; + const penalties = calculateFieldPenalties(fields); + expect(penalties).toHaveLength(1); + expect(penalties[0].penalty).toBe(OPTIONAL_FIELD_WEIGHT); + expect(penalties[0].required).toBe(false); + expect(penalties[0].reason).toContain("Optional"); + }); + + it("ignores found and partial fields", () => { + const fields = [ + makeField({ key: "product_name", required: true, status: "found" }), + makeField({ key: "brand", required: true, status: "partial" }), + makeField({ key: "materials", required: false, status: "missing" }), + ]; + const penalties = calculateFieldPenalties(fields); + expect(penalties).toHaveLength(1); + expect(penalties[0].key).toBe("materials"); + }); + + it("preserves group in penalty output", () => { + const fields = [ + makeField({ + key: "warnings", + group: "Safety & Use", + required: false, + status: "missing", + }), + ]; + const penalties = calculateFieldPenalties(fields); + expect(penalties[0].group).toBe("Safety & Use"); + }); +}); + +describe("calculateClaimPenalties", () => { + it("returns no penalties when no claims", () => { + expect(calculateClaimPenalties([])).toEqual([]); + }); + + it("assigns weight by risk level", () => { + const claims = [ + 
makeClaim({ claim: "eco-friendly", riskLevel: "high" }), + makeClaim({ claim: "organic", riskLevel: "medium" }), + makeClaim({ claim: "recyclable", riskLevel: "low" }), + ]; + const penalties = calculateClaimPenalties(claims); + expect(penalties).toHaveLength(3); + + const high = penalties.find((p) => p.claim === "eco-friendly"); + expect(high?.penalty).toBe(CLAIM_RISK_WEIGHTS.high); + + const medium = penalties.find((p) => p.claim === "organic"); + expect(medium?.penalty).toBe(CLAIM_RISK_WEIGHTS.medium); + + const low = penalties.find((p) => p.claim === "recyclable"); + expect(low?.penalty).toBe(CLAIM_RISK_WEIGHTS.low); + }); + + it("deduplicates claims by keyword", () => { + const claims = [ + makeClaim({ claim: "eco-friendly", riskLevel: "high" }), + makeClaim({ claim: "eco-friendly", riskLevel: "high" }), + makeClaim({ claim: "eco-friendly", riskLevel: "high" }), + ]; + const penalties = calculateClaimPenalties(claims); + expect(penalties).toHaveLength(1); + }); + + it("includes reason with risk level", () => { + const claims = [makeClaim({ claim: "non-toxic", riskLevel: "high" })]; + const penalties = calculateClaimPenalties(claims); + expect(penalties[0].reason).toContain("non-toxic"); + expect(penalties[0].reason).toContain("high risk"); + }); +}); + +describe("calculateMaxFieldScore", () => { + it("sums required and optional weights for all fields", () => { + const fields = [ + makeField({ required: true }), + makeField({ required: true }), + makeField({ required: false }), + makeField({ required: false }), + makeField({ required: false }), + ]; + const expected = 2 * REQUIRED_FIELD_WEIGHT + 3 * OPTIONAL_FIELD_WEIGHT; + expect(calculateMaxFieldScore(fields)).toBe(expected); + }); + + it("returns 0 for empty fields", () => { + expect(calculateMaxFieldScore([])).toBe(0); + }); +}); + +describe("calculateRiskScore", () => { + it("returns score 0 when all fields present and no claims", () => { + const fields = [ + makeField({ key: "product_name", required: 
true, status: "found" }), + makeField({ key: "brand", required: true, status: "found" }), + makeField({ key: "materials", required: false, status: "found" }), + ]; + const result = calculateRiskScore(fields, []); + expect(result.score).toBe(0); + expect(result.fieldPenalties).toHaveLength(0); + expect(result.claimPenalties).toHaveLength(0); + }); + + it("returns max field score when all fields missing and no claims", () => { + const fields = [ + makeField({ required: true, status: "missing" }), + makeField({ key: "brand", required: true, status: "missing" }), + makeField({ key: "materials", required: false, status: "missing" }), + ]; + const result = calculateRiskScore(fields, []); + const expectedMax = 2 * REQUIRED_FIELD_WEIGHT + 1 * OPTIONAL_FIELD_WEIGHT; + expect(result.score).toBe(expectedMax); + expect(result.maxScore).toBe(expectedMax); + expect(result.score).toBe(result.maxScore); + }); + + it("combines field and claim penalties", () => { + const fields = [ + makeField({ key: "product_name", required: true, status: "missing" }), + makeField({ key: "materials", required: false, status: "found" }), + ]; + const claims = [makeClaim({ claim: "sustainable", riskLevel: "high" })]; + const result = calculateRiskScore(fields, claims); + expect(result.score).toBe(REQUIRED_FIELD_WEIGHT + CLAIM_RISK_WEIGHTS.high); + expect(result.fieldPenalties).toHaveLength(1); + expect(result.claimPenalties).toHaveLength(1); + }); + + it("maxScore includes max field penalties plus actual claim penalties", () => { + const fields = [ + makeField({ key: "product_name", required: true, status: "found" }), + makeField({ key: "materials", required: false, status: "found" }), + ]; + const claims = [makeClaim({ claim: "eco-friendly", riskLevel: "high" })]; + const result = calculateRiskScore(fields, claims); + const expectedMax = + REQUIRED_FIELD_WEIGHT + OPTIONAL_FIELD_WEIGHT + CLAIM_RISK_WEIGHTS.high; + expect(result.maxScore).toBe(expectedMax); + }); + + it("handles realistic full scan 
with 12 fields", () => { + const fields: FieldResult[] = [ + makeField({ + key: "product_name", + group: "Identity & Contacts", + required: true, + status: "found", + }), + makeField({ + key: "brand", + group: "Identity & Contacts", + required: true, + status: "found", + }), + makeField({ + key: "manufacturer_name", + group: "Identity & Contacts", + required: false, + status: "missing", + }), + makeField({ + key: "manufacturer_address", + group: "Identity & Contacts", + required: false, + status: "missing", + }), + makeField({ + key: "contact_email_or_url", + group: "Identity & Contacts", + required: false, + status: "found", + }), + makeField({ + key: "materials", + group: "Composition & Origin", + required: false, + status: "found", + }), + makeField({ + key: "country_of_origin", + group: "Composition & Origin", + required: false, + status: "missing", + }), + makeField({ + key: "warnings", + group: "Safety & Use", + required: false, + status: "missing", + }), + makeField({ + key: "instructions", + group: "Safety & Use", + required: false, + status: "found", + }), + makeField({ + key: "care_instructions", + group: "Safety & Use", + required: false, + status: "missing", + }), + makeField({ + key: "marketing_claims", + group: "Claims & Evidence", + required: false, + status: "found", + }), + makeField({ + key: "certifications", + group: "Claims & Evidence", + required: false, + status: "missing", + }), + ]; + const claims = [ + makeClaim({ claim: "eco-friendly", riskLevel: "high" }), + makeClaim({ claim: "organic", riskLevel: "medium" }), + ]; + + const result = calculateRiskScore(fields, claims); + + // 6 missing optional fields * 3 = 18, plus claims: 8 + 5 = 13, total = 31 + expect(result.score).toBe( + 6 * OPTIONAL_FIELD_WEIGHT + + CLAIM_RISK_WEIGHTS.high + + CLAIM_RISK_WEIGHTS.medium, + ); + expect(result.fieldPenalties).toHaveLength(6); + expect(result.claimPenalties).toHaveLength(2); + expect(result.score).toBeLessThan(result.maxScore); + }); + + it("handles 
empty fields and empty claims", () => { + const result = calculateRiskScore([], []); + expect(result.score).toBe(0); + expect(result.maxScore).toBe(0); + expect(result.fieldPenalties).toEqual([]); + expect(result.claimPenalties).toEqual([]); + }); + + it("deduplicates repeated claims in score", () => { + const fields = [makeField({ required: false, status: "found" })]; + const claims = [ + makeClaim({ claim: "sustainable", riskLevel: "high" }), + makeClaim({ claim: "sustainable", riskLevel: "high" }), + ]; + const result = calculateRiskScore(fields, claims); + expect(result.claimPenalties).toHaveLength(1); + expect(result.score).toBe(CLAIM_RISK_WEIGHTS.high); + }); +}); diff --git a/src/rules/scoring.ts b/src/rules/scoring.ts new file mode 100644 index 0000000..4b75320 --- /dev/null +++ b/src/rules/scoring.ts @@ -0,0 +1,90 @@ +import type { + FieldResult, + ClaimFlag, + FieldPenalty, + ClaimPenalty, + RiskScoreBreakdown, +} from "../types/scan.js"; + +export const REQUIRED_FIELD_WEIGHT = 10; +export const OPTIONAL_FIELD_WEIGHT = 3; + +export const CLAIM_RISK_WEIGHTS: Record<"low" | "medium" | "high", number> = { + low: 2, + medium: 5, + high: 8, +}; + +export function calculateFieldPenalties(fields: FieldResult[]): FieldPenalty[] { + const penalties: FieldPenalty[] = []; + + for (const field of fields) { + if (field.status === "missing") { + const weight = field.required + ? REQUIRED_FIELD_WEIGHT + : OPTIONAL_FIELD_WEIGHT; + penalties.push({ + key: field.key, + group: field.group, + required: field.required, + penalty: weight, + reason: field.required + ? 
`Required field "${field.key}" is missing` + : `Optional field "${field.key}" is missing`, + }); + } + } + + return penalties; +} + +export function calculateClaimPenalties(claims: ClaimFlag[]): ClaimPenalty[] { + const seen = new Set(); + const penalties: ClaimPenalty[] = []; + + for (const claim of claims) { + if (seen.has(claim.claim)) continue; + seen.add(claim.claim); + + const weight = CLAIM_RISK_WEIGHTS[claim.riskLevel]; + penalties.push({ + claim: claim.claim, + riskLevel: claim.riskLevel, + penalty: weight, + reason: `Unsubstantiated "${claim.claim}" claim (${claim.riskLevel} risk)`, + }); + } + + return penalties; +} + +export function calculateMaxFieldScore(fields: FieldResult[]): number { + let max = 0; + for (const field of fields) { + max += field.required ? REQUIRED_FIELD_WEIGHT : OPTIONAL_FIELD_WEIGHT; + } + return max; +} + +export function calculateRiskScore( + fields: FieldResult[], + claims: ClaimFlag[], +): RiskScoreBreakdown { + const fieldPenalties = calculateFieldPenalties(fields); + const claimPenalties = calculateClaimPenalties(claims); + + const fieldScore = fieldPenalties.reduce((sum, p) => sum + p.penalty, 0); + const claimScore = claimPenalties.reduce((sum, p) => sum + p.penalty, 0); + const score = fieldScore + claimScore; + + const maxScore = + calculateMaxFieldScore(fields) + + claimPenalties.reduce((sum, p) => sum + p.penalty, 0); + + return { + score, + maxScore, + fieldPenalties, + claimPenalties, + }; +} diff --git a/src/types/index.ts b/src/types/index.ts index 501a8a7..938be1d 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -3,5 +3,8 @@ export type { FieldStatus, FieldResult, ClaimFlag, + FieldPenalty, + ClaimPenalty, + RiskScoreBreakdown, ScanResult, } from "./scan.js"; diff --git a/src/types/scan.test.ts b/src/types/scan.test.ts index 53f53f0..0d99dd1 100644 --- a/src/types/scan.test.ts +++ b/src/types/scan.test.ts @@ -53,6 +53,12 @@ describe("scan types", () => { claims: [], riskScore: 0, maxScore: 100, + 
riskBreakdown: { + score: 0, + maxScore: 100, + fieldPenalties: [], + claimPenalties: [], + }, }; expect(result.url).toContain("https://"); expect(result.category).toBe("general"); diff --git a/src/types/scan.ts b/src/types/scan.ts index 15c7a69..310968a 100644 --- a/src/types/scan.ts +++ b/src/types/scan.ts @@ -23,6 +23,28 @@ export interface ClaimFlag { source?: string; } +export interface FieldPenalty { + key: string; + group: string; + required: boolean; + penalty: number; + reason: string; +} + +export interface ClaimPenalty { + claim: string; + riskLevel: "low" | "medium" | "high"; + penalty: number; + reason: string; +} + +export interface RiskScoreBreakdown { + score: number; + maxScore: number; + fieldPenalties: FieldPenalty[]; + claimPenalties: ClaimPenalty[]; +} + export interface ScanResult { url: string; title: string; @@ -32,4 +54,5 @@ export interface ScanResult { claims: ClaimFlag[]; riskScore: number; maxScore: number; + riskBreakdown: RiskScoreBreakdown; } From 1895e22153e3f84f1cf586904d1e43ec2c53fd6c Mon Sep 17 00:00:00 2001 From: kahboom Date: Sat, 28 Feb 2026 14:31:31 +0000 Subject: [PATCH 2/2] chore: make riskBreakdown optional, rm redundant score fields --- src/rules/scoring.test.ts | 11 +++++++++++ src/rules/scoring.ts | 19 +++++++++++++++++-- src/types/scan.test.ts | 6 +++--- src/types/scan.ts | 4 +--- 4 files changed, 32 insertions(+), 8 deletions(-) diff --git a/src/rules/scoring.test.ts b/src/rules/scoring.test.ts index 92cf140..5e06d8c 100644 --- a/src/rules/scoring.test.ts +++ b/src/rules/scoring.test.ts @@ -122,6 +122,17 @@ describe("calculateClaimPenalties", () => { expect(penalties).toHaveLength(1); }); + it("deduplicates claims case-insensitively", () => { + const claims = [ + makeClaim({ claim: "Eco-Friendly", riskLevel: "high" }), + makeClaim({ claim: "eco-friendly", riskLevel: "high" }), + makeClaim({ claim: "ECO-FRIENDLY", riskLevel: "high" }), + ]; + const penalties = calculateClaimPenalties(claims); + 
expect(penalties).toHaveLength(1); + expect(penalties[0].claim).toBe("Eco-Friendly"); // preserves first occurrence's casing + }); + it("includes reason with risk level", () => { const claims = [makeClaim({ claim: "non-toxic", riskLevel: "high" })]; const penalties = calculateClaimPenalties(claims); diff --git a/src/rules/scoring.ts b/src/rules/scoring.ts index 4b75320..830de41 100644 --- a/src/rules/scoring.ts +++ b/src/rules/scoring.ts @@ -43,8 +43,9 @@ export function calculateClaimPenalties(claims: ClaimFlag[]): ClaimPenalty[] { const penalties: ClaimPenalty[] = []; for (const claim of claims) { - if (seen.has(claim.claim)) continue; - seen.add(claim.claim); + const key = claim.claim.toLowerCase(); + if (seen.has(key)) continue; + seen.add(key); const weight = CLAIM_RISK_WEIGHTS[claim.riskLevel]; penalties.push({ @@ -66,6 +67,20 @@ export function calculateMaxFieldScore(fields: FieldResult[]): number { return max; } +/** + * Calculates the risk score breakdown for a scan result. + * + * The score is the sum of field penalties (missing required/optional fields) + * and claim penalties (unsubstantiated marketing claims). + * + * The maxScore represents the maximum possible penalty: + * - All fields missing (sum of all field weights) + * - Plus actual claim penalties found on the page + * + * Note: Claim penalties are added to both score and maxScore, so claims + * raise the absolute score and also raise the score/maxScore ratio unless + * every field is already missing (in which case the ratio stays at 1). 
+ */ export function calculateRiskScore( fields: FieldResult[], claims: ClaimFlag[], diff --git a/src/types/scan.test.ts b/src/types/scan.test.ts index 0d99dd1..39214b2 100644 --- a/src/types/scan.test.ts +++ b/src/types/scan.test.ts @@ -51,8 +51,6 @@ describe("scan types", () => { timestamp: new Date().toISOString(), fields: [], claims: [], - riskScore: 0, - maxScore: 100, riskBreakdown: { score: 0, maxScore: 100, @@ -63,6 +61,8 @@ describe("scan types", () => { expect(result.url).toContain("https://"); expect(result.category).toBe("general"); expect(result.fields).toEqual([]); - expect(result.riskScore).toBeLessThanOrEqual(result.maxScore); + expect(result.riskBreakdown?.score).toBeLessThanOrEqual( + result.riskBreakdown?.maxScore ?? 0, + ); }); }); diff --git a/src/types/scan.ts b/src/types/scan.ts index 310968a..297d171 100644 --- a/src/types/scan.ts +++ b/src/types/scan.ts @@ -52,7 +52,5 @@ export interface ScanResult { timestamp: string; fields: FieldResult[]; claims: ClaimFlag[]; - riskScore: number; - maxScore: number; - riskBreakdown: RiskScoreBreakdown; + riskBreakdown?: RiskScoreBreakdown; }