From be2234c8b31f55541ae6653834d190765a3c00ae Mon Sep 17 00:00:00 2001 From: kahboom Date: Sat, 28 Feb 2026 12:53:07 +0000 Subject: [PATCH 1/3] chore: update ralph to specify next task --- ralph-once.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ralph-once.sh b/ralph-once.sh index 8a9b778..956ccb5 100755 --- a/ralph-once.sh +++ b/ralph-once.sh @@ -8,5 +8,5 @@ claude --permission-mode acceptEdits "@PRD.json @progress.txt \ 3. Implement it completely with tests. \ 4. Update the feature status to 'passes' in PRD.json. \ 5. Commit your changes (pre-commit hooks will enforce quality automatically). \ -6. Append to progress.txt with what you did. \ +6. Append to progress.txt with what you did, and the next task (if applicable). \ 7. EXIT IMMEDIATELY - do not process any additional tasks." From 78eefb24e93da8b57c00f35356293d63c51a3c1c Mon Sep 17 00:00:00 2001 From: kahboom Date: Sat, 28 Feb 2026 12:54:44 +0000 Subject: [PATCH 2/3] feat: add data-driven rule engine for field detection and claim scanning (F030) Co-Authored-By: Claude Opus 4.6 --- PRD.json | 3 +- src/rules/engine.test.ts | 222 +++++++++++++++++++++++++++++++++++++++ src/rules/engine.ts | 149 ++++++++++++++++++++++++++ 3 files changed, 373 insertions(+), 1 deletion(-) create mode 100644 src/rules/engine.test.ts create mode 100644 src/rules/engine.ts diff --git a/PRD.json b/PRD.json index 9e97efa..a74625f 100644 --- a/PRD.json +++ b/PRD.json @@ -165,7 +165,8 @@ "id": "F030", "phase": 1, "name": "Rule Engine v1", - "description": "Data-driven JSON rules" + "description": "Data-driven JSON rules", + "status": "passes" }, { "id": "F040", diff --git a/src/rules/engine.test.ts b/src/rules/engine.test.ts new file mode 100644 index 0000000..1417ae6 --- /dev/null +++ b/src/rules/engine.test.ts @@ -0,0 +1,222 @@ +import { describe, it, expect } from "vitest"; +import type { PageSnapshot } from "../content/snapshot.js"; +import { + detectField, + evaluateFields, + detectClaims, + runRules, +} from 
"./engine.js"; + +function makeSnapshot(overrides: Partial = {}): PageSnapshot { + return { + url: "https://example.com/product", + title: "Test Product", + timestamp: new Date().toISOString(), + meta: {}, + textContent: "", + skuHints: [], + ...overrides, + }; +} + +describe("detectField", () => { + it("finds a field via meta tag", () => { + const result = detectField("product_name", "", { "og:title": "Widget X" }); + expect(result.status).toBe("found"); + expect(result.value).toBe("Widget X"); + expect(result.confidence).toBe(0.9); + }); + + it("finds a field via text pattern", () => { + const result = detectField( + "country_of_origin", + "This product is Made in USA with care", + {}, + ); + expect(result.status).toBe("found"); + expect(result.value).toBeDefined(); + expect(result.confidence).toBe(0.7); + }); + + it("returns missing when field not found", () => { + const result = detectField("manufacturer_address", "Nothing here", {}); + expect(result.status).toBe("missing"); + expect(result.confidence).toBe(1.0); + expect(result.value).toBeUndefined(); + }); + + it("prefers meta over text when both present", () => { + const result = detectField("brand", "Brand: TextBrand", { + "og:brand": "MetaBrand", + }); + expect(result.value).toBe("MetaBrand"); + expect(result.confidence).toBe(0.9); + }); + + it("detects email in contact field", () => { + const result = detectField( + "contact_email_or_url", + "Reach us at help@example.com for support", + {}, + ); + expect(result.status).toBe("found"); + }); + + it("detects warnings", () => { + const result = detectField( + "warnings", + "WARNING: This product contains chemicals known to cause harm", + {}, + ); + expect(result.status).toBe("found"); + }); + + it("detects materials", () => { + const result = detectField( + "materials", + "Materials: 100% organic cotton", + {}, + ); + expect(result.status).toBe("found"); + expect(result.value).toContain("cotton"); + }); + + it("detects care instructions", () => { + const 
result = detectField( + "care_instructions", + "Care instructions: Machine wash cold, tumble dry low", + {}, + ); + expect(result.status).toBe("found"); + }); + + it("detects certifications", () => { + const result = detectField( + "certifications", + "Certified by OEKO-TEX Standard 100", + {}, + ); + expect(result.status).toBe("found"); + }); +}); + +describe("evaluateFields", () => { + it("returns results for all 12 defined fields", () => { + const snapshot = makeSnapshot(); + const results = evaluateFields(snapshot, "general"); + expect(results).toHaveLength(12); + }); + + it("marks fields as found when text matches", () => { + const snapshot = makeSnapshot({ + textContent: + "Brand: Acme Corp. Materials: 100% cotton. Made in Portugal. Warning: Keep away from fire.", + meta: { "og:title": "Acme T-Shirt" }, + }); + const results = evaluateFields(snapshot, "textiles"); + const found = results.filter((r) => r.status === "found"); + expect(found.length).toBeGreaterThanOrEqual(4); + + const productName = results.find((r) => r.key === "product_name"); + expect(productName?.status).toBe("found"); + + const brand = results.find((r) => r.key === "brand"); + expect(brand?.status).toBe("found"); + }); + + it("marks all fields missing for empty snapshot", () => { + const snapshot = makeSnapshot(); + const results = evaluateFields(snapshot, "general"); + const missing = results.filter((r) => r.status === "missing"); + expect(missing).toHaveLength(12); + }); + + it("preserves group and required from field definitions", () => { + const snapshot = makeSnapshot(); + const results = evaluateFields(snapshot, "general"); + + const productName = results.find((r) => r.key === "product_name"); + expect(productName?.group).toBe("Identity & Contacts"); + expect(productName?.required).toBe(true); + + const materials = results.find((r) => r.key === "materials"); + expect(materials?.group).toBe("Composition & Origin"); + expect(materials?.required).toBe(false); + }); +}); + 
+describe("detectClaims", () => { + it("flags eco-friendly as high risk", () => { + const snapshot = makeSnapshot({ + textContent: "Our eco-friendly product is made with care", + }); + const claims = detectClaims(snapshot); + expect(claims).toHaveLength(1); + expect(claims[0].claim).toBe("eco-friendly"); + expect(claims[0].riskLevel).toBe("high"); + expect(claims[0].source).toBeTruthy(); + }); + + it("flags multiple claims", () => { + const snapshot = makeSnapshot({ + textContent: + "This sustainable, biodegradable, and organic product is vegan", + }); + const claims = detectClaims(snapshot); + expect(claims.length).toBeGreaterThanOrEqual(4); + const claimNames = claims.map((c) => c.claim); + expect(claimNames).toContain("sustainable"); + expect(claimNames).toContain("biodegradable"); + expect(claimNames).toContain("organic"); + expect(claimNames).toContain("vegan"); + }); + + it("returns empty for text without risky claims", () => { + const snapshot = makeSnapshot({ + textContent: "A regular product description with no special claims", + }); + expect(detectClaims(snapshot)).toEqual([]); + }); + + it("is case-insensitive", () => { + const snapshot = makeSnapshot({ + textContent: "ECO-FRIENDLY and SUSTAINABLE materials", + }); + const claims = detectClaims(snapshot); + expect(claims.length).toBeGreaterThanOrEqual(2); + }); + + it("includes surrounding context as source", () => { + const snapshot = makeSnapshot({ + textContent: "We are proud to offer a non-toxic cleaning solution", + }); + const claims = detectClaims(snapshot); + const nonToxic = claims.find((c) => c.claim === "non-toxic"); + expect(nonToxic?.source).toContain("non-toxic"); + expect(nonToxic?.source?.length).toBeGreaterThan(10); + }); +}); + +describe("runRules", () => { + it("returns both fields and claims", () => { + const snapshot = makeSnapshot({ + textContent: + "Brand: TestCo. Materials: recycled plastic. 
This eco-friendly product is non-toxic.", + meta: { "og:title": "TestCo Green Widget" }, + }); + const result = runRules(snapshot, "general"); + + expect(result.fields).toHaveLength(12); + expect(result.claims.length).toBeGreaterThanOrEqual(2); + + const brand = result.fields.find((f) => f.key === "brand"); + expect(brand?.status).toBe("found"); + }); + + it("works with empty snapshot", () => { + const snapshot = makeSnapshot(); + const result = runRules(snapshot, "general"); + expect(result.fields).toHaveLength(12); + expect(result.claims).toHaveLength(0); + }); +}); diff --git a/src/rules/engine.ts b/src/rules/engine.ts new file mode 100644 index 0000000..fb97650 --- /dev/null +++ b/src/rules/engine.ts @@ -0,0 +1,149 @@ +import type { PageSnapshot } from "../content/snapshot.js"; +import type { + ProductCategory, + FieldResult, + FieldStatus, + ClaimFlag, +} from "../types/scan.js"; +import { FIELD_GROUPS } from "./field-groups.js"; +import { CLAIM_KEYWORDS } from "./claim-keywords.js"; + +const FIELD_SEARCH_PATTERNS: Record = { + product_name: [ + /(?:product\s*name|item\s*name)[:\s]+(.+?)(?:\n|$)/i, + /(?:og:title|twitter:title)/i, + ], + brand: [ + /(?:brand|manufacturer)[:\s]+(.+?)(?:\n|$)/i, + /(?:og:brand|product:brand)/i, + ], + manufacturer_name: [ + /(?:manufacturer|made\s*by|produced\s*by)[:\s]+(.+?)(?:\n|$)/i, + ], + manufacturer_address: [ + /(?:manufacturer\s*address|company\s*address|business\s*address)[:\s]+(.+?)(?:\n|$)/i, + ], + contact_email_or_url: [ + /(?:contact\s*us|customer\s*service|support|email)[:\s]+(.+?)(?:\n|$)/i, + /[\w.-]+@[\w.-]+\.\w{2,}/i, + ], + materials: [ + /(?:materials?|composition|made\s*(?:from|of|with)|fabric|ingredients?)[:\s]+(.+?)(?:\n|$)/i, + ], + country_of_origin: [ + /(?:country\s*of\s*origin|made\s*in|manufactured\s*in|origin|product\s*of)[:\s]+(.+?)(?:\n|$)/i, + ], + warnings: [ + /(?:warning|caution|danger|hazard|prop\s*65|⚠)[:\s]+(.+?)(?:\n|$)/i, + ], + instructions: [ + 
/(?:instructions?|directions?|how\s*to\s*use|usage)[:\s]+(.+?)(?:\n|$)/i, + ], + care_instructions: [ + /(?:care\s*instructions?|wash|cleaning|maintenance)[:\s]+(.+?)(?:\n|$)/i, + ], + marketing_claims: [/(?:features?|benefits?|highlights?)[:\s]+(.+?)(?:\n|$)/i], + certifications: [ + /(?:certif(?:ied|ication)|certified\s*by|compliant|approved\s*by|tested\s*by)[:\s]+(.+?)(?:\n|$)/i, + ], +}; + +const META_KEY_MAP: Record<string, string[]> = { + product_name: ["og:title", "twitter:title", "product:name", "name"], + brand: ["og:brand", "product:brand", "brand"], + materials: ["product:material"], + country_of_origin: ["product:origin", "og:country-name"], +}; + +export function detectField( + key: string, + text: string, + meta: Record<string, string>, +): { status: FieldStatus; value?: string; confidence: number } { + // Check meta tags first + const metaKeys = META_KEY_MAP[key] || []; + for (const mk of metaKeys) { + if (meta[mk]) { + return { status: "found", value: meta[mk], confidence: 0.9 }; + } + } + + // Check text content with patterns + const patterns = FIELD_SEARCH_PATTERNS[key] || []; + for (const pattern of patterns) { + const match = text.match(pattern); + if (match) { + const value = match[1]?.trim() || match[0].trim(); + return { status: "found", value, confidence: 0.7 }; + } + } + + return { status: "missing", confidence: 1.0 }; +} + +export function evaluateFields( + snapshot: PageSnapshot, + _category: ProductCategory, +): FieldResult[] { + const results: FieldResult[] = []; + + for (const group of FIELD_GROUPS) { + for (const field of group.fields) { + const detection = detectField( + field.key, + snapshot.textContent, + snapshot.meta, + ); + results.push({ + key: field.key, + group: group.group, + required: field.required, + status: detection.status, + value: detection.value, + confidence: detection.confidence, + }); + } + } + + return results; +} + +export function detectClaims(snapshot: PageSnapshot): ClaimFlag[] { + const flags: ClaimFlag[] = []; + const lowerText =
snapshot.textContent.toLowerCase(); + + for (const kw of CLAIM_KEYWORDS) { + const pattern = new RegExp(`\\b${escapeRegex(kw.pattern)}\\b`, "gi"); + const match = lowerText.match(pattern); + if (match) { + // Extract surrounding context as source + const idx = lowerText.indexOf(kw.pattern.toLowerCase()); + const start = Math.max(0, idx - 40); + const end = Math.min(lowerText.length, idx + kw.pattern.length + 40); + const source = snapshot.textContent.slice(start, end).trim(); + + flags.push({ + claim: kw.pattern, + riskLevel: kw.riskLevel, + evidenceRequired: kw.evidenceRequired, + source, + }); + } + } + + return flags; +} + +function escapeRegex(str: string): string { + return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} + +export function runRules( + snapshot: PageSnapshot, + category: ProductCategory, +): { fields: FieldResult[]; claims: ClaimFlag[] } { + return { + fields: evaluateFields(snapshot, category), + claims: detectClaims(snapshot), + }; +} From 5e7013e6bc628f17e7fd1d1ab93ad56cb62f8e37 Mon Sep 17 00:00:00 2001 From: kahboom Date: Sat, 28 Feb 2026 13:23:18 +0000 Subject: [PATCH 3/3] chore: fix engine and test per feedback --- CLAUDE.md | 182 +++++++++++++++++++++++++++++++++++++ README.md | 190 +++++++++++++++++++++++++++++++++++++++ progress.txt | 9 ++ src/rules/engine.test.ts | 12 +++ src/rules/engine.ts | 22 +++-- 5 files changed, 406 insertions(+), 9 deletions(-) create mode 100644 CLAUDE.md create mode 100644 README.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..a2bd32b --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,182 @@ +# OpenThreads Trace - Developer Guide for Claude Code + +## Project Overview + +OpenThreads Trace is a cross-browser extension that scans consumer product pages for compliance-surface completeness signals. It detects missing disclosure fields, flags risky marketing claims, and exports structured compliance data in Threadmark-compatible JSON format. 
+ +**Target Users**: DTC merchants, compliance consultants +**Platform**: Cross-platform browser extension (Chrome MV3, future Firefox/Safari) +**Architecture**: TypeScript + Vite, data-driven rule engine, client-side only + +## Core Architecture + +### Technology Stack +- **Build**: Vite 6 + TypeScript 5.7 +- **Testing**: Vitest + jsdom (59 passing tests, 50% coverage floor) +- **Linting**: ESLint 9 (flat config) + Prettier +- **CI/CD**: GitHub Actions with CodeQL security scanning +- **Extension**: Chrome Manifest V3 (popup + content script + background service worker) + +### Project Structure +``` +src/ +├── background/ # Background service worker +├── content/ # Content scripts (snapshot capture) +│ ├── snapshot.ts # DOM extraction: metadata, text, SKU hints +│ └── index.ts # Message handler for SCAN requests +├── popup/ # Extension popup UI +│ ├── popup.html # 360x480px UI with category selector + scan button +│ ├── popup.ts # DOM event wiring, chrome.tabs messaging +│ └── popup-ui.ts # Pure testable UI functions +├── rules/ # Data-driven compliance rule engine +│ ├── engine.ts # detectField(), detectClaims(), runRules() +│ ├── field-groups.ts # 12 compliance field definitions (4 groups) +│ └── claim-keywords.ts # Risk claim keywords (eco, sustainable, etc.) 
+└── types/ # TypeScript types + ├── scan.ts # ScanResult, FieldResult, ClaimFlag, PageSnapshot + └── index.ts # Exports and ProductCategory enum +``` + +## Coding Conventions + +### TypeScript Patterns +- **Pure functions first**: Separate testable logic from DOM/chrome API calls + - Example: `popup-ui.ts` has pure functions, `popup.ts` wires DOM +- **Strong typing**: All interfaces in `types/`, no `any` +- **Data-driven rules**: JSON-like structures in `field-groups.ts` and `claim-keywords.ts` +- **Confidence scoring**: Return `0.0-1.0` confidence with detection results + - Meta tags: 0.9 confidence + - Text patterns: 0.7 confidence + +### File Organization +- **Test files**: Co-located `*.test.ts` next to source +- **One concern per file**: `snapshot.ts` only handles DOM capture, `engine.ts` only handles rule evaluation +- **Exports**: Use named exports, centralize in `index.ts` where appropriate + +### Naming Conventions +- **Functions**: `camelCase`, verb-first (`extractMetaTags`, `detectClaims`) +- **Types**: `PascalCase` (`PageSnapshot`, `FieldResult`) +- **Constants**: `SCREAMING_SNAKE_CASE` for data maps (`FIELD_SEARCH_PATTERNS`, `META_KEY_MAP`) +- **Files**: `kebab-case.ts` (exception: `popup-ui.ts` for clarity) + +## Feature Development Workflow + +### Current Status (per PRD.json) +- ✅ F005: Engineering baseline (hooks, CI, CodeQL) +- ✅ F010: Extension shell UI +- ✅ F020: DOM snapshot capture +- ✅ F030: Rule engine v1 +- 🔨 F040: Risk score model (NEXT) +- 📋 F050: Evidence clipper +- 📋 F060: Threadmark JSON export + +### Adding New Features +1. **Read PRD.json first**: Check acceptance criteria for the feature ID +2. **Write tests first**: Add `*.test.ts` with expected behavior +3. **Keep pure functions testable**: Separate DOM/chrome APIs from logic +4. **Update progress.txt**: Document what was implemented and test count +5. **Run full checks**: `npm run typecheck && npm test && npm run build` + +### Adding New Compliance Fields +1. 
Add field definition to `field-groups.ts` (specify group, key, required flag) +2. Add detection pattern to `FIELD_SEARCH_PATTERNS` in `engine.ts` +3. Add meta tag mapping to `META_KEY_MAP` if applicable +4. Write unit tests in `rules/engine.test.ts` + +### Adding New Claim Keywords +1. Add category to `claim-keywords.ts` (use lowercase for case-insensitive matching) +2. Engine will auto-detect via `detectClaims()` with context extraction +3. Write tests in `rules/claim-keywords.test.ts` + +## Testing Philosophy + +### Unit Test Requirements +- **Coverage**: Maintain ≥50% threshold (configured in vitest.config.ts) +- **Isolation**: Mock chrome APIs, use jsdom for DOM tests +- **Fast**: Pre-push hook runs tests in <60s +- **Descriptive**: Use `describe()` blocks per function/module + +### Test Structure +```typescript +import { describe, it, expect } from 'vitest'; + +describe('moduleName', () => { + describe('functionName', () => { + it('should handle expected case', () => { + // Arrange + const input = {...}; + // Act + const result = functionName(input); + // Assert + expect(result).toEqual({...}); + }); + }); +}); +``` + +## Git & CI Workflow + +### Pre-commit Hook (auto-installed) +- Runs format + lint + typecheck in <15s +- Located in `.husky/pre-commit` + +### Pre-push Hook +- Runs full unit test suite in <60s + +### CI Checks (GitHub Actions) +- **Required**: lint, typecheck, test, build, package artifact +- **Security**: CodeQL scan on PR + weekly scheduled +- **Coverage**: Uploaded to coverage service (configured threshold) + +### Commit Messages +- Use Conventional Commits: `feat:`, `fix:`, `chore:`, `test:`, `refactor:` +- Example: `feat: add risk score calculation with weighted field penalties` +- Always include co-author: `Co-Authored-By: Claude Sonnet 4.5 ` + +## Security & Privacy Constraints + +### Hard Requirements (per PRD) +- **No automatic data exfiltration**: User-initiated scans only +- **Sanitized DOM parsing**: Strip scripts/styles in 
`extractTextContent()` +- **No authentication**: Do not prompt for credentials or access private portals +- **Disclaimer required**: Extension provides signals, not legal advice + +### Chrome API Usage +- **activeTab**: Only access current tab on user click +- **scripting**: Inject content scripts declaratively via manifest +- **No network**: All processing is local, no external API calls in v1 + +## Known Patterns & Anti-Patterns + +### ✅ DO +- Extract pure functions for testability (`popup-ui.ts` pattern) +- Use confidence scores with detection results +- Return structured data with context (e.g., `ClaimFlag` includes surrounding text) +- Co-locate tests with source files +- Document acceptance criteria in `progress.txt` + +### ❌ DON'T +- Mix DOM manipulation with business logic +- Use `any` type (all types in `types/`) +- Make network calls or external API requests +- Promise legal compliance guarantees in UI text +- Bypass hooks with `--no-verify` + +## Next Steps (F040: Risk Score Model) + +When implementing the risk score: +1. Create `scoring.ts` with `calculateRiskScore(scanResult: ScanResult): RiskScoreBreakdown` +2. Weight by field importance (required fields > optional) +3. Penalize risky claims without evidence +4. Return explainable breakdown (which fields/claims contribute) +5. Add unit tests for edge cases (all fields present, all missing, mixed) +6. Update `ScanResult` type to include `riskScore` and `riskBreakdown` + +## Questions & Support + +- **PRD Reference**: `/PRD.json` (source of truth for features) +- **Progress Tracking**: `/progress.txt` (current implementation status) +- **CI Configuration**: `.github/workflows/` (build, test, release) +- **Rule Definitions**: `src/rules/field-groups.ts` and `claim-keywords.ts` + +This is a long-running agent-friendly codebase following Anthropic recommendations for structured, testable, data-driven extension development. 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..89154e8 --- /dev/null +++ b/README.md @@ -0,0 +1,190 @@ +# OpenThreads Trace + +**Compliance Exposure Scanner Browser Extension** + +A cross-browser extension that scans consumer product pages (Shopify, WooCommerce, Amazon, Etsy, DTC sites) for compliance-surface completeness signals. Flags missing disclosure fields, detects risky marketing claims, and exports structured compliance data. + +[![CI](https://github.com/openthreads/surface/actions/workflows/ci.yml/badge.svg)](https://github.com/openthreads/surface/actions/workflows/ci.yml) +[![CodeQL](https://github.com/openthreads/surface/actions/workflows/codeql.yml/badge.svg)](https://github.com/openthreads/surface/actions/workflows/codeql.yml) + +## Features + +- **One-click page scan** – Extract product metadata, text content, and SKU hints +- **Rule-based compliance detection** – 12 disclosure fields across 4 categories (Identity, Composition, Safety, Claims) +- **Claim risk flagging** – Detects eco/sustainability/health claims requiring evidence +- **Product category selector** – Textiles, Children's Products, Cosmetics, Electronics, General +- **Explainable results** – Confidence scores and detection context +- **Privacy-first** – All processing is local, no data leaves your browser +- **Threadmark export** *(coming soon)* – Structured JSON bundle for compliance workflows + +## Installation + +### For Development + +```bash +# Clone the repository +git clone https://github.com/openthreads/surface.git +cd surface + +# Install dependencies +npm install + +# Build the extension +npm run build + +# Load in Chrome +# 1. Open chrome://extensions/ +# 2. Enable "Developer mode" +# 3. Click "Load unpacked" +# 4. Select the `dist/` directory +``` + +### For Users + +*(Coming soon: Chrome Web Store, Firefox Add-ons, Edge Add-ons)* + +## Usage + +1. **Navigate** to any product page (e.g., Shopify store, Amazon listing) +2. 
**Click** the OpenThreads Trace extension icon +3. **Select** your product category (Textiles, Children's Products, etc.) +4. **Click "Scan Page"** +5. **Review** missing fields and flagged claims +6. **Export** results *(coming soon)* + +## Development + +### Prerequisites + +- Node.js 18+ (20 LTS recommended) +- npm 9+ + +### Scripts + +```bash +# Development +npm run build # Build extension for production +npm run typecheck # TypeScript type checking +npm run lint # ESLint check +npm run lint:fix # Auto-fix linting issues +npm run format # Format with Prettier +npm run format:check # Check formatting + +# Testing +npm test # Run unit tests +npm run test:watch # Watch mode +npm run test:coverage # Generate coverage report +``` + +### Pre-commit & Pre-push Hooks + +Hooks are auto-installed on `npm install` via Husky: + +- **Pre-commit**: Format, lint, typecheck (<15s) +- **Pre-push**: Full test suite (<60s) + +To skip hooks temporarily (not recommended): +```bash +git commit --no-verify +``` + +### Project Structure + +``` +src/ +├── background/ # Background service worker +├── content/ # Content scripts (DOM snapshot capture) +├── popup/ # Extension popup UI +├── rules/ # Compliance rule engine (field groups, claim keywords) +└── types/ # TypeScript type definitions +``` + +See [CLAUDE.md](./CLAUDE.md) for detailed architecture and coding conventions. + +## Feature Roadmap + +Current status (v1.0.0): + +- ✅ **F005**: Engineering baseline (CI, hooks, CodeQL security scanning) +- ✅ **F010**: Extension shell UI +- ✅ **F020**: DOM snapshot capture +- ✅ **F030**: Rule engine v1 +- 🔨 **F040**: Risk score model *(in progress)* +- 📋 **F050**: Evidence clipper +- 📋 **F060**: Threadmark JSON export + +See [PRD.json](./PRD.json) for full product requirements. 
+ +## Compliance Fields Detected + +### Identity & Contacts +- Product name ✅ (required) +- Brand ✅ (required) +- Manufacturer name/address +- Contact email or URL + +### Composition & Origin +- Materials (fiber content, ingredients) +- Country of origin + +### Safety & Use +- Warnings +- Instructions +- Care instructions + +### Claims & Evidence +- Marketing claims (eco, sustainable, biodegradable, etc.) +- Certifications (GOTS, OEKO-TEX, etc.) + +## Security & Privacy + +- **No data collection**: All processing happens locally in your browser +- **No network calls**: Extension does not send data to external servers +- **User-initiated only**: Scans require explicit user action +- **Sanitized parsing**: Scripts and styles are stripped from analyzed content +- **CodeQL scanning**: Continuous security analysis via GitHub Actions + +## Contributing + +Contributions are welcome! Please: + +1. Fork the repository +2. Create a feature branch (`git checkout -b feat/your-feature`) +3. Follow existing code conventions (see [CLAUDE.md](./CLAUDE.md)) +4. Write tests for new features +5. Ensure all checks pass (`npm run typecheck && npm test && npm run build`) +6. Submit a pull request + +### Commit Convention + +Use [Conventional Commits](https://www.conventionalcommits.org/): +- `feat:` New features +- `fix:` Bug fixes +- `refactor:` Code refactoring +- `test:` Test additions/changes +- `chore:` Tooling, dependencies +- `docs:` Documentation only + +## License + +Apache License 2.0 - See [LICENSE](./LICENSE) for details. + +## Disclaimer + +**This extension provides heuristic completeness signals only.** +It does not constitute legal advice or guarantee regulatory compliance. +Users remain responsible for all compliance decisions and verification. 
+ +## Acknowledgments + +Built with: +- [TypeScript](https://www.typescriptlang.org/) +- [Vite](https://vitejs.dev/) +- [Vitest](https://vitest.dev/) +- [Chrome Extensions API](https://developer.chrome.com/docs/extensions/) + +Developed by [OpenThreads.dev](https://openthreads.dev) to accelerate structured compliance workflows. + +--- + +**Questions or feedback?** Open an issue or visit [OpenThreads Documentation](https://docs.openthreads.dev). diff --git a/progress.txt b/progress.txt index 1be6354..ae09219 100644 --- a/progress.txt +++ b/progress.txt @@ -26,3 +26,12 @@ F020 - DOM Snapshot Capture [PASSES] - Content script handles SCAN message and returns snapshot to popup - jsdom added for DOM-based testing - 19 new unit tests for snapshot module (39 total passing) + +F030 - Rule Engine v1 [PASSES] +- engine.ts: runRules() orchestrates field evaluation and claim detection against a PageSnapshot +- detectField: checks meta tags (high confidence 0.9) then text patterns (0.7) for each compliance field +- evaluateFields: runs all 12 field definitions from field-groups.ts against snapshot, returns FieldResult[] +- detectClaims: case-insensitive keyword matching against claim-keywords.ts with surrounding context extraction +- FIELD_SEARCH_PATTERNS: regex map for all 12 fields (product_name, brand, materials, warnings, etc.) +- META_KEY_MAP: Open Graph / structured data key lookups for meta-based detection +- 20 new unit tests for engine module (59 total passing) diff --git a/src/rules/engine.test.ts b/src/rules/engine.test.ts index 1417ae6..cf4efc2 100644 --- a/src/rules/engine.test.ts +++ b/src/rules/engine.test.ts @@ -195,6 +195,18 @@ describe("detectClaims", () => { expect(nonToxic?.source).toContain("non-toxic"); expect(nonToxic?.source?.length).toBeGreaterThan(10); }); + + it("detects multiple occurrences of the same claim", () => { + const snapshot = makeSnapshot({ + textContent: + "Our eco-friendly products are great. 
We also have eco-friendly packaging.", + }); + const claims = detectClaims(snapshot); + const ecoFriendlyClaims = claims.filter((c) => c.claim === "eco-friendly"); + expect(ecoFriendlyClaims).toHaveLength(2); + expect(ecoFriendlyClaims[0].source).toContain("products"); + expect(ecoFriendlyClaims[1].source).toContain("packaging"); + }); }); describe("runRules", () => { diff --git a/src/rules/engine.ts b/src/rules/engine.ts index fb97650..08a563d 100644 --- a/src/rules/engine.ts +++ b/src/rules/engine.ts @@ -31,7 +31,7 @@ const FIELD_SEARCH_PATTERNS: Record = { /(?:materials?|composition|made\s*(?:from|of|with)|fabric|ingredients?)[:\s]+(.+?)(?:\n|$)/i, ], country_of_origin: [ - /(?:country\s*of\s*origin|made\s*in|manufactured\s*in|origin|product\s*of)[:\s]+(.+?)(?:\n|$)/i, + /(?:country\s*of\s*origin|made\s*in|manufactured\s*in|product\s*of)[:\s]+(.+?)(?:\n|$)/i, ], warnings: [ /(?:warning|caution|danger|hazard|prop\s*65|⚠)[:\s]+(.+?)(?:\n|$)/i, @@ -40,7 +40,7 @@ const FIELD_SEARCH_PATTERNS: Record = { /(?:instructions?|directions?|how\s*to\s*use|usage)[:\s]+(.+?)(?:\n|$)/i, ], care_instructions: [ - /(?:care\s*instructions?|wash|cleaning|maintenance)[:\s]+(.+?)(?:\n|$)/i, + /(?:care\s*instructions?|cleaning|maintenance)[:\s]+(.+?)(?:\n|$)/i, ], marketing_claims: [/(?:features?|benefits?|highlights?)[:\s]+(.+?)(?:\n|$)/i], certifications: [ @@ -83,7 +83,7 @@ export function detectField( export function evaluateFields( snapshot: PageSnapshot, - _category: ProductCategory, + _category: ProductCategory, // TODO: Use category to apply category-specific field requirements ): FieldResult[] { const results: FieldResult[] = []; @@ -110,16 +110,20 @@ export function evaluateFields( export function detectClaims(snapshot: PageSnapshot): ClaimFlag[] { const flags: ClaimFlag[] = []; - const lowerText = snapshot.textContent.toLowerCase(); for (const kw of CLAIM_KEYWORDS) { const pattern = new RegExp(`\\b${escapeRegex(kw.pattern)}\\b`, "gi"); - const match = 
lowerText.match(pattern); - if (match) { - // Extract surrounding context as source - const idx = lowerText.indexOf(kw.pattern.toLowerCase()); + let match: RegExpExecArray | null; + + // Find all matches of this claim keyword + while ((match = pattern.exec(snapshot.textContent)) !== null) { + const idx = match.index; + const matchedText = match[0]; const start = Math.max(0, idx - 40); - const end = Math.min(lowerText.length, idx + kw.pattern.length + 40); + const end = Math.min( + snapshot.textContent.length, + idx + matchedText.length + 40, + ); const source = snapshot.textContent.slice(start, end).trim(); flags.push({