Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 30 additions & 2 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,15 +62,43 @@ test/ # Unit and integration tests
- Findings types: `found`, `missing`, `info`, `timeout`, `unreachable`
- Unused vars starting with `_` are ignored by ESLint

## Versioning

This project follows semantic versioning (semver). Bump `version` in `package.json` on every change that ships:

- **Patch** (`1.3.x`) — bug fixes, scoring corrections, internal refactors with no API change
- **Minor** (`1.x.0`) — new analyzers, new CLI flags, new exported helpers (backwards-compatible)
- **Major** (`x.0.0`) — breaking changes to the public API or scoring weights that would change existing audit results

The ClawHub skill version must match `package.json`.

## ClawHub Publishing

Publish the skill to ClawHub after updating `skills/aeo/SKILL.md`:
Publish the skill to ClawHub after updating `skills/aeo/SKILL.md`.

### Verify login

```bash
clawhub whoami # should print your handle (e.g. arberx)
```

If not logged in: `clawhub login`

### Publish

```bash
clawhub publish skills/aeo --version <semver> --changelog "<description of changes>"
```

The `--version` flag must be valid semver and should match `package.json`. Include a short changelog summarizing what changed.
- `--version` must be valid semver and **must match `package.json`**
- Include a short changelog summarizing what changed
- Run from the repo root (the `skills/` directory is resolved relative to cwd)

### Example

```bash
clawhub publish skills/aeo --version 1.3.3 --changelog "Fix nested schema detection and E-E-A-T signal scoping"
```

### ClawHub Security Guidelines

Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@ainyc/aeo-audit",
"version": "1.3.2",
"version": "1.3.3",
"description": "The most comprehensive open-source Answer Engine Optimization (AEO) audit tool. Scores websites across 13 ranking factors that determine AI citation.",
"type": "module",
"main": "./dist/index.js",
Expand Down
46 changes: 31 additions & 15 deletions src/analyzers/eeat-signals.ts
Original file line number Diff line number Diff line change
@@ -1,21 +1,34 @@
import { clampScore, extractSchemaTypes } from './helpers.js'
import { clampScore, extractSchemaTypes, findTopLevelSchemaByType } from './helpers.js'
import type { AnalysisResult, AuditContext, StructuredDataEntry } from '../types.js'

/**
 * Selects the entries of `structuredData` whose own `@type` names `typeName`.
 *
 * `@type` may hold a single value or an array of values. Only string values
 * are compared, using strict equality. Objects nested inside an entry are not
 * inspected.
 *
 * @param structuredData - Entries to scan.
 * @param typeName - Exact schema type name to look for.
 * @returns The matching entries, in their original order.
 */
function findSchemaByType(structuredData: StructuredDataEntry[], typeName: string): StructuredDataEntry[] {
  const matches: StructuredDataEntry[] = []

  for (const entry of structuredData) {
    const declared = entry?.['@type']
    // Normalise the single-value form so both shapes share one comparison path.
    const candidates = Array.isArray(declared) ? declared : [declared]

    for (const candidate of candidates) {
      if (typeof candidate === 'string' && candidate === typeName) {
        matches.push(entry)
        break
      }
    }
  }

  return matches
}

export function analyzeEeatSignals(context: AuditContext): AnalysisResult {
const findings: AnalysisResult['findings'] = []
const recommendations: string[] = []
let score = 0

// Person schema with credentials (jobTitle, alumniOf, hasCredential)
const persons = findSchemaByType(context.structuredData, 'Person')
// Person schema with credentials (jobTitle, alumniOf, hasCredential).
// Count top-level Person declarations AND Persons in explicit authorial roles
// (author, creator, contributor) on any top-level schema — e.g. Article.author.
// Exclude deeply nested Persons (review authors, customers, etc.) which are not
// authoritative E-E-A-T signals for the page itself.
const persons: StructuredDataEntry[] = [
...findTopLevelSchemaByType(context.structuredData, 'Person'),
]
for (const item of context.structuredData) {
for (const prop of ['author', 'creator', 'contributor']) {
const val = (item as Record<string, unknown>)[prop]
if (!val) continue
for (const c of (Array.isArray(val) ? val : [val])) {
if (!c || typeof c !== 'object' || Array.isArray(c)) continue
const entry = c as StructuredDataEntry
const rawType = entry['@type']
const entryTypes = Array.isArray(rawType) ? rawType : [rawType]
if (entryTypes.some((t) => typeof t === 'string' && t === 'Person')) {
persons.push(entry)
}
}
}
}
const credentialedPersons = persons.filter((person) =>
person.jobTitle || person.alumniOf || person.hasCredential,
)
Expand Down Expand Up @@ -78,11 +91,14 @@ export function analyzeEeatSignals(context: AuditContext): AnalysisResult {
recommendations.push('Add footer links to privacy, terms, and about pages.')
}

// Organization schema with founder or employee
// Organization schema with founder or employee.
// Only consider top-level entity declarations — not orgs nested as publisher,
// brand, memberOf, etc. inside other schemas — since those are references rather
// than the primary entity the page represents.
const orgs = [
...findSchemaByType(context.structuredData, 'Organization'),
...findSchemaByType(context.structuredData, 'LocalBusiness'),
...findSchemaByType(context.structuredData, 'ProfessionalService'),
...findTopLevelSchemaByType(context.structuredData, 'Organization'),
...findTopLevelSchemaByType(context.structuredData, 'LocalBusiness'),
...findTopLevelSchemaByType(context.structuredData, 'ProfessionalService'),
]

const orgWithPeople = orgs.filter((org) => org.founder || org.employee || org.member)
Expand Down
84 changes: 80 additions & 4 deletions src/analyzers/helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,32 @@ export function extractSchemaTypes(structuredData: StructuredDataEntry[]): Set<s
const types = new Set<string>()

for (const item of structuredData) {
const rawType = item?.['@type']
if (!rawType) {
continue
collectNestedTypes(item, types)
}

return types
}

function collectNestedTypes(obj: unknown, types: Set<string>, seen = new WeakSet<object>()): void {
if (!obj || typeof obj !== 'object') {
return
}

if (seen.has(obj as object)) {
return
}
seen.add(obj as object)

if (Array.isArray(obj)) {
for (const item of obj) {
collectNestedTypes(item, types, seen)
}
return
}

const record = obj as Record<string, unknown>
const rawType = record['@type']
if (rawType) {
const typeValues = Array.isArray(rawType) ? rawType : [rawType]
for (const type of typeValues) {
if (typeof type === 'string' && type.trim()) {
Expand All @@ -108,7 +129,62 @@ export function extractSchemaTypes(structuredData: StructuredDataEntry[]): Set<s
}
}

return types
for (const value of Object.values(record)) {
if (value && typeof value === 'object') {
collectNestedTypes(value, types, seen)
}
}
}

/**
 * Returns the entries of `structuredData` that themselves declare `typeName`
 * in their `@type`.
 *
 * Only the entry's own `@type` is examined (a string or an array of values);
 * objects nested in the entry's other properties are not searched.
 *
 * @param structuredData - Entries to scan.
 * @param typeName - Exact schema type name to match.
 * @returns The matching entries, in input order.
 */
export function findTopLevelSchemaByType(structuredData: StructuredDataEntry[], typeName: string): StructuredDataEntry[] {
  const matchesType = (value: unknown): boolean => typeof value === 'string' && value === typeName

  return structuredData.filter((entry) => {
    const declared = entry?.['@type']
    return Array.isArray(declared) ? declared.some((value) => matchesType(value)) : matchesType(declared)
  })
}

/**
 * Collects every object declaring `typeName` in its `@type`, searching each
 * entry of `structuredData` and everything nested inside it.
 *
 * Each top-level entry is walked with its own fresh visited-set, so cycle
 * protection is scoped per entry.
 *
 * @param structuredData - Root entries to walk.
 * @param typeName - Exact schema type name to match.
 * @returns Matching objects in the order the walk encounters them.
 */
export function findSchemaByType(structuredData: StructuredDataEntry[], typeName: string): StructuredDataEntry[] {
  const matches: StructuredDataEntry[] = []

  structuredData.forEach((root) => {
    collectNestedByType(root, typeName, matches, new WeakSet<object>())
  })

  return matches
}

/**
 * Depth-first walk over `obj` that appends to `results` every object whose
 * `@type` (a string or an array of values) contains `typeName`.
 *
 * @param obj - Value to inspect; anything that is not a non-null object is ignored.
 * @param typeName - Exact schema type name to match.
 * @param results - Output array, mutated in place; a parent is pushed before its descendants.
 * @param seen - Objects already visited; guards against cycles and repeat visits.
 */
function collectNestedByType(obj: unknown, typeName: string, results: StructuredDataEntry[], seen: WeakSet<object>): void {
  // Primitives and null hold no schema nodes; already-visited objects are skipped.
  if (typeof obj !== 'object' || obj === null || seen.has(obj)) {
    return
  }
  seen.add(obj)

  if (Array.isArray(obj)) {
    for (let index = 0; index < obj.length; index++) {
      collectNestedByType(obj[index], typeName, results, seen)
    }
    return
  }

  const node = obj as StructuredDataEntry
  const declared = node['@type']
  if (declared) {
    const candidates = Array.isArray(declared) ? declared : [declared]
    const isMatch = candidates.some((candidate) => typeof candidate === 'string' && candidate === typeName)
    if (isMatch) {
      results.push(node)
    }
  }

  // Recurse into every property value; the guard at the top discards non-objects.
  for (const child of Object.values(node)) {
    collectNestedByType(child, typeName, results, seen)
  }
}

export function getStructuredDataNames(structuredData: StructuredDataEntry[]): string[] {
Expand Down
10 changes: 1 addition & 9 deletions src/analyzers/schema-completeness.ts
Original file line number Diff line number Diff line change
@@ -1,19 +1,11 @@
import { clampScore } from './helpers.js'
import { clampScore, findSchemaByType } from './helpers.js'
import type { AnalysisResult, AuditContext, StructuredDataEntry } from '../types.js'

/**
 * Pairs a numeric score with the structured-data entry it belongs to.
 *
 * NOTE(review): the code that produces and consumes this type is outside the
 * visible diff; presumably it records the highest-scoring candidate — confirm.
 */
interface BestSchemaMatch {
// Numeric score associated with `item`.
score: number
// The associated entry; may be `null` (presumably when nothing matched — confirm).
item: StructuredDataEntry | null
}

/**
 * Filters `structuredData` down to the entries whose own `@type` includes
 * `typeName`.
 *
 * `@type` is accepted as either a single value or an array of values.
 * Non-string values never match. Nested objects are not searched.
 *
 * @param structuredData - Entries to scan.
 * @param typeName - Exact schema type name to match.
 * @returns The matching entries, in input order.
 */
function findSchemaByType(structuredData: StructuredDataEntry[], typeName: string): StructuredDataEntry[] {
  const declaresType = (entry: StructuredDataEntry): boolean => {
    const declared = entry?.['@type']
    if (!Array.isArray(declared)) {
      return typeof declared === 'string' && declared === typeName
    }
    return declared.some((value) => typeof value === 'string' && value === typeName)
  }

  return structuredData.filter((entry) => declaresType(entry))
}

function propertyCompleteness(item: StructuredDataEntry | null, requiredProps: string[]): number {
if (!item) {
return 0
Expand Down
70 changes: 69 additions & 1 deletion test/analyzers/legacy.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import assert from 'node:assert/strict'
import test from 'node:test'
import { load } from 'cheerio'

import { parseJsonLdScripts, getVisibleText } from '../../src/analyzers/helpers.js'
import { parseJsonLdScripts, getVisibleText, extractSchemaTypes } from '../../src/analyzers/helpers.js'
import { analyzeStructuredData } from '../../src/analyzers/structured-data.js'
import { analyzeAiReadableContent } from '../../src/analyzers/ai-readable-content.js'
import { analyzeContentDepth } from '../../src/analyzers/content-depth.js'
Expand Down Expand Up @@ -67,6 +67,74 @@ test('freshness analyzer reports sitemap timeout as timeout finding', () => {
assert.ok(result.findings.some((finding) => finding.type === 'timeout'))
})

// extractSchemaTypes must report types declared on nested nodes (HowTo, HowToStep)
// as well as the types declared on the top-level @graph entry.
test('extractSchemaTypes finds nested HowTo inside a parent schema', () => {
  const markup = `<html><head>
<script type="application/ld+json">
{
"@context": "https://schema.org",
"@graph": [
{
"@type": ["ProfessionalService", "LocalBusiness"],
"name": "Test Biz",
"hasProcess": {
"@type": "HowTo",
"name": "How It Works",
"step": [{"@type": "HowToStep", "name": "Step 1"}]
}
}
]
}
</script>
</head><body></body></html>`
  const detected = extractSchemaTypes(parseJsonLdScripts(load(markup)))

  // Each pair is [expected type, assertion message]; checked in this order.
  const expectations: Array<[string, string]> = [
    ['HowTo', 'should detect nested HowTo'],
    ['ProfessionalService', 'should detect top-level ProfessionalService'],
    ['LocalBusiness', 'should detect top-level LocalBusiness'],
    ['HowToStep', 'should detect deeply nested HowToStep'],
  ]
  for (const [schemaType, message] of expectations) {
    assert.ok(detected.has(schemaType), message)
  }
})

// A HowTo nested under a business entity must be reported by the structured-data
// analyzer as a 'found' finding rather than a missing one.
test('structured data analyzer detects nested HowTo schema', () => {
  const pageHtml = `<html><head>
<title>Test</title>
<script type="application/ld+json">
{
"@context": "https://schema.org",
"@graph": [
{
"@type": ["ProfessionalService", "LocalBusiness"],
"name": "Test Biz",
"url": "https://example.com",
"telephone": "+1-555-0100",
"email": "info@example.com",
"address": { "@type": "PostalAddress", "streetAddress": "123 Main" },
"hasProcess": {
"@type": "HowTo",
"name": "How It Works",
"step": [{"@type": "HowToStep", "name": "Step 1"}]
}
},
{
"@type": "FAQPage",
"mainEntity": [{"@type": "Question", "name": "Q1", "acceptedAnswer": {"@type": "Answer", "text": "A1"}}]
},
{
"@type": "Service",
"name": "Consulting"
}
]
}
</script>
</head><body><p>Hello world</p></body></html>`

  const { findings } = analyzeStructuredData(buildContext(pageHtml))
  const howTo = findings.find(({ message }) => message.includes('HowTo'))

  assert.ok(howTo, 'should have a HowTo finding')
  assert.equal(howTo?.type, 'found', 'HowTo should be detected as found, not missing')
})

test('scoring engine computes grades and statuses', () => {
const scored = scoreFactors([
{
Expand Down
Loading