From fe47223625b177948dad417cd12cc2a3f648eb71 Mon Sep 17 00:00:00 2001 From: Arber Xhindoli <14798762+arberx@users.noreply.github.com> Date: Fri, 13 Mar 2026 20:21:28 -0400 Subject: [PATCH 1/2] Add sitemap audit command with site-wide pattern detection Enables auditing entire websites by parsing sitemaps, filtering to HTML content, and detecting cross-cutting issues across pages. Includes --sitemap flag for auto-discovery or explicit URL, --limit to cap pages by priority, and --top-issues to show only aggregate patterns. Co-Authored-By: Claude Haiku 4.5 --- src/cli.ts | 76 ++++++++-- src/formatters/json.ts | 6 +- src/formatters/markdown.ts | 57 +++++++- src/formatters/text.ts | 61 +++++++- src/index.ts | 3 + src/sitemap.ts | 279 +++++++++++++++++++++++++++++++++++++ src/types.ts | 41 ++++++ test/sitemap.test.ts | 64 +++++++++ 8 files changed, 574 insertions(+), 13 deletions(-) create mode 100644 src/sitemap.ts create mode 100644 test/sitemap.test.ts diff --git a/src/cli.ts b/src/cli.ts index 97d71e5..38a61b0 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -1,9 +1,11 @@ import { runAeoAudit } from './index.js' +import { runSitemapAudit } from './sitemap.js' import { isAeoAuditError } from './errors.js' import { formatJson } from './formatters/json.js' -import { formatMarkdown } from './formatters/markdown.js' -import { formatText } from './formatters/text.js' -import type { AuditReport, RunAeoAuditOptions } from './types.js' +import { formatSitemapJson } from './formatters/json.js' +import { formatMarkdown, formatSitemapMarkdown } from './formatters/markdown.js' +import { formatText, formatSitemapText } from './formatters/text.js' +import type { AuditReport, SitemapAuditReport, SitemapAuditOptions } from './types.js' const FORMATTERS = { json: formatJson, @@ -11,6 +13,12 @@ const FORMATTERS = { text: formatText, } +const SITEMAP_FORMATTERS = { + json: (report: SitemapAuditReport, _topIssuesOnly: boolean) => formatSitemapJson(report), + markdown: (report: 
SitemapAuditReport, topIssuesOnly: boolean) => formatSitemapMarkdown(report, topIssuesOnly), + text: (report: SitemapAuditReport, topIssuesOnly: boolean) => formatSitemapText(report, topIssuesOnly), +} + type FormatterName = keyof typeof FORMATTERS interface ParsedArgs { @@ -19,6 +27,10 @@ interface ParsedArgs { factors: string[] | null includeGeo: boolean help: boolean + sitemap: boolean + sitemapUrl: string | null + limit: number | null + topIssues: boolean } function isFormatterName(value: string): value is FormatterName { @@ -27,7 +39,17 @@ function isFormatterName(value: string): value is FormatterName { function parseArgs(argv: string[]): ParsedArgs { const args = argv.slice(2) - const result: ParsedArgs = { url: null, format: 'text', factors: null, includeGeo: false, help: false } + const result: ParsedArgs = { + url: null, + format: 'text', + factors: null, + includeGeo: false, + help: false, + sitemap: false, + sitemapUrl: null, + limit: null, + topIssues: false, + } for (let i = 0; i < args.length; i += 1) { if (args[i] === '--format' && args[i + 1]) { @@ -38,6 +60,21 @@ function parseArgs(argv: string[]): ParsedArgs { i += 1 } else if (args[i] === '--include-geo') { result.includeGeo = true + } else if (args[i] === '--sitemap') { + result.sitemap = true + // Check if the next arg is an explicit sitemap URL (not another flag) + if (args[i + 1] && !args[i + 1].startsWith('--')) { + result.sitemapUrl = args[i + 1] + i += 1 + } + } else if (args[i] === '--limit' && args[i + 1]) { + const num = parseInt(args[i + 1], 10) + if (Number.isFinite(num) && num > 0) { + result.limit = num + } + i += 1 + } else if (args[i] === '--top-issues') { + result.topIssues = true } else if (args[i] === '--help' || args[i] === '-h') { result.help = true } else if (!args[i].startsWith('-')) { @@ -56,6 +93,9 @@ Options: --format Output format: text (default), json, markdown --factors Comma-separated factor IDs to run (runs all if omitted) --include-geo Include optional geographic 
signals factor + --sitemap [url] Audit all pages from sitemap (auto-discovers /sitemap.xml or use explicit URL) + --limit Max pages to audit in sitemap mode (sorted by sitemap priority) + --top-issues In sitemap mode, skip per-page output and show only cross-cutting issues -h, --help Show this help message Examples: @@ -63,6 +103,10 @@ Examples: aeo-audit https://example.com --format json aeo-audit https://example.com --factors structured-data,faq-content aeo-audit https://example.com --include-geo + aeo-audit https://example.com --sitemap + aeo-audit https://example.com --sitemap https://example.com/sitemap.xml + aeo-audit https://example.com --sitemap --limit 10 + aeo-audit https://example.com --sitemap --top-issues `) } @@ -84,15 +128,27 @@ export async function main(argv: string[] = process.argv): Promise { return 1 } - const formatter = FORMATTERS[args.format] - try { - const options: RunAeoAuditOptions = { - factors: args.factors, - includeGeo: args.includeGeo, + if (args.sitemap) { + const options: SitemapAuditOptions = { + factors: args.factors, + includeGeo: args.includeGeo, + sitemapUrl: args.sitemapUrl ?? undefined, + limit: args.limit ?? undefined, + topIssuesOnly: args.topIssues, + } + + const report = await runSitemapAudit(args.url, options) + const sitemapFormatter = SITEMAP_FORMATTERS[args.format] + console.log(sitemapFormatter(report, args.topIssues)) + return report.aggregateScore >= 70 ? 0 : 1 } - const report = await runAeoAudit(args.url, options) + const formatter = FORMATTERS[args.format] + const report = await runAeoAudit(args.url, { + factors: args.factors, + includeGeo: args.includeGeo, + }) console.log(formatter(report)) return report.overallScore >= 70 ? 
0 : 1 diff --git a/src/formatters/json.ts b/src/formatters/json.ts index 1aac604..279e0e5 100644 --- a/src/formatters/json.ts +++ b/src/formatters/json.ts @@ -1,5 +1,9 @@ -import type { AuditReport } from '../types.js' +import type { AuditReport, SitemapAuditReport } from '../types.js' export function formatJson(report: AuditReport): string { return JSON.stringify(report, null, 2) } + +export function formatSitemapJson(report: SitemapAuditReport): string { + return JSON.stringify(report, null, 2) +} diff --git a/src/formatters/markdown.ts b/src/formatters/markdown.ts index 5fc9917..0a8be25 100644 --- a/src/formatters/markdown.ts +++ b/src/formatters/markdown.ts @@ -1,4 +1,4 @@ -import type { AuditReport } from '../types.js' +import type { AuditReport, SitemapAuditReport } from '../types.js' export function formatMarkdown(report: AuditReport): string { const lines = [] @@ -55,3 +55,58 @@ export function formatMarkdown(report: AuditReport): string { return lines.join('\n') } + +export function formatSitemapMarkdown(report: SitemapAuditReport, topIssuesOnly = false): string { + const lines = [] + + lines.push(`# AEO Sitemap Audit Report`) + lines.push(``) + lines.push(`**Sitemap:** ${report.sitemapUrl}`) + lines.push(`**Aggregate Grade:** ${report.aggregateGrade} (${report.aggregateScore}/100)`) + lines.push(`**Pages:** ${report.pagesAudited} audited, ${report.pagesSkipped} skipped, ${report.pagesDiscovered} discovered`) + lines.push(`**Audited:** ${report.auditedAt}`) + lines.push(``) + + if (!topIssuesOnly) { + lines.push(`## Per-Page Scores`) + lines.push(``) + lines.push(`| URL | Score | Grade | Status |`) + lines.push(`|-----|-------|-------|--------|`) + + for (const page of report.pages) { + const url = page.url.length > 60 ? page.url.slice(0, 57) + '...' 
: page.url + if (page.status === 'error') { + lines.push(`| ${url} | - | - | error: ${page.error} |`) + } else { + lines.push(`| ${url} | ${page.overallScore} | ${page.overallGrade} | ${page.status} |`) + } + } + + lines.push(``) + } + + if (report.crossCuttingIssues.length > 0) { + lines.push(`## Cross-Cutting Issues`) + lines.push(``) + lines.push(`| Factor | Avg Score | Avg Grade | Affected Pages |`) + lines.push(`|--------|-----------|-----------|----------------|`) + + for (const issue of report.crossCuttingIssues) { + const pct = Math.round((issue.affectedPages / issue.totalPages) * 100) + lines.push(`| ${issue.factorName} | ${issue.avgScore} | ${issue.avgGrade} | ${issue.affectedPages}/${issue.totalPages} (${pct}%) |`) + } + + lines.push(``) + } + + if (report.prioritizedFixes.length > 0) { + lines.push(`## Prioritized Fixes (by site-wide impact)`) + lines.push(``) + for (let i = 0; i < report.prioritizedFixes.length; i++) { + lines.push(`${i + 1}. ${report.prioritizedFixes[i]}`) + } + lines.push(``) + } + + return lines.join('\n') +} diff --git a/src/formatters/text.ts b/src/formatters/text.ts index e40942b..3cd5885 100644 --- a/src/formatters/text.ts +++ b/src/formatters/text.ts @@ -6,7 +6,7 @@ const YELLOW = '\x1b[33m' const RED = '\x1b[31m' const CYAN = '\x1b[36m' -import type { AuditReport, ScoredFactor } from '../types.js' +import type { AuditReport, ScoredFactor, SitemapAuditReport } from '../types.js' function gradeColor(grade: string): string { if (grade.startsWith('A')) return GREEN @@ -72,3 +72,62 @@ export function formatText(report: AuditReport): string { return lines.join('\n') } + +export function formatSitemapText(report: SitemapAuditReport, topIssuesOnly = false): string { + const lines = [] + + const gc = gradeColor(report.aggregateGrade) + lines.push(``) + lines.push(`${BOLD}AEO Sitemap Audit Report${RESET}`) + lines.push(`${DIM}${report.sitemapUrl}${RESET}`) + lines.push(``) + lines.push(` ${BOLD}Aggregate Grade:${RESET} 
${gc}${BOLD}${report.aggregateGrade}${RESET} ${bar(report.aggregateScore, 30)} ${report.aggregateScore}/100`) + lines.push(` ${DIM}${report.pagesAudited} pages audited, ${report.pagesSkipped} skipped, ${report.pagesDiscovered} discovered${RESET}`) + lines.push(``) + + if (!topIssuesOnly) { + lines.push(`${BOLD}Per-Page Scores${RESET}`) + lines.push(`${'─'.repeat(70)}`) + + const sorted = [...report.pages].sort((a, b) => b.overallScore - a.overallScore) + for (const page of sorted) { + if (page.status === 'error') { + const url = page.url.length > 50 ? page.url.slice(0, 47) + '...' : page.url + lines.push(` ${RED}✗${RESET} ${url.padEnd(50)} ${RED}error${RESET}`) + } else { + const url = page.url.length > 50 ? page.url.slice(0, 47) + '...' : page.url + const pgc = gradeColor(page.overallGrade) + lines.push(` ${statusIcon(page.overallScore >= 70 ? 'pass' : page.overallScore >= 40 ? 'partial' : 'fail')} ${url.padEnd(50)} ${bar(page.overallScore, 15)} ${pgc}${page.overallGrade.padEnd(3)}${RESET}`) + } + } + + lines.push(`${'─'.repeat(70)}`) + lines.push(``) + } + + if (report.crossCuttingIssues.length > 0) { + lines.push(`${BOLD}Cross-Cutting Issues${RESET}`) + lines.push(`${'─'.repeat(70)}`) + + for (const issue of report.crossCuttingIssues) { + const pct = Math.round((issue.affectedPages / issue.totalPages) * 100) + const igc = gradeColor(issue.avgGrade) + lines.push(` ${igc}${issue.avgGrade.padEnd(3)}${RESET} ${issue.factorName.padEnd(32)} ${DIM}avg ${issue.avgScore}/100, affects ${pct}% of pages${RESET}`) + } + + lines.push(`${'─'.repeat(70)}`) + lines.push(``) + } + + if (report.prioritizedFixes.length > 0) { + lines.push(`${BOLD}Prioritized Fixes (by site-wide impact)${RESET}`) + for (let i = 0; i < report.prioritizedFixes.length; i++) { + lines.push(` ${CYAN}${i + 1}.${RESET} ${report.prioritizedFixes[i]}`) + } + lines.push(``) + } + + lines.push(`${DIM}${report.auditedAt}${RESET}`) + + return lines.join('\n') +} diff --git a/src/index.ts b/src/index.ts index 
0df3071..c6c63b7 100644 --- a/src/index.ts +++ b/src/index.ts @@ -19,6 +19,9 @@ import { getVisibleText, parseJsonLdScripts, countWords } from './analyzers/help import { FACTOR_DEFINITIONS, OPTIONAL_FACTOR_DEFINITIONS, scoreFactors } from './scoring.js' import type { Analyzer, AuditContext, AuditReport, RunAeoAuditOptions, ScoredFactor } from './types.js' +export { runSitemapAudit } from './sitemap.js' +export type { SitemapAuditReport, SitemapAuditOptions } from './types.js' + const ANALYZER_BY_ID: Record = { 'structured-data': analyzeStructuredData, 'ai-readable-content': analyzeAiReadableContent, diff --git a/src/sitemap.ts b/src/sitemap.ts new file mode 100644 index 0000000..e42b099 --- /dev/null +++ b/src/sitemap.ts @@ -0,0 +1,279 @@ +import { AeoAuditError } from './errors.js' +import { normalizeTargetUrl } from './fetch-page.js' +import { runAeoAudit } from './index.js' +import { scoreToGrade } from './scoring.js' +import type { + AuditReport, + CrossCuttingIssue, + RunAeoAuditOptions, + SitemapAuditOptions, + SitemapAuditReport, + SitemapPageResult, +} from './types.js' + +const USER_AGENT = 'AINYC-AEO-Audit/1.0' +const SITEMAP_TIMEOUT_MS = 10_000 +const SITEMAP_MAX_BYTES = 5 * 1024 * 1024 + +const SKIP_EXTENSIONS = new Set(['.pdf', '.txt', '.xml', '.jpg', '.jpeg', '.png', '.gif', '.svg', '.webp', '.mp4', '.mp3', '.zip', '.gz', '.css', '.js']) + +function shouldSkipUrl(url: string): boolean { + try { + const pathname = new URL(url).pathname.toLowerCase() + return SKIP_EXTENSIONS.has(pathname.slice(pathname.lastIndexOf('.'))) + } catch { + return true + } +} + +interface SitemapEntry { + loc: string + priority?: number +} + +function parseSitemapXml(xml: string): SitemapEntry[] { + const entries: SitemapEntry[] = [] + + // Extract elements and optional from blocks + const urlBlockRe = /]*>([\s\S]*?)<\/url>/gi + let urlMatch + while ((urlMatch = urlBlockRe.exec(xml)) !== null) { + const block = urlMatch[1] + const locMatch = 
block.match(/]*>([\s\S]*?)<\/loc>/i) + if (!locMatch) continue + + const loc = locMatch[1].trim() + if (!loc) continue + + const priorityMatch = block.match(/]*>([\s\S]*?)<\/priority>/i) + const priority = priorityMatch ? parseFloat(priorityMatch[1].trim()) : undefined + + entries.push({ loc, priority: Number.isFinite(priority) ? priority : undefined }) + } + + // Handle sitemap index files — extract nested sitemap URLs + if (entries.length === 0) { + const sitemapLocRe = /]*>[\s\S]*?]*>([\s\S]*?)<\/loc>[\s\S]*?<\/sitemap>/gi + let sitemapMatch + while ((sitemapMatch = sitemapLocRe.exec(xml)) !== null) { + entries.push({ loc: sitemapMatch[1].trim() }) + } + } + + return entries +} + +async function fetchSitemapBody(url: string): Promise { + const controller = new AbortController() + const timer = setTimeout(() => controller.abort(), SITEMAP_TIMEOUT_MS) + + try { + const response = await fetch(url, { + method: 'GET', + signal: controller.signal, + headers: { 'User-Agent': USER_AGENT, Accept: '*/*' }, + }) + + if (!response.ok) { + throw new AeoAuditError('UNREACHABLE', `Sitemap returned HTTP ${response.status}.`) + } + + const reader = response.body?.getReader() + if (!reader) return '' + + const chunks: Buffer[] = [] + let totalBytes = 0 + + for (;;) { + const { done, value } = await reader.read() + if (done) break + const chunk = Buffer.from(value) + totalBytes += chunk.length + if (totalBytes > SITEMAP_MAX_BYTES) { + await reader.cancel() + throw new AeoAuditError('BODY_TOO_LARGE', `Sitemap exceeded ${SITEMAP_MAX_BYTES} bytes.`) + } + chunks.push(chunk) + } + + return Buffer.concat(chunks).toString('utf8') + } catch (error) { + if (error instanceof AeoAuditError) throw error + if (error instanceof Error && error.name === 'AbortError') { + throw new AeoAuditError('TIMEOUT', `Sitemap fetch timed out after ${SITEMAP_TIMEOUT_MS}ms.`) + } + throw new AeoAuditError('UNREACHABLE', 'Could not fetch sitemap.', { cause: error }) + } finally { + clearTimeout(timer) + } +} + 
+async function resolveSitemapUrls(sitemapUrl: string): Promise { + const body = await fetchSitemapBody(sitemapUrl) + const entries = parseSitemapXml(body) + + // If it's a sitemap index, fetch child sitemaps + const isSitemapIndex = body.includes(' { + try { + const childBody = await fetchSitemapBody(entry.loc) + return parseSitemapXml(childBody) + } catch { + return [] + } + }), + ) + return childResults.flat() + } + + return entries +} + +function buildCrossCuttingIssues(successPages: AuditReport[]): CrossCuttingIssue[] { + if (successPages.length === 0) return [] + + // Collect scores per factor across all pages + const factorScores = new Map }>() + + for (const page of successPages) { + for (const factor of page.factors) { + let entry = factorScores.get(factor.id) + if (!entry) { + entry = { name: factor.name, scores: [], recommendations: new Map() } + factorScores.set(factor.id, entry) + } + entry.scores.push(factor.score) + + for (const rec of factor.recommendations) { + entry.recommendations.set(rec, (entry.recommendations.get(rec) || 0) + 1) + } + } + } + + const issues: CrossCuttingIssue[] = [] + + for (const [factorId, entry] of factorScores) { + const avgScore = Math.round(entry.scores.reduce((a, b) => a + b, 0) / entry.scores.length) + const affectedPages = entry.scores.filter((s) => s < 70).length + + if (affectedPages === 0) continue + + // Sort recommendations by frequency + const topRecs = [...entry.recommendations.entries()] + .sort((a, b) => b[1] - a[1]) + .slice(0, 3) + .map(([rec]) => rec) + + issues.push({ + factorId, + factorName: entry.name, + avgScore, + avgGrade: scoreToGrade(avgScore), + affectedPages, + totalPages: successPages.length, + topRecommendations: topRecs, + }) + } + + // Sort by impact: most affected pages first, then lowest avg score + issues.sort((a, b) => b.affectedPages - a.affectedPages || a.avgScore - b.avgScore) + + return issues +} + +function buildPrioritizedFixes(issues: CrossCuttingIssue[], totalPages: number): 
string[] { + return issues + .slice(0, 5) + .map((issue) => { + const pct = Math.round((issue.affectedPages / totalPages) * 100) + const rec = issue.topRecommendations[0] || 'Review and improve this factor.' + return `${issue.factorName} (avg ${issue.avgGrade}, affects ${pct}% of pages): ${rec}` + }) +} + +export async function runSitemapAudit(rawUrl: string, options: SitemapAuditOptions = {}): Promise { + const normalizedUrl = normalizeTargetUrl(rawUrl) + const origin = normalizedUrl.origin + + // Determine sitemap URL + const sitemapUrl = options.sitemapUrl || `${origin}/sitemap.xml` + + // Fetch and parse sitemap + let entries = await resolveSitemapUrls(sitemapUrl) + + // Filter to HTML content pages + const allCount = entries.length + entries = entries.filter((e) => !shouldSkipUrl(e.loc)) + + // Sort by priority (highest first) if priorities exist + entries.sort((a, b) => (b.priority ?? 0.5) - (a.priority ?? 0.5)) + + // Apply limit + if (options.limit && options.limit > 0) { + entries = entries.slice(0, options.limit) + } + + if (entries.length === 0) { + throw new AeoAuditError('BAD_INPUT', 'No auditable URLs found in sitemap.') + } + + const skipped = allCount - entries.length + const auditOptions: RunAeoAuditOptions = { + factors: options.factors, + includeGeo: options.includeGeo, + } + + // Audit each page (sequentially to avoid hammering the target) + const pageResults: SitemapPageResult[] = [] + const successReports: AuditReport[] = [] + + for (const entry of entries) { + try { + const report = await runAeoAudit(entry.loc, auditOptions) + successReports.push(report) + pageResults.push({ + url: report.finalUrl, + overallScore: report.overallScore, + overallGrade: report.overallGrade, + status: 'success', + factors: report.factors, + metadata: report.metadata, + }) + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error) + pageResults.push({ + url: entry.loc, + overallScore: 0, + overallGrade: 'F', + status: 'error', + error: message, + }) + } + } + + // Calculate aggregate score from successful audits + const successScores = pageResults.filter((p) => p.status === 'success').map((p) => p.overallScore) + const aggregateScore = successScores.length > 0 + ? Math.round(successScores.reduce((a, b) => a + b, 0) / successScores.length) + : 0 + + const crossCuttingIssues = buildCrossCuttingIssues(successReports) + const prioritizedFixes = buildPrioritizedFixes(crossCuttingIssues, successReports.length) + + return { + sitemapUrl, + auditedAt: new Date().toISOString(), + pagesDiscovered: allCount, + pagesAudited: entries.length, + pagesSkipped: skipped, + aggregateScore, + aggregateGrade: scoreToGrade(aggregateScore), + pages: pageResults, + crossCuttingIssues, + prioritizedFixes, + } +} + +export { parseSitemapXml, shouldSkipUrl } diff --git a/src/types.ts b/src/types.ts index 98f4c4d..2bfa24b 100644 --- a/src/types.ts +++ b/src/types.ts @@ -138,3 +138,44 @@ export interface FetchedPage { } export type Analyzer = (context: AuditContext) => AnalysisResult | Promise + +/* ── Sitemap audit types ── */ + +export interface SitemapPageResult { + url: string + overallScore: number + overallGrade: string + status: 'success' | 'error' + error?: string + factors?: ScoredFactor[] + metadata?: AuditMetadata +} + +export interface CrossCuttingIssue { + factorId: string + factorName: string + avgScore: number + avgGrade: string + affectedPages: number + totalPages: number + topRecommendations: string[] +} + +export interface SitemapAuditReport { + sitemapUrl: string + auditedAt: string + pagesDiscovered: number + pagesAudited: number + pagesSkipped: number + aggregateScore: number + aggregateGrade: string + pages: SitemapPageResult[] + crossCuttingIssues: CrossCuttingIssue[] + prioritizedFixes: string[] +} + +export interface SitemapAuditOptions extends RunAeoAuditOptions { 
+ sitemapUrl?: string + limit?: number + topIssuesOnly?: boolean +} diff --git a/test/sitemap.test.ts b/test/sitemap.test.ts new file mode 100644 index 0000000..fe5434f --- /dev/null +++ b/test/sitemap.test.ts @@ -0,0 +1,64 @@ +import assert from 'node:assert/strict' +import test from 'node:test' + +import { parseSitemapXml, shouldSkipUrl } from '../src/sitemap.js' + +test('parseSitemapXml extracts loc and priority from url blocks', () => { + const xml = ` + + + https://example.com/ + 1.0 + + + https://example.com/about + 0.8 + + + https://example.com/blog + +` + + const entries = parseSitemapXml(xml) + assert.equal(entries.length, 3) + assert.equal(entries[0].loc, 'https://example.com/') + assert.equal(entries[0].priority, 1.0) + assert.equal(entries[1].loc, 'https://example.com/about') + assert.equal(entries[1].priority, 0.8) + assert.equal(entries[2].loc, 'https://example.com/blog') + assert.equal(entries[2].priority, undefined) +}) + +test('parseSitemapXml handles sitemap index files', () => { + const xml = ` + + + https://example.com/sitemap-posts.xml + + + https://example.com/sitemap-pages.xml + +` + + const entries = parseSitemapXml(xml) + assert.equal(entries.length, 2) + assert.equal(entries[0].loc, 'https://example.com/sitemap-posts.xml') + assert.equal(entries[1].loc, 'https://example.com/sitemap-pages.xml') +}) + +test('shouldSkipUrl filters non-HTML URLs', () => { + assert.equal(shouldSkipUrl('https://example.com/doc.pdf'), true) + assert.equal(shouldSkipUrl('https://example.com/image.png'), true) + assert.equal(shouldSkipUrl('https://example.com/data.xml'), true) + assert.equal(shouldSkipUrl('https://example.com/robots.txt'), true) + assert.equal(shouldSkipUrl('https://example.com/style.css'), true) + assert.equal(shouldSkipUrl('https://example.com/app.js'), true) +}) + +test('shouldSkipUrl allows HTML content pages', () => { + assert.equal(shouldSkipUrl('https://example.com/'), false) + assert.equal(shouldSkipUrl('https://example.com/about'), false) 
+ assert.equal(shouldSkipUrl('https://example.com/blog/post-1'), false) + assert.equal(shouldSkipUrl('https://example.com/page.html'), false) + assert.equal(shouldSkipUrl('https://example.com/page.htm'), false) +}) From 006b331a638d63f0ed5bd342ff471756a01e057b Mon Sep 17 00:00:00 2001 From: Arber Xhindoli <14798762+arberx@users.noreply.github.com> Date: Fri, 13 Mar 2026 20:22:16 -0400 Subject: [PATCH 2/2] Bump version to 1.3.0 and document sitemap mode in SKILL.md Co-Authored-By: Claude Opus 4.6 --- package.json | 2 +- skills/aeo/SKILL.md | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/package.json b/package.json index 07720d3..915922d 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@ainyc/aeo-audit", - "version": "1.2.2", + "version": "1.3.0", "description": "The most comprehensive open-source Answer Engine Optimization (AEO) audit tool. Scores websites across 13 ranking factors that determine AI citation.", "type": "module", "main": "./dist/index.js", diff --git a/skills/aeo/SKILL.md b/skills/aeo/SKILL.md index 1dbf012..1763852 100644 --- a/skills/aeo/SKILL.md +++ b/skills/aeo/SKILL.md @@ -51,6 +51,9 @@ If no mode is provided, default to `audit`. ## Examples - `audit https://example.com` +- `audit https://example.com --sitemap` +- `audit https://example.com --sitemap --limit 10` +- `audit https://example.com --sitemap --top-issues` - `fix https://example.com` - `schema https://example.com` - `llms https://example.com` @@ -81,6 +84,28 @@ Use for broad requests such as "audit this site" or "why am I not being cited?" 
- Top fixes - Metadata such as fetch time and auxiliary file availability +### Sitemap Mode + +Use `--sitemap` to audit all pages discovered from the site's sitemap: + +```bash +npx @ainyc/aeo-audit@latest "<url>" --sitemap --format json +npx @ainyc/aeo-audit@latest "<url>" --sitemap https://example.com/sitemap.xml --format json +npx @ainyc/aeo-audit@latest "<url>" --sitemap --limit 10 --format json +npx @ainyc/aeo-audit@latest "<url>" --sitemap --top-issues --format json +``` + +Flags: +- `--sitemap [url]` — auto-discover `/sitemap.xml` or provide an explicit URL +- `--limit <n>` — cap pages audited (sorted by sitemap priority) +- `--top-issues` — skip per-page output, show only cross-cutting patterns + +Returns: +- Per-page scores and grades +- Cross-cutting issues (factors failing across multiple pages) +- Aggregate score and grade +- Prioritized fixes ranked by site-wide impact + ## Fix Use when the user wants code changes applied after the audit.