Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added ainyc-canonry-1.12.0.tgz
Binary file not shown.
348 changes: 348 additions & 0 deletions apps/web/public/evidence-preview.html

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions packages/api-routes/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ import type { ScheduleRoutesOptions } from './schedules.js'
import { notificationRoutes } from './notifications.js'
import { googleRoutes } from './google.js'
import type { GoogleRoutesOptions } from './google.js'
import { sweepRoutes } from './sweeps.js'
import type { SweepRoutesOptions } from './sweeps.js'

declare module 'fastify' {
interface FastifyInstance {
Expand Down Expand Up @@ -61,6 +63,8 @@ export interface ApiRoutesOptions {
publicUrl?: string
onGscSyncRequested?: GoogleRoutesOptions['onGscSyncRequested']
onInspectSitemapRequested?: GoogleRoutesOptions['onInspectSitemapRequested']
/** Called when an indexing sweep is created */
onSweepCreated?: SweepRoutesOptions['onSweepCreated']
}

export async function apiRoutes(app: FastifyInstance, opts: ApiRoutesOptions) {
Expand Down Expand Up @@ -115,6 +119,9 @@ export async function apiRoutes(app: FastifyInstance, opts: ApiRoutesOptions) {
onGscSyncRequested: opts.onGscSyncRequested,
onInspectSitemapRequested: opts.onInspectSitemapRequested,
} satisfies GoogleRoutesOptions)
await api.register(sweepRoutes, {
onSweepCreated: opts.onSweepCreated,
} satisfies SweepRoutesOptions)
}, { prefix: '/api/v1' })
}

Expand Down
19 changes: 17 additions & 2 deletions packages/api-routes/src/settings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ export interface GoogleSettingsSummary {

export interface SettingsRoutesOptions {
providerSummary?: ProviderSummaryEntry[]
onProviderUpdate?: (provider: string, apiKey: string, model?: string, baseUrl?: string, quota?: Partial<ProviderQuotaPolicy>) => ProviderSummaryEntry | null
onProviderUpdate?: (provider: string, apiKey: string, model?: string, baseUrl?: string, quota?: Partial<ProviderQuotaPolicy>, meta?: Record<string, unknown>) => ProviderSummaryEntry | null
google?: GoogleSettingsSummary
onGoogleUpdate?: (clientId: string, clientSecret: string) => GoogleSettingsSummary | null
}
Expand All @@ -30,11 +30,26 @@ export async function settingsRoutes(app: FastifyInstance, opts: SettingsRoutesO
Params: { name: string }
Body: { apiKey?: string; baseUrl?: string; model?: string; quota?: Partial<ProviderQuotaPolicy> }
}>('/settings/providers/:name', async (request, reply) => {
// web-search is a special non-LLM provider
if (request.params.name === 'web-search') {
const { apiKey, backend, cx } = request.body as { apiKey?: string; backend?: string; cx?: string } ?? {}
if (!apiKey || typeof apiKey !== 'string') {
return reply.status(400).send({ error: 'apiKey is required for web-search provider' })
}
if (!opts.onProviderUpdate) {
return reply.status(501).send({ error: 'Provider configuration updates are not supported in this deployment' })
}
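// Store web-search config via the generic provider update callback using name 'web-search'.
// The backend name travels in the callback's baseUrl slot (defaulting to 'serper'); model and quota are left undefined.
// cx is passed explicitly via the meta bag, and only when provided, so it is persisted for the google-cse backend.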
const result = opts.onProviderUpdate('web-search', apiKey, undefined, backend ?? 'serper', undefined, cx ? { cx } : undefined)
return result ?? reply.status(500).send({ error: 'Failed to update web-search provider configuration' })
}

const providerName = parseProviderName(request.params.name)
const { apiKey, baseUrl, model, quota } = request.body ?? {}

if (!providerName) {
return reply.status(400).send({ error: `Invalid provider: ${request.params.name}. Must be one of: gemini, openai, claude, local` })
return reply.status(400).send({ error: `Invalid provider: ${request.params.name}. Must be one of: gemini, openai, claude, local, web-search` })
}
const name = providerName

Expand Down
195 changes: 195 additions & 0 deletions packages/api-routes/src/sweeps.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
import crypto from 'node:crypto'
import { eq, asc, and, inArray } from 'drizzle-orm'
import type { FastifyInstance } from 'fastify'
import { indexingSweeps, indexingSweepResults, keywords, projects } from '@ainyc/canonry-db'
import { resolveProject, writeAuditLog } from './helpers.js'

// Trigger labels accepted from the POST body; any other value (or none) is stored as 'manual'.
const ALLOWED_TRIGGERS = new Set(['manual', 'scheduled', 'api'])

/** Options accepted by the sweep route plugin. */
export interface SweepRoutesOptions {
/**
 * Called when a new indexing sweep is created.
 *
 * Invoked synchronously by the POST handler after the sweep row has been
 * inserted and the audit-log entry written; its return value is ignored.
 * `keyword` is the optional `keyword` field of the request body, forwarded as-is.
 */
onSweepCreated?: (sweepId: string, projectId: string, keyword?: string) => void
}

export async function sweepRoutes(app: FastifyInstance, opts: SweepRoutesOptions) {
// POST /projects/:name/sweeps — trigger an indexing sweep
app.post<{
Params: { name: string }
Body: { keyword?: string; trigger?: string }
}>('/projects/:name/sweeps', async (request, reply) => {
const project = resolveProjectSafe(app, request.params.name, reply)
if (!project) return

const now = new Date().toISOString()
const trigger = ALLOWED_TRIGGERS.has(request.body?.trigger ?? '')
? request.body!.trigger!
: 'manual'
const keyword = request.body?.keyword

// Guard against concurrent sweeps for the same project.
// Wrap the check+insert in a transaction so two simultaneous requests cannot
// both observe no active sweep and then both insert — SQLite serialises writers.
const sweepId = crypto.randomUUID()
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Bug] There is no guard against concurrent sweeps for the same project. Two rapid POSTs to /projects/:name/sweeps will both be inserted with status: 'queued', then both fire onSweepCreated, doubling API calls and producing duplicate/interleaved rows in indexing_sweep_results.

Consider checking for an active sweep before creating a new one:

const activeSweep = app.db
  .select()
  .from(indexingSweeps)
  .where(
    and(
      eq(indexingSweeps.projectId, project.id),
      inArray(indexingSweeps.status, ['queued', 'running']),
    )
  )
  .get()
if (activeSweep) {
  return reply.status(409).send({ error: `Sweep ${activeSweep.id} is already ${activeSweep.status}` })
}

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in commit 74b0fd5 — Added a pre-insert check for active (queued or running) sweeps for the same project. A 409 response is returned if one is found, preventing duplicate sweep creation.

const txResult = app.db.transaction((tx) => {
const activeSweep = tx
.select()
.from(indexingSweeps)
.where(
and(
eq(indexingSweeps.projectId, project.id),
inArray(indexingSweeps.status, ['queued', 'running']),
),
)
.get()
if (activeSweep) {
return { conflict: true, activeSweep } as const
}

tx.insert(indexingSweeps).values({
id: sweepId,
projectId: project.id,
status: 'queued',
trigger,
createdAt: now,
}).run()

return { conflict: false } as const
})

if (txResult.conflict) {
return reply.status(409).send({ error: `Sweep ${txResult.activeSweep.id} is already ${txResult.activeSweep.status}` })
}

writeAuditLog(app.db, {
projectId: project.id,
actor: 'api',
action: 'sweep.created',
entityType: 'indexing_sweep',
entityId: sweepId,
})

const sweep = app.db.select().from(indexingSweeps).where(eq(indexingSweeps.id, sweepId)).get()!

if (opts.onSweepCreated) {
opts.onSweepCreated(sweepId, project.id, keyword)
}

return reply.status(201).send(formatSweep(sweep))
})

// GET /projects/:name/sweeps — every sweep belonging to one project, ordered by createdAt ascending
app.get<{ Params: { name: string } }>('/projects/:name/sweeps', async (request, reply) => {
  // When resolveProjectSafe returns null it has already sent the error response.
  const project = resolveProjectSafe(app, request.params.name, reply)
  if (!project) return

  const projectSweeps = app.db
    .select()
    .from(indexingSweeps)
    .where(eq(indexingSweeps.projectId, project.id))
    .orderBy(asc(indexingSweeps.createdAt))
    .all()

  const payload = projectSweeps.map(row => formatSweep(row))
  return reply.send(payload)
})

// GET /sweeps/:id — a single sweep together with its result rows
app.get<{ Params: { id: string } }>('/sweeps/:id', async (request, reply) => {
  const sweep = app.db
    .select()
    .from(indexingSweeps)
    .where(eq(indexingSweeps.id, request.params.id))
    .get()

  if (!sweep) {
    const message = `Sweep '${request.params.id}' not found`
    return reply.status(404).send({ error: { code: 'NOT_FOUND', message } })
  }

  // Left join on keywords: result rows are returned even when no keyword row matches.
  const resultRows = app.db
    .select({
      id: indexingSweepResults.id,
      sweepId: indexingSweepResults.sweepId,
      keywordId: indexingSweepResults.keywordId,
      keyword: keywords.keyword,
      domain: indexingSweepResults.domain,
      domainRole: indexingSweepResults.domainRole,
      indexedPageCount: indexingSweepResults.indexedPageCount,
      topPages: indexingSweepResults.topPages,
      createdAt: indexingSweepResults.createdAt,
    })
    .from(indexingSweepResults)
    .leftJoin(keywords, eq(indexingSweepResults.keywordId, keywords.id))
    .where(eq(indexingSweepResults.sweepId, sweep.id))
    .all()

  const summary = formatSweep(sweep)
  // topPages is decoded from its JSON text form; unparseable text becomes [].
  const results = resultRows.map(row => ({
    ...row,
    topPages: tryParseJson(row.topPages, []),
  }))

  return reply.send({ ...summary, results })
})

// GET /sweeps — list all sweeps across all projects (paginated)
app.get<{ Querystring: { limit?: string; offset?: string } }>('/sweeps', async (request, reply) => {
const limit = Math.min(Math.max(parseInt(String(request.query.limit ?? '50'), 10) || 50, 1), 200)
const offset = Math.max(parseInt(String(request.query.offset ?? '0'), 10) || 0, 0)
const rows = app.db
.select({
id: indexingSweeps.id,
projectId: indexingSweeps.projectId,
projectName: projects.name,
status: indexingSweeps.status,
trigger: indexingSweeps.trigger,
startedAt: indexingSweeps.startedAt,
finishedAt: indexingSweeps.finishedAt,
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Security] GET /sweeps returns all sweeps across all projects with no authentication check or pagination. Combined with the projectName field this leaks the full project list and sweep history to any unauthenticated caller (assuming the existing routes require auth, this one bypasses it by not going through the auth-gated prefix).

Verify that Fastify auth hooks apply to this route. Also add a limit/offset query param or cap the result count to avoid unbounded reads:

const limit = Math.min(Number(request.query.limit ?? 50), 200)
const offset = Number(request.query.offset ?? 0)

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in commit 74b0fd5 — Added limit/offset query params with a cap of 200 records. Default limit is 50. Auth hook coverage should be verified at the Fastify plugin registration level (out of scope for this PR).

error: indexingSweeps.error,
createdAt: indexingSweeps.createdAt,
})
.from(indexingSweeps)
.leftJoin(projects, eq(indexingSweeps.projectId, projects.id))
.orderBy(asc(indexingSweeps.createdAt))
.limit(limit)
.offset(offset)
.all()
return reply.send(rows.map(r => ({ ...formatSweep(r), projectName: r.projectName })))
})
}

/**
 * Projects a sweep row onto the API response shape.
 *
 * Only the eight known fields are copied (extra properties on `row` are
 * dropped), and the optional timestamp/error fields are normalised so that a
 * missing value is always reported as `null`, never `undefined`.
 */
function formatSweep(row: {
  id: string
  projectId: string
  status: string
  trigger: string
  startedAt?: string | null
  finishedAt?: string | null
  error?: string | null
  createdAt: string
}) {
  const { id, projectId, status, trigger, createdAt } = row
  // Collapse undefined into null so the serialised payload always carries the key.
  const orNull = (field: string | null | undefined): string | null => field ?? null

  return {
    id,
    projectId,
    status,
    trigger,
    startedAt: orNull(row.startedAt),
    finishedAt: orNull(row.finishedAt),
    error: orNull(row.error),
    createdAt,
  }
}

/**
 * Parses `value` as JSON, returning `fallback` instead of throwing.
 *
 * `fallback` is returned when `value` is `null`/`undefined` or is not valid
 * JSON (including the empty string). The nullish guard matters because
 * `JSON.parse(null)` does not throw — it coerces to the text "null" and
 * yields `null`, which would otherwise leak through in place of the fallback.
 * NOTE(review): whether callers can actually pass a null column value depends
 * on the table schema, which is not visible here.
 *
 * The parsed value is not validated against `T`; the cast is the caller's claim.
 *
 * @param value - JSON text, or null/undefined when nothing is stored.
 * @param fallback - Value to return when there is nothing parseable.
 * @returns The parsed JSON value, or `fallback`.
 */
function tryParseJson<T>(value: string | null | undefined, fallback: T): T {
  if (value == null) return fallback
  try {
    return JSON.parse(value) as T
  } catch {
    return fallback
  }
}

/**
 * Looks up a project by name, converting "HTTP-shaped" lookup errors into a response.
 *
 * If `resolveProject` throws an object exposing both `statusCode` and `toJSON`,
 * that status and serialised body are sent on `reply` and `null` is returned,
 * so the caller only has to bail out. Anything else thrown is rethrown untouched.
 */
function resolveProjectSafe(
  app: FastifyInstance,
  name: string,
  reply: { status: (code: number) => { send: (body: unknown) => unknown } },
) {
  try {
    return resolveProject(app.db, name)
  } catch (caught: unknown) {
    const looksLikeHttpError =
      typeof caught === 'object' && caught !== null && 'statusCode' in caught && 'toJSON' in caught
    if (!looksLikeHttpError) {
      throw caught
    }

    const httpError = caught as { statusCode: number; toJSON(): unknown }
    reply.status(httpError.statusCode).send(httpError.toJSON())
    return null
  }
}
Binary file added packages/canonry/ainyc-canonry-1.15.3.tgz
Binary file not shown.
Binary file added packages/canonry/ainyc-canonry-1.16.0.tgz
Binary file not shown.
Binary file added packages/canonry/ainyc-canonry-1.19.3.tgz
Binary file not shown.
1 change: 1 addition & 0 deletions packages/canonry/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
"@ainyc/canonry-provider-local": "workspace:*",
"@ainyc/canonry-integration-google": "workspace:*",
"@ainyc/canonry-provider-openai": "workspace:*",
"@ainyc/canonry-provider-web-search": "workspace:*",
"@types/better-sqlite3": "^7.6.13",
"@types/node-cron": "^3.0.11",
"tsup": "^8.5.1",
Expand Down
57 changes: 56 additions & 1 deletion packages/canonry/src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import { showSettings, setProvider, setGoogleAuth } from './commands/settings.js
import { setSchedule, showSchedule, enableSchedule, disableSchedule, removeSchedule } from './commands/schedule.js'
import { addNotification, listNotifications, removeNotification, testNotification, listEvents } from './commands/notify.js'
import { telemetryCommand } from './commands/telemetry.js'
import { triggerSweep, listSweeps, showSweep } from './commands/sweep.js'
import {
googleConnect, googleDisconnect, googleStatus, googleProperties,
googleSetProperty, googleSetSitemap, googleListSitemaps, googleSync, googlePerformance, googleInspect,
Expand Down Expand Up @@ -90,8 +91,13 @@ Usage:
canonry google coverage <project> Show index coverage summary
canonry google inspections <project> Show URL inspection history (--url <url>)
canonry google deindexed <project> Show pages that lost indexing
canonry sweep <project> Run indexing sweep (site: queries) for all keywords
canonry sweep <project> --keyword Run sweep for a specific keyword
canonry sweep show <id> Show sweep details and results
canonry sweeps <project> List past indexing sweeps
canonry settings Show active provider and quota settings
canonry settings provider <name> Update a provider config
canonry settings provider <name> Update a provider config (name: gemini, openai, claude, local, web-search)
canonry settings provider web-search --api-key <key> --backend serper|google-cse
canonry settings google Update Google OAuth credentials
canonry telemetry status Show telemetry status
canonry telemetry enable Enable anonymous telemetry
Expand Down Expand Up @@ -1091,6 +1097,55 @@ async function main() {
break
}

// ── canonry sweep / sweeps ────────────────────────────────────────────
case 'sweep': {
const subcommand = args[1]

if (subcommand === 'show') {
const id = args[2]
if (!id) {
console.error('Usage: canonry sweep show <id>')
process.exit(1)
}
await showSweep(id, format)
break
}

// `canonry sweep <project>` — trigger a sweep; a project name is required (prints usage and exits otherwise)
const sweepProject = subcommand
if (!sweepProject) {
console.error('Usage: canonry sweep <project> [--keyword "..."] [--wait] [--format json]')
process.exit(1)
}

const { values: sweepValues } = parseArgs({
args: args.slice(2),
options: {
keyword: { type: 'string' },
wait: { type: 'boolean', default: false },
format: { type: 'string' },
},
allowPositionals: true,
})

await triggerSweep(sweepProject, {
keyword: sweepValues.keyword,
wait: sweepValues.wait ?? false,
format: sweepValues.format === 'json' ? 'json' : format,
})
break
}

case 'sweeps': {
const sweepsProject = args[1]
if (!sweepsProject) {
console.error('Usage: canonry sweeps <project>')
process.exit(1)
}
await listSweeps(sweepsProject, format)
break
}

default:
console.error(`Unknown command: ${command}`)
console.log('Run "canonry --help" for usage.')
Expand Down
18 changes: 18 additions & 0 deletions packages/canonry/src/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -267,4 +267,22 @@ export class ApiClient {
async gscDiscoverSitemaps(project: string): Promise<object> {
return this.request<object>('POST', `/projects/${encodeURIComponent(project)}/google/gsc/discover-sitemaps`, {})
}

// ── Indexing sweeps ───────────────────────────────────────────────────────

/** POSTs to the project's sweeps endpoint; an omitted body is sent as `{}`. */
async triggerSweep(project: string, body?: { keyword?: string }): Promise<object> {
  const path = `/projects/${encodeURIComponent(project)}/sweeps`
  const payload = body ?? {}
  return this.request<object>('POST', path, payload)
}

/** GETs the sweeps recorded for one project (project name is URL-encoded into the path). */
async listSweeps(project: string): Promise<object[]> {
  const path = `/projects/${encodeURIComponent(project)}/sweeps`
  return this.request<object[]>('GET', path)
}

/** GETs a single sweep by id (the id is URL-encoded into the path). */
async getSweep(id: string): Promise<object> {
  const path = `/sweeps/${encodeURIComponent(id)}`
  return this.request<object>('GET', path)
}

/** PUTs the web-search provider settings; `opts` is sent unchanged as the request body. */
async setWebSearchProvider(opts: { apiKey: string; backend?: string; cx?: string }): Promise<object> {
  const path = '/settings/providers/web-search'
  return this.request<object>('PUT', path, opts)
}
}
8 changes: 5 additions & 3 deletions packages/canonry/src/commands/serve.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@ export async function serveCommand(): Promise<void> {
console.log(`\nCanonry server running at http://${host === '0.0.0.0' ? 'localhost' : host}:${port}`)
console.log('Press Ctrl+C to stop.\n')

const providerNames = Object.keys(config.providers ?? {}).filter(
k => config.providers?.[k as keyof typeof config.providers]?.apiKey || config.providers?.[k as keyof typeof config.providers]?.baseUrl,
)
const providerNames = Object.keys(config.providers ?? {}).filter(k => {
if (k === 'webSearch') return Boolean(config.providers?.webSearch?.apiKey)
const p = config.providers?.[k as Exclude<keyof NonNullable<typeof config.providers>, 'webSearch'>]
return p?.apiKey || p?.baseUrl
})
trackEvent('serve.started', {
providerCount: providerNames.length,
providers: providerNames,
Expand Down
Loading