diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
new file mode 100644
index 00000000..0d88a7cb
--- /dev/null
+++ b/.github/workflows/benchmark.yml
@@ -0,0 +1,116 @@
+name: API Benchmarks
+
+on:
+  push:
+    branches: [master]
+  workflow_dispatch:
+
+# Only allow one benchmark run at a time to avoid resource contention
+concurrency:
+  group: benchmark
+  cancel-in-progress: false
+
+jobs:
+  benchmark:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+
+    services:
+      postgres:
+        image: postgres:16
+        env:
+          POSTGRES_DB: benchmark
+          POSTGRES_USER: benchmark
+          POSTGRES_PASSWORD: benchmark
+        ports:
+          - 5432:5432
+        options: >-
+          --health-cmd pg_isready
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+      redis:
+        image: redis:7
+        ports:
+          - 6379:6379
+        options: >-
+          --health-cmd "redis-cli ping"
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: pnpm/action-setup@v4
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 22
+          cache: pnpm
+
+      - name: Install dependencies
+        run: pnpm install --frozen-lockfile
+
+      - name: Run database migrations
+        run: pnpm db:migrate
+        env:
+          DATABASE_URL: postgresql://benchmark:benchmark@localhost:5432/benchmark
+
+      - name: Run benchmarks
+        run: pnpm benchmark
+        env:
+          DATABASE_URL: postgresql://benchmark:benchmark@localhost:5432/benchmark
+          REDIS_URL: redis://localhost:6379
+          NODE_ENV: production
+        timeout-minutes: 20
+
+      - name: Upload results as artifact
+        uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: benchmark-results-${{ github.sha }}
+          path: benchmarks/results/*.json
+          retention-days: 90
+
+      - name: Publish to GitHub Pages
+        if: success() && github.ref == 'refs/heads/master'
+        run: |
+          # Configure git
+          git config user.name "github-actions[bot]"
+          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+
+          # Get the result file
+          RESULT_FILE=$(ls benchmarks/results/*.json | head -1)
+
RESULT_BASENAME=$(basename "$RESULT_FILE")
+
+          # Switch to gh-pages branch (create if needed)
+          git fetch origin gh-pages || true
+          git checkout gh-pages 2>/dev/null || git checkout --orphan gh-pages
+
+          # Ensure results directory exists
+          mkdir -p results
+
+          # Copy new result
+          git checkout "${{ github.sha }}" -- benchmarks/results/"$RESULT_BASENAME" 2>/dev/null || true
+          cp benchmarks/results/"$RESULT_BASENAME" results/ 2>/dev/null || cp "$RESULT_FILE" results/
+
+          # Copy dashboard
+          git checkout "${{ github.sha }}" -- benchmarks/dashboard.html 2>/dev/null || true
+          cp benchmarks/dashboard.html index.html 2>/dev/null || true
+
+          # Update index.json manifest
+          node -e "
+            const fs = require('fs');
+            const dir = 'results';
+            const files = fs.readdirSync(dir).filter(f => f.endsWith('.json') && f !== 'index.json').sort();
+            fs.writeFileSync(dir + '/index.json', JSON.stringify({ files }, null, 2));
+          "
+
+          # Commit and push
+          git add results/ index.html
+          git commit -m "Update benchmark results for ${{ github.sha }}" || echo "No changes to commit"
+          git push origin gh-pages || echo "Failed to push to gh-pages"
+
+          # Return to original branch
+          git checkout "${{ github.sha }}"
diff --git a/.gitignore b/.gitignore
index 23977a29..e8d2e13c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -162,3 +162,6 @@ extension/*.zip
 # source maps
 *.js.map
 
+# Benchmark results (committed to gh-pages branch, not main)
+benchmarks/results/
+
diff --git a/benchmarks/dashboard.html b/benchmarks/dashboard.html
new file mode 100644
index 00000000..e0361695
--- /dev/null
+++ b/benchmarks/dashboard.html
@@ -0,0 +1,409 @@
+
+
+
+
+  Lion Reader - API Benchmarks
+
+
+
+

Lion Reader API Benchmarks

+

Historical performance tracking across commits

+ +
+
+ + +
+
+ + +
+
+ +
Loading results...
+ +
+
+

Latency Over Time (p50 / p95 / p99)

+ +
+
+

Requests Per Second (QPS)

+ +
+
+

Server Peak RSS (KB)

+ +
+
+

Error Count

+ +
+
+ +

Latest Run Details

+ + + + + + + + + + + + + +
Scenariop50 (ms)p95 (ms)p99 (ms)QPSErrorsRequests
+
+
+
+
diff --git a/benchmarks/lib/http.ts b/benchmarks/lib/http.ts
new file mode 100644
index 00000000..028b6781
--- /dev/null
+++ b/benchmarks/lib/http.ts
@@ -0,0 +1,89 @@
+/**
+ * HTTP client for tRPC API calls during benchmarks.
+ *
+ * Handles authentication (login + session cookie) and provides typed
+ * helpers for calling tRPC queries and mutations.
+ */
+
+import { BASE_URL } from "./server";
+
+export interface BenchmarkClient {
+  sessionToken: string;
+  baseUrl: string;
+}
+
+/**
+ * Log in as a user and return a client with the session token.
+ */
+export async function createClient(email: string, password: string): Promise<BenchmarkClient> {
+  const response = await fetch(`${BASE_URL}/api/trpc/auth.login`, {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({ json: { email, password } }),
+  });
+
+  if (!response.ok) {
+    const body = await response.text();
+    throw new Error(`Login failed (${response.status}): ${body}`);
+  }
+
+  const data = await response.json();
+  const sessionToken = data.result?.data?.json?.sessionToken;
+  if (!sessionToken) {
+    throw new Error(`Login response missing sessionToken: ${JSON.stringify(data)}`);
+  }
+
+  return { sessionToken, baseUrl: BASE_URL };
+}
+
+/**
+ * Execute a tRPC query (GET request).
+ */
+export async function trpcQuery<T = unknown>(
+  client: BenchmarkClient,
+  procedure: string,
+  input?: Record<string, unknown>
+): Promise<T> {
+  const url = new URL(`/api/trpc/${procedure}`, client.baseUrl);
+  if (input !== undefined) {
+    url.searchParams.set("input", JSON.stringify({ json: input }));
+  }
+
+  const response = await fetch(url, {
+    headers: {
+      Authorization: `Bearer ${client.sessionToken}`,
+    },
+  });
+
+  if (!response.ok) {
+    throw new Error(`${procedure} failed: ${response.status}`);
+  }
+
+  const data = await response.json();
+  return data.result?.data?.json;
+}
+
+/**
+ * Execute a tRPC mutation (POST request).
+ */
+export async function trpcMutation<T = unknown>(
+  client: BenchmarkClient,
+  procedure: string,
+  input?: Record<string, unknown>
+): Promise<T> {
+  const response = await fetch(`${client.baseUrl}/api/trpc/${procedure}`, {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+      Authorization: `Bearer ${client.sessionToken}`,
+    },
+    body: JSON.stringify({ json: input ?? {} }),
+  });
+
+  if (!response.ok) {
+    throw new Error(`${procedure} failed: ${response.status}`);
+  }
+
+  const data = await response.json();
+  return data.result?.data?.json;
+}
diff --git a/benchmarks/lib/load.ts b/benchmarks/lib/load.ts
new file mode 100644
index 00000000..d3c4c81d
--- /dev/null
+++ b/benchmarks/lib/load.ts
@@ -0,0 +1,56 @@
+/**
+ * Concurrent load runner with latency collection.
+ *
+ * Runs a workload function at a specified concurrency level for a given
+ * duration, collecting per-request latency measurements.
+ */
+
+export interface LoadConfig {
+  /** Number of concurrent workers */
+  concurrency: number;
+  /** Duration in milliseconds */
+  durationMs: number;
+  /** Async function to execute for each request */
+  fn: () => Promise<void>;
+}
+
+export interface LoadResult {
+  latencies: number[];
+  errors: number;
+  durationMs: number;
+}
+
+/**
+ * Run a load test with the given configuration.
+ *
+ * Spawns `concurrency` workers that continuously call `fn()` until
+ * `durationMs` has elapsed. Returns all latency measurements.
+ */
+export async function runLoad(config: LoadConfig): Promise<LoadResult> {
+  const { concurrency, durationMs, fn } = config;
+  const latencies: number[] = [];
+  let errors = 0;
+  const startTime = Date.now();
+  const endTime = startTime + durationMs;
+
+  const workers = Array.from({ length: concurrency }, async () => {
+    while (Date.now() < endTime) {
+      const reqStart = performance.now();
+      try {
+        await fn();
+        const elapsed = Math.round(performance.now() - reqStart);
+        latencies.push(elapsed);
+      } catch {
+        errors++;
+      }
+    }
+  });
+
+  await Promise.all(workers);
+
+  return {
+    latencies,
+    errors,
+    durationMs: Date.now() - startTime,
+  };
+}
diff --git a/benchmarks/lib/memory.ts b/benchmarks/lib/memory.ts
new file mode 100644
index 00000000..506c1887
--- /dev/null
+++ b/benchmarks/lib/memory.ts
@@ -0,0 +1,51 @@
+/**
+ * Memory monitoring via /proc/{pid}/status polling.
+ *
+ * Polls VmRSS from /proc/{pid}/status at regular intervals to build
+ * a time-series of memory usage. Linux only.
+ */
+
+import { readFile } from "node:fs/promises";
+import type { MemorySample } from "./results";
+
+export interface MemoryMonitor {
+  stop: () => MemorySample[];
+}
+
+/**
+ * Start polling memory usage for a given PID.
+ * Returns a handle with a stop() method that returns collected samples.
+ */
+export function startMemoryMonitor(pid: number, intervalMs = 1000): MemoryMonitor {
+  const samples: MemorySample[] = [];
+  const startTime = Date.now();
+  let running = true;
+
+  const poll = async () => {
+    while (running) {
+      try {
+        const status = await readFile(`/proc/${pid}/status`, "utf-8");
+        const match = status.match(/VmRSS:\s+(\d+)\s+kB/);
+        if (match) {
+          samples.push({
+            timestampMs: Date.now() - startTime,
+            rssKb: parseInt(match[1], 10),
+          });
+        }
+      } catch {
+        // Process may have exited
+      }
+      await new Promise<void>((resolve) => setTimeout(resolve, intervalMs));
+    }
+  };
+
+  // Start polling in background (fire and forget)
+  poll();
+
+  return {
+    stop: () => {
+      running = false;
+      return samples;
+    },
+  };
+}
diff --git a/benchmarks/lib/results.ts b/benchmarks/lib/results.ts
new file mode 100644
index 00000000..e628f846
--- /dev/null
+++ b/benchmarks/lib/results.ts
@@ -0,0 +1,44 @@
+/**
+ * Benchmark results I/O - read/write JSON result files.
+ */
+
+import { writeFile, readFile, mkdir } from "node:fs/promises";
+import { join } from "node:path";
+import type { LatencyStats } from "./stats";
+
+export interface ScenarioResult {
+  name: string;
+  latency: LatencyStats;
+  qps: number;
+  errors: number;
+  durationMs: number;
+}
+
+export interface MemorySample {
+  timestampMs: number;
+  rssKb: number;
+}
+
+export interface BenchmarkResult {
+  commitSha: string;
+  commitDate: string;
+  runDate: string;
+  scenarios: ScenarioResult[];
+  peakRssKb: number | null;
+  memoryTimeSeries: MemorySample[];
+}
+
+const RESULTS_DIR = join(process.cwd(), "benchmarks", "results");
+
+export async function writeResults(result: BenchmarkResult): Promise<string> {
+  await mkdir(RESULTS_DIR, { recursive: true });
+  const filename = `${result.commitSha}-${Date.now()}.json`;
+  const filepath = join(RESULTS_DIR, filename);
+  await writeFile(filepath, JSON.stringify(result, null, 2));
+  return filepath;
+}
+
+export async function readResults(filepath: string): Promise<BenchmarkResult> {
+  const
content = await readFile(filepath, "utf-8");
+  return JSON.parse(content) as BenchmarkResult;
+}
diff --git a/benchmarks/lib/scenario.ts b/benchmarks/lib/scenario.ts
new file mode 100644
index 00000000..ed1461b7
--- /dev/null
+++ b/benchmarks/lib/scenario.ts
@@ -0,0 +1,21 @@
+/**
+ * Scenario type definition shared by all benchmark scenarios.
+ */
+
+import type { BenchmarkClient } from "./http";
+
+export interface ScenarioConfig {
+  /** Display name for the scenario */
+  name: string;
+  /** Number of concurrent workers */
+  concurrency: number;
+  /** Duration in milliseconds */
+  durationMs: number;
+  /**
+   * Set up any per-scenario state. Called once before the load test.
+   * Can return data used by the run function.
+   */
+  setup?: (client: BenchmarkClient) => Promise<void>;
+  /** The function to execute for each request */
+  run: (client: BenchmarkClient) => Promise<void>;
+}
diff --git a/benchmarks/lib/server.ts b/benchmarks/lib/server.ts
new file mode 100644
index 00000000..648d58b2
--- /dev/null
+++ b/benchmarks/lib/server.ts
@@ -0,0 +1,116 @@
+/**
+ * Start/stop the Next.js server as a child process for benchmarking.
+ *
+ * Spawns `tsx scripts/server.ts` wrapped with `/usr/bin/time -v` to capture
+ * peak RSS. Polls a health endpoint until the server is ready.
+ */
+
+import { spawn, type ChildProcess } from "node:child_process";
+import { setTimeout as delay } from "node:timers/promises";
+
+export const BENCHMARK_PORT = 3456;
+export const BASE_URL = `http://localhost:${BENCHMARK_PORT}`;
+
+interface ServerHandle {
+  process: ChildProcess;
+  pid: number;
+  /** Resolves to peak RSS in KB from /usr/bin/time -v, or null if unavailable. */
+  peakRssPromise: Promise<number | null>;
+}
+
+export async function startServer(): Promise<ServerHandle> {
+  let peakRssResolve: (val: number | null) => void;
+  const peakRssPromise = new Promise<number | null>((resolve) => {
+    peakRssResolve = resolve;
+  });
+
+  let stderr = "";
+
+  // Try to use /usr/bin/time -v for peak RSS measurement
+  const hasGnuTime = await checkGnuTime();
+
+  const args = hasGnuTime ? ["-v", "tsx", "scripts/server.ts"] : [];
+  const command = hasGnuTime ? "/usr/bin/time" : "tsx";
+  const commandArgs = hasGnuTime ? args : ["scripts/server.ts"];
+
+  const child = spawn(command, commandArgs, {
+    env: {
+      ...process.env,
+      PORT: String(BENCHMARK_PORT),
+      NODE_ENV: "production",
+    },
+    stdio: ["ignore", "pipe", "pipe"],
+    cwd: process.cwd(),
+  });
+
+  child.stderr?.on("data", (chunk: Buffer) => {
+    stderr += chunk.toString();
+  });
+
+  child.on("close", () => {
+    // Parse peak RSS from GNU time output
+    const match = stderr.match(/Maximum resident set size \(kbytes\): (\d+)/);
+    peakRssResolve!(match ? parseInt(match[1], 10) : null);
+  });
+
+  if (!child.pid) {
+    throw new Error("Failed to spawn server process");
+  }
+
+  // Wait for the server to become ready
+  await waitForServer();
+
+  return {
+    process: child,
+    pid: child.pid,
+    peakRssPromise,
+  };
+}
+
+export async function stopServer(handle: ServerHandle): Promise<void> {
+  handle.process.kill("SIGTERM");
+
+  // Wait for graceful shutdown (up to 10s)
+  await new Promise<void>((resolve) => {
+    const timeout = setTimeout(() => {
+      handle.process.kill("SIGKILL");
+      resolve();
+    }, 10_000);
+
+    handle.process.on("close", () => {
+      clearTimeout(timeout);
+      resolve();
+    });
+  });
+}
+
+async function waitForServer(maxAttempts = 120, intervalMs = 1000): Promise<void> {
+  for (let i = 0; i < maxAttempts; i++) {
+    try {
+      const response = await fetch(`${BASE_URL}/api/trpc/auth.providers`, {
+        signal: AbortSignal.timeout(2000),
+      });
+      if (response.ok) {
+        return;
+      }
+    } catch {
+      // Server not ready yet
+    }
+    await delay(intervalMs);
+  }
+  throw new Error(`Server failed to start after ${maxAttempts * intervalMs}ms`);
+}
+
+async function checkGnuTime(): Promise<boolean> {
+  try {
+    const proc = spawn("/usr/bin/time", ["--version"], {
+      stdio: ["ignore", "pipe", "pipe"],
+    });
+    return new Promise<boolean>((resolve) => {
+      proc.on("close", (code) => resolve(code === 0));
+      proc.on("error", () => resolve(false));
+    });
+  } catch {
+    return false;
+  }
+}
diff --git a/benchmarks/lib/stats.ts b/benchmarks/lib/stats.ts
new file mode 100644
index 00000000..ac95bdf3
--- /dev/null
+++ b/benchmarks/lib/stats.ts
@@ -0,0 +1,37 @@
+/**
+ * Statistical functions for benchmark latency analysis.
+ */
+
+export interface LatencyStats {
+  min: number;
+  max: number;
+  mean: number;
+  p50: number;
+  p95: number;
+  p99: number;
+  count: number;
+}
+
+export function computeStats(latencies: number[]): LatencyStats {
+  if (latencies.length === 0) {
+    return { min: 0, max: 0, mean: 0, p50: 0, p95: 0, p99: 0, count: 0 };
+  }
+
+  const sorted = [...latencies].sort((a, b) => a - b);
+  const sum = sorted.reduce((acc, val) => acc + val, 0);
+
+  return {
+    min: sorted[0],
+    max: sorted[sorted.length - 1],
+    mean: Math.round(sum / sorted.length),
+    p50: percentile(sorted, 50),
+    p95: percentile(sorted, 95),
+    p99: percentile(sorted, 99),
+    count: sorted.length,
+  };
+}
+
+function percentile(sorted: number[], pct: number): number {
+  const index = Math.ceil((pct / 100) * sorted.length) - 1;
+  return sorted[Math.max(0, index)];
+}
diff --git a/benchmarks/run.ts b/benchmarks/run.ts
new file mode 100644
index 00000000..418d689e
--- /dev/null
+++ b/benchmarks/run.ts
@@ -0,0 +1,216 @@
+/**
+ * Main benchmark orchestrator.
+ *
+ * 1. Seeds the database with realistic data
+ * 2. Builds the Next.js app
+ * 3. Starts the HTTP server
+ * 4. Runs all benchmark scenarios under concurrent load
+ * 5.
Collects metrics and writes results to JSON
+ *
+ * Usage:
+ *   pnpm benchmark        # Full run (seed + build + bench)
+ *   pnpm benchmark:run    # Skip seeding (assumes data exists)
+ */
+
+import { execSync } from "node:child_process";
+import { seed, BENCHMARK_USER_EMAIL, BENCHMARK_USER_PASSWORD } from "./seed";
+import { startServer, stopServer } from "./lib/server";
+import { createClient, trpcQuery } from "./lib/http";
+import { runLoad } from "./lib/load";
+import { computeStats } from "./lib/stats";
+import { startMemoryMonitor } from "./lib/memory";
+import { writeResults, type ScenarioResult, type BenchmarkResult } from "./lib/results";
+import type { ScenarioConfig } from "./lib/scenario";
+
+// Import scenarios
+import {
+  entriesListNoFilter,
+  entriesListUnreadOnly,
+  createEntriesListBySubscription,
+  createEntriesListByTag,
+} from "./scenarios/entries-list";
+import { markReadBatch1, markReadBatch10 } from "./scenarios/entries-mark-read";
+import {
+  entriesCountNoFilter,
+  entriesCountUnread,
+  createEntriesCountBySubscription,
+} from "./scenarios/entries-count";
+import { subscriptionsList } from "./scenarios/subscriptions-list";
+import { syncEvents } from "./scenarios/sync-events";
+import { tagsList } from "./scenarios/tags-list";
+import { entriesGet } from "./scenarios/entries-get";
+import { mixedWorkload } from "./scenarios/mixed-workload";
+
+// ============================================================================
+// Main
+// ============================================================================
+
+async function main() {
+  const skipSeed = process.argv.includes("--skip-seed");
+  const skipBuild = process.argv.includes("--skip-build");
+
+  // Step 1: Seed database
+  if (!skipSeed) {
+    console.log("\n=== Seeding database ===\n");
+    await seed();
+  } else {
+    console.log("\n=== Skipping seed (--skip-seed) ===\n");
+  }
+
+  // Step 2: Build the app
+  if (!skipBuild) {
+    console.log("\n=== Building Next.js app ===\n");
+    execSync("pnpm build", { stdio: "inherit", cwd: process.cwd() });
+  } else {
+    console.log("\n=== Skipping build (--skip-build) ===\n");
+  }
+
+  // Step 3: Start server
+  console.log("\n=== Starting server ===\n");
+  const server = await startServer();
+  console.log(`Server started (PID: ${server.pid})`);
+
+  // Start memory monitoring
+  const memMonitor = startMemoryMonitor(server.pid, 2000);
+
+  try {
+    // Step 4: Authenticate
+    console.log("\n=== Authenticating ===\n");
+    const client = await createClient(BENCHMARK_USER_EMAIL, BENCHMARK_USER_PASSWORD);
+    console.log("Authenticated successfully");
+
+    // Step 5: Discover dynamic IDs for parameterized scenarios
+    const subResult = (await trpcQuery(client, "subscriptions.list", {})) as
+      | { items: Array<{ id: string }> }
+      | undefined;
+    const firstSubId = subResult?.items?.[0]?.id;
+
+    const tagsResult = (await trpcQuery(client, "tags.list", {})) as
+      | Array<{ id: string }>
+      | undefined;
+    const firstTagId = tagsResult?.[0]?.id;
+
+    // Build scenario list
+    const scenarios: ScenarioConfig[] = [
+      // Tier 1 (10 concurrent, 30s)
+      entriesListNoFilter,
+      entriesListUnreadOnly,
+      ...(firstSubId ? [createEntriesListBySubscription(firstSubId)] : []),
+      ...(firstTagId ? [createEntriesListByTag(firstTagId)] : []),
+      markReadBatch1,
+      markReadBatch10,
+      entriesCountNoFilter,
+      entriesCountUnread,
+      ...(firstSubId ? [createEntriesCountBySubscription(firstSubId)] : []),
+      subscriptionsList,
+      syncEvents,
+
+      // Tier 2 (5 concurrent, 15s)
+      entriesGet,
+      tagsList,
+
+      // Mixed (10 concurrent, 60s)
+      mixedWorkload,
+    ];
+
+    // Step 6: Run scenarios
+    const results: ScenarioResult[] = [];
+
+    for (const scenario of scenarios) {
+      console.log(
+        `\n--- ${scenario.name} (${scenario.concurrency} concurrent, ${scenario.durationMs / 1000}s) ---`
+      );
+
+      // Run setup if defined
+      if (scenario.setup) {
+        await scenario.setup(client);
+      }
+
+      const loadResult = await runLoad({
+        concurrency: scenario.concurrency,
+        durationMs: scenario.durationMs,
+        fn: () => scenario.run(client),
+      });
+
+      const stats = computeStats(loadResult.latencies);
+      const qps = loadResult.durationMs > 0 ? (stats.count / loadResult.durationMs) * 1000 : 0;
+
+      results.push({
+        name: scenario.name,
+        latency: stats,
+        qps: Math.round(qps * 100) / 100,
+        errors: loadResult.errors,
+        durationMs: loadResult.durationMs,
+      });
+
+      console.log(
+        `  p50=${stats.p50}ms p95=${stats.p95}ms p99=${stats.p99}ms ` +
+          `qps=${qps.toFixed(1)} errors=${loadResult.errors} count=${stats.count}`
+      );
+    }
+
+    // Step 7: Collect results
+    const memorySamples = memMonitor.stop();
+
+    // Stop server and get peak RSS
+    await stopServer(server);
+    const peakRssKb = await server.peakRssPromise;
+
+    // Get git info
+    const commitSha = getGitSha();
+    const commitDate = getGitDate();
+
+    const benchmarkResult: BenchmarkResult = {
+      commitSha,
+      commitDate,
+      runDate: new Date().toISOString(),
+      scenarios: results,
+      peakRssKb,
+      memoryTimeSeries: memorySamples,
+    };
+
+    // Step 8: Write results
+    const filepath = await writeResults(benchmarkResult);
+    console.log(`\n=== Results written to ${filepath} ===`);
+
+    // Print summary
+    console.log("\n=== Summary ===\n");
+    console.log(`Commit: ${commitSha}`);
+    console.log(`Peak RSS: ${peakRssKb ? `${peakRssKb} KB` : "N/A"}`);
+    console.log(`Memory samples: ${memorySamples.length}`);
+    console.log("\nScenario Results:");
+    for (const r of results) {
+      console.log(
+        `  ${r.name.padEnd(40)} p50=${String(r.latency.p50).padStart(5)}ms ` +
+          `p95=${String(r.latency.p95).padStart(5)}ms ` +
+          `qps=${String(r.qps).padStart(7)} ` +
+          `errors=${r.errors}`
+      );
+    }
+  } catch (error) {
+    memMonitor.stop();
+    await stopServer(server);
+    throw error;
+  }
+}
+
+function getGitSha(): string {
+  try {
+    return execSync("git rev-parse --short HEAD", { encoding: "utf-8" }).trim();
+  } catch {
+    return "unknown";
+  }
+}
+
+function getGitDate(): string {
+  try {
+    return execSync("git log -1 --format=%cI", { encoding: "utf-8" }).trim();
+  } catch {
+    return new Date().toISOString();
+  }
+}
+
+main().catch((err) => {
+  console.error("Benchmark failed:", err);
+  process.exit(1);
+});
diff --git a/benchmarks/scenarios/entries-count.ts b/benchmarks/scenarios/entries-count.ts
new file mode 100644
index 00000000..8743f94b
--- /dev/null
+++ b/benchmarks/scenarios/entries-count.ts
@@ -0,0 +1,35 @@
+/**
+ * Benchmark scenario: entries.count with various filters.
+ */
+
+import type { ScenarioConfig } from "../lib/scenario";
+import { trpcQuery, type BenchmarkClient } from "../lib/http";
+
+export const entriesCountNoFilter: ScenarioConfig = {
+  name: "entries.count (no filter)",
+  concurrency: 10,
+  durationMs: 30_000,
+  run: async (client: BenchmarkClient) => {
+    await trpcQuery(client, "entries.count", {});
+  },
+};
+
+export const entriesCountUnread: ScenarioConfig = {
+  name: "entries.count (unread only)",
+  concurrency: 10,
+  durationMs: 30_000,
+  run: async (client: BenchmarkClient) => {
+    await trpcQuery(client, "entries.count", { unreadOnly: true });
+  },
+};
+
+export function createEntriesCountBySubscription(subscriptionId: string): ScenarioConfig {
+  return {
+    name: "entries.count (by subscription)",
+    concurrency: 10,
+    durationMs: 30_000,
+    run: async (client: BenchmarkClient) => {
+      await trpcQuery(client, "entries.count", { subscriptionId });
+    },
+  };
+}
diff --git a/benchmarks/scenarios/entries-get.ts b/benchmarks/scenarios/entries-get.ts
new file mode 100644
index 00000000..468b5dd4
--- /dev/null
+++ b/benchmarks/scenarios/entries-get.ts
@@ -0,0 +1,27 @@
+/**
+ * Benchmark scenario: entries.get (Tier 2 - single entry fetch).
+ */
+
+import type { ScenarioConfig } from "../lib/scenario";
+import { trpcQuery, type BenchmarkClient } from "../lib/http";
+
+/** Cache of entry IDs to avoid fetching them every request */
+let cachedEntryIds: string[] = [];
+
+export const entriesGet: ScenarioConfig = {
+  name: "entries.get",
+  concurrency: 5,
+  durationMs: 15_000,
+  setup: async (client: BenchmarkClient) => {
+    // Pre-fetch a pool of entry IDs to use during the load test
+    const result = (await trpcQuery(client, "entries.list", {
+      limit: 50,
+    })) as { items: Array<{ id: string }> } | undefined;
+    cachedEntryIds = result?.items?.map((e) => e.id) ?? [];
+  },
+  run: async (client: BenchmarkClient) => {
+    if (cachedEntryIds.length === 0) return;
+    const id = cachedEntryIds[Math.floor(Math.random() * cachedEntryIds.length)];
+    await trpcQuery(client, "entries.get", { id });
+  },
+};
diff --git a/benchmarks/scenarios/entries-list.ts b/benchmarks/scenarios/entries-list.ts
new file mode 100644
index 00000000..a9f4d2a8
--- /dev/null
+++ b/benchmarks/scenarios/entries-list.ts
@@ -0,0 +1,38 @@
+/**
+ * Benchmark scenario: entries.list with varying filters.
+ */
+
+import type { ScenarioConfig } from "../lib/scenario";
+import { trpcQuery, type BenchmarkClient } from "../lib/http";
+
+function entriesListScenario(name: string, input: Record<string, unknown>): ScenarioConfig {
+  return {
+    name,
+    concurrency: 10,
+    durationMs: 30_000,
+    run: async (client: BenchmarkClient) => {
+      await trpcQuery(client, "entries.list", input);
+    },
+  };
+}
+
+export const entriesListNoFilter = entriesListScenario("entries.list (no filter)", {});
+
+export const entriesListUnreadOnly = entriesListScenario("entries.list (unread only)", {
+  unreadOnly: true,
+});
+
+export const entriesListByTag = entriesListScenario(
+  "entries.list (by tag)",
+  {} // tagId set during setup
+);
+
+export function createEntriesListBySubscription(subscriptionId: string): ScenarioConfig {
+  return entriesListScenario("entries.list (by subscription)", {
+    subscriptionId,
+  });
+}
+
+export function createEntriesListByTag(tagId: string): ScenarioConfig {
+  return entriesListScenario("entries.list (by tag)", { tagId });
+}
diff --git a/benchmarks/scenarios/entries-mark-read.ts b/benchmarks/scenarios/entries-mark-read.ts
new file mode 100644
index 00000000..711f7dad
--- /dev/null
+++ b/benchmarks/scenarios/entries-mark-read.ts
@@ -0,0 +1,32 @@
+/**
+ * Benchmark scenario: entries.markRead with batches of 1 and 10.
+ */
+
+import type { ScenarioConfig } from "../lib/scenario";
+import { trpcQuery, trpcMutation, type BenchmarkClient } from "../lib/http";
+
+function createMarkReadScenario(batchSize: number): ScenarioConfig {
+  return {
+    name: `entries.markRead (batch=${batchSize})`,
+    concurrency: 10,
+    durationMs: 30_000,
+    run: async (client: BenchmarkClient) => {
+      // Fetch some entries to get real IDs
+      const result = (await trpcQuery(client, "entries.list", {
+        limit: batchSize,
+      })) as { items: Array<{ id: string }> } | undefined;
+
+      const entries = result?.items?.map((e) => ({ id: e.id })) ?? [];
+      if (entries.length === 0) return;
+
+      await trpcMutation(client, "entries.markRead", {
+        entries,
+        read: true,
+        fromList: true,
+      });
+    },
+  };
+}
+
+export const markReadBatch1 = createMarkReadScenario(1);
+export const markReadBatch10 = createMarkReadScenario(10);
diff --git a/benchmarks/scenarios/mixed-workload.ts b/benchmarks/scenarios/mixed-workload.ts
new file mode 100644
index 00000000..f83aac57
--- /dev/null
+++ b/benchmarks/scenarios/mixed-workload.ts
@@ -0,0 +1,97 @@
+/**
+ * Benchmark scenario: mixed workload simulating real usage.
+ *
+ * Weights: 40% entries.list, 20% entries.count, 15% markRead,
+ * 10% subscriptions.list, 5% tags.list, 5% sync.events, 5% entries.get
+ */
+
+import type { ScenarioConfig } from "../lib/scenario";
+import { trpcQuery, trpcMutation, type BenchmarkClient } from "../lib/http";
+
+/** Cache of entry IDs for markRead and entries.get */
+let cachedEntryIds: string[] = [];
+
+interface WeightedAction {
+  weight: number;
+  fn: (client: BenchmarkClient) => Promise<void>;
+}
+
+const actions: WeightedAction[] = [
+  {
+    weight: 40,
+    fn: async (client) => {
+      await trpcQuery(client, "entries.list", {});
+    },
+  },
+  {
+    weight: 20,
+    fn: async (client) => {
+      await trpcQuery(client, "entries.count", {});
+    },
+  },
+  {
+    weight: 15,
+    fn: async (client) => {
+      if (cachedEntryIds.length === 0) return;
+      const id = cachedEntryIds[Math.floor(Math.random() * cachedEntryIds.length)];
+      await trpcMutation(client, "entries.markRead", {
+        entries: [{ id }],
+        read: true,
+        fromList: true,
+      });
+    },
+  },
+  {
+    weight: 10,
+    fn: async (client) => {
+      await trpcQuery(client, "subscriptions.list", {});
+    },
+  },
+  {
+    weight: 5,
+    fn: async (client) => {
+      await trpcQuery(client, "tags.list", {});
+    },
+  },
+  {
+    weight: 5,
+    fn: async (client) => {
+      const fiveMinAgo = new Date(Date.now() - 5 * 60 * 1000).toISOString();
+      await trpcQuery(client, "sync.events", { cursor: fiveMinAgo });
+    },
+  },
+  {
+    weight: 5,
+    fn: async (client) => {
+      if (cachedEntryIds.length === 0) return;
+      const id = cachedEntryIds[Math.floor(Math.random() * cachedEntryIds.length)];
+      await trpcQuery(client, "entries.get", { id });
+    },
+  },
+];
+
+function pickAction(): WeightedAction {
+  const total = actions.reduce((sum, a) => sum + a.weight, 0);
+  let rand = Math.random() * total;
+  for (const action of actions) {
+    rand -= action.weight;
+    if (rand <= 0) return action;
+  }
+  return actions[0];
+}
+
+export const mixedWorkload: ScenarioConfig = {
+  name: "mixed workload",
+  concurrency: 10,
+  durationMs:
60_000,
+  setup: async (client: BenchmarkClient) => {
+    const result = (await trpcQuery(client, "entries.list", {
+      limit: 50,
+    })) as { items: Array<{ id: string }> } | undefined;
+    cachedEntryIds = result?.items?.map((e) => e.id) ?? [];
+  },
+  run: async (client: BenchmarkClient) => {
+    const action = pickAction();
+    await action.fn(client);
+  },
+};
diff --git a/benchmarks/scenarios/subscriptions-list.ts b/benchmarks/scenarios/subscriptions-list.ts
new file mode 100644
index 00000000..1240a7c4
--- /dev/null
+++ b/benchmarks/scenarios/subscriptions-list.ts
@@ -0,0 +1,15 @@
+/**
+ * Benchmark scenario: subscriptions.list
+ */
+
+import type { ScenarioConfig } from "../lib/scenario";
+import { trpcQuery, type BenchmarkClient } from "../lib/http";
+
+export const subscriptionsList: ScenarioConfig = {
+  name: "subscriptions.list",
+  concurrency: 10,
+  durationMs: 30_000,
+  run: async (client: BenchmarkClient) => {
+    await trpcQuery(client, "subscriptions.list", {});
+  },
+};
diff --git a/benchmarks/scenarios/sync-events.ts b/benchmarks/scenarios/sync-events.ts
new file mode 100644
index 00000000..0212777f
--- /dev/null
+++ b/benchmarks/scenarios/sync-events.ts
@@ -0,0 +1,16 @@
+/**
+ * Benchmark scenario: sync.events with cursor set 5 minutes in past.
+ */
+
+import type { ScenarioConfig } from "../lib/scenario";
+import { trpcQuery, type BenchmarkClient } from "../lib/http";
+
+export const syncEvents: ScenarioConfig = {
+  name: "sync.events",
+  concurrency: 10,
+  durationMs: 30_000,
+  run: async (client: BenchmarkClient) => {
+    const fiveMinAgo = new Date(Date.now() - 5 * 60 * 1000).toISOString();
+    await trpcQuery(client, "sync.events", { cursor: fiveMinAgo });
+  },
+};
diff --git a/benchmarks/scenarios/tags-list.ts b/benchmarks/scenarios/tags-list.ts
new file mode 100644
index 00000000..994b0bb0
--- /dev/null
+++ b/benchmarks/scenarios/tags-list.ts
@@ -0,0 +1,15 @@
+/**
+ * Benchmark scenario: tags.list (Tier 2 - lower concurrency).
+ */
+
+import type { ScenarioConfig } from "../lib/scenario";
+import { trpcQuery, type BenchmarkClient } from "../lib/http";
+
+export const tagsList: ScenarioConfig = {
+  name: "tags.list",
+  concurrency: 5,
+  durationMs: 15_000,
+  run: async (client: BenchmarkClient) => {
+    await trpcQuery(client, "tags.list", {});
+  },
+};
diff --git a/benchmarks/seed.ts b/benchmarks/seed.ts
new file mode 100644
index 00000000..aa5f64fb
--- /dev/null
+++ b/benchmarks/seed.ts
@@ -0,0 +1,312 @@
+/**
+ * Database seeder for benchmarks.
+ *
+ * Creates a realistic dataset:
+ * - 3 users (1 benchmark user with real argon2 password, 2 others)
+ * - 500 feeds (shared pool)
+ * - 200 entries/feed = 100,000 total entries
+ * - Each user subscribed to 300-400 feeds (~60-80K entries per user)
+ * - 50% entries read, 5% starred
+ * - 5 tags per user, feeds distributed across tags
+ * - Realistic content_cleaned (200-500 chars)
+ *
+ * Pattern follows tests/integration/entries-perf.test.ts bulk insert approach.
+ */ + +import { sql } from "drizzle-orm"; +import * as argon2 from "argon2"; +import { db } from "../src/server/db"; +import { + users, + subscriptions, + subscriptionFeeds, + tags, + subscriptionTags, +} from "../src/server/db/schema"; +import { generateUuidv7 } from "../src/lib/uuidv7"; + +// ============================================================================ +// Configuration +// ============================================================================ + +const NUM_FEEDS = 500; +const ENTRIES_PER_FEED = 200; +const NUM_TAGS = 5; + +/** Benchmark user gets 400 feeds, others get 300 each */ +const BENCHMARK_USER_FEEDS = 400; +const OTHER_USER_FEEDS = 300; + +export const BENCHMARK_USER_EMAIL = "benchmark@test.com"; +export const BENCHMARK_USER_PASSWORD = "benchmark-password-123"; + +function pgUuidArray(ids: string[]) { + return `{${ids.join(",")}}`; +} + +// ============================================================================ +// Seeder +// ============================================================================ + +export async function seed(): Promise<{ userId: string }> { + const startTime = performance.now(); + console.log("Starting benchmark data seeding..."); + console.log( + ` ${NUM_FEEDS} feeds x ${ENTRIES_PER_FEED} entries = ${(NUM_FEEDS * ENTRIES_PER_FEED).toLocaleString()} total entries` + ); + + // Clean existing benchmark data + await db.execute(sql` + DELETE FROM users WHERE email LIKE 'benchmark%@test.com' + `); + + // Step 1: Create users + const passwordHash = await argon2.hash(BENCHMARK_USER_PASSWORD); + const userIds: string[] = []; + const userEmails = [BENCHMARK_USER_EMAIL, "benchmark-2@test.com", "benchmark-3@test.com"]; + + for (let i = 0; i < 3; i++) { + const id = generateUuidv7(); + userIds.push(id); + } + + await db.insert(users).values( + userIds.map((id, i) => ({ + id, + email: userEmails[i], + passwordHash: i === 0 ? 
passwordHash : "not-a-real-hash", + tosAgreedAt: new Date(), + privacyPolicyAgreedAt: new Date(), + notEuAgreedAt: new Date(), + createdAt: new Date(), + updatedAt: new Date(), + })) + ); + + const benchmarkUserId = userIds[0]; + console.log(` Users created: 3 (${elapsed(startTime)})`); + + // Step 2: Create tags per user + const tagIdsByUser = new Map(); + const allTagValues: { + id: string; + userId: string; + name: string; + createdAt: Date; + updatedAt: Date; + }[] = []; + + for (const userId of userIds) { + const userTagIds: string[] = []; + for (let t = 0; t < NUM_TAGS; t++) { + const tagId = generateUuidv7(); + userTagIds.push(tagId); + allTagValues.push({ + id: tagId, + userId, + name: `Tag ${t}`, + createdAt: new Date(), + updatedAt: new Date(), + }); + } + tagIdsByUser.set(userId, userTagIds); + } + + await db.insert(tags).values(allTagValues); + console.log(` Tags created: ${allTagValues.length} (${elapsed(startTime)})`); + + // Step 3: Create feeds + const feedsStart = performance.now(); + await db.execute(sql` + INSERT INTO feeds (id, type, url, title, last_fetched_at, last_entries_updated_at, created_at, updated_at) + SELECT + gen_random_uuid(), + 'web', + 'https://bench-' || i || '.example.com/feed.xml', + 'Benchmark Feed ' || i, + now(), + now(), + now(), + now() + FROM generate_series(1, ${NUM_FEEDS}) AS i + `); + console.log(` Feeds created: ${NUM_FEEDS} (${elapsed(feedsStart)})`); + + // Fetch feed IDs + const feedRows = await db.execute(sql` + SELECT id FROM feeds WHERE url LIKE 'https://bench-%' ORDER BY url + `); + const allFeedIds = feedRows.rows.map((r: Record) => r.id as string); + + // Step 4: Create subscriptions + const subsStart = performance.now(); + const feedCounts = [BENCHMARK_USER_FEEDS, OTHER_USER_FEEDS, OTHER_USER_FEEDS]; + + for (let u = 0; u < 3; u++) { + const userId = userIds[u]; + const count = feedCounts[u]; + // Offset feeds so there's some overlap between users + const offset = u === 0 ? 
0 : (u - 1) * 100; + const userFeedIds = allFeedIds.slice(offset, offset + count); + + const CHUNK = 500; + for (let c = 0; c < userFeedIds.length; c += CHUNK) { + const chunk = userFeedIds.slice(c, c + CHUNK); + const subValues = chunk.map((feedId) => ({ + id: generateUuidv7(), + userId, + feedId, + subscribedAt: new Date(Date.now() - 365 * 24 * 60 * 60 * 1000), + createdAt: new Date(), + updatedAt: new Date(), + })); + await db.insert(subscriptions).values(subValues); + + const sfValues = subValues.map((s) => ({ + subscriptionId: s.id, + feedId: s.feedId, + userId, + })); + await db.insert(subscriptionFeeds).values(sfValues); + } + } + console.log(` Subscriptions created (${elapsed(subsStart)})`); + + // Step 5: Assign tags + const tagsStart = performance.now(); + for (let u = 0; u < 3; u++) { + const userId = userIds[u]; + const userTags = tagIdsByUser.get(userId)!; + + const subRows = await db.execute(sql` + SELECT id FROM subscriptions + WHERE user_id = ${userId} AND unsubscribed_at IS NULL + ORDER BY id + `); + const userSubIds = subRows.rows.map((r: Record) => r.id as string); + + // Leave ~10% uncategorized + const taggedCount = Math.floor(userSubIds.length * 0.9); + const stValues: { subscriptionId: string; tagId: string }[] = []; + for (let i = 0; i < taggedCount; i++) { + stValues.push({ + subscriptionId: userSubIds[i], + tagId: userTags[i % NUM_TAGS], + }); + } + + const CHUNK = 1000; + for (let c = 0; c < stValues.length; c += CHUNK) { + await db.insert(subscriptionTags).values(stValues.slice(c, c + CHUNK)); + } + } + console.log(` Tags assigned (${elapsed(tagsStart)})`); + + // Step 6: Create entries + const entriesStart = performance.now(); + const FEED_BATCH = 100; + for (let b = 0; b < NUM_FEEDS; b += FEED_BATCH) { + const batchEnd = Math.min(b + FEED_BATCH, NUM_FEEDS); + const batchFeedIds = allFeedIds.slice(b, batchEnd); + + await db.execute(sql` + INSERT INTO entries (id, feed_id, type, guid, title, content_cleaned, content_hash, + fetched_at, 
published_at, last_seen_at, created_at, updated_at) + SELECT + gen_random_uuid(), + f.feed_id, + 'web', + f.feed_id || '-' || e.i, + 'Entry ' || e.i || ' about ' || CASE (e.i % 5) + WHEN 0 THEN 'technology and innovation' + WHEN 1 THEN 'science and research' + WHEN 2 THEN 'business strategy' + WHEN 3 THEN 'product development' + ELSE 'industry analysis' + END, + 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. ' || + 'Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. ' || + CASE (e.i % 3) + WHEN 0 THEN 'Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. ' + WHEN 1 THEN 'Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. ' + ELSE 'Curabitur pretium tincidunt lacus. Nulla gravida orci a odio. Nullam varius, turpis et commodo pharetra. ' + END, + f.feed_id || '-' || e.i, + now() - ((${ENTRIES_PER_FEED} - e.i) || ' minutes')::interval, + now() - ((${ENTRIES_PER_FEED} - e.i) || ' minutes')::interval, + now(), + now(), + now() + FROM unnest(${pgUuidArray(batchFeedIds)}::uuid[]) AS f(feed_id) + CROSS JOIN generate_series(1, ${ENTRIES_PER_FEED}) AS e(i) + `); + + const progress = batchEnd * ENTRIES_PER_FEED; + const total = NUM_FEEDS * ENTRIES_PER_FEED; + console.log( + ` Entries: ${progress.toLocaleString()} / ${total.toLocaleString()} (${elapsed(entriesStart)})` + ); + } + + // Step 7: Create user_entries + const ueStart = performance.now(); + for (let u = 0; u < 3; u++) { + const userId = userIds[u]; + const count = feedCounts[u]; + const offset = u === 0 ? 
0 : (u - 1) * 100; + const userFeedIds = allFeedIds.slice(offset, offset + count); + + const FEED_BATCH_UE = 50; + for (let b = 0; b < userFeedIds.length; b += FEED_BATCH_UE) { + const batchFeedIds = userFeedIds.slice(b, b + FEED_BATCH_UE); + + await db.execute(sql` + INSERT INTO user_entries (user_id, entry_id, read, starred, read_changed_at, starred_changed_at, updated_at) + SELECT + ${userId}::uuid, + e.id, + (row_number() OVER (PARTITION BY e.feed_id ORDER BY e.id)) <= ${ENTRIES_PER_FEED / 2}, + (row_number() OVER (PARTITION BY e.feed_id ORDER BY e.id)) % 20 = 0, + e.published_at, + e.published_at, + now() + FROM entries e + WHERE e.feed_id = ANY(${pgUuidArray(batchFeedIds)}::uuid[]) + `); + } + console.log(` user_entries: user ${u + 1}/3 (${elapsed(ueStart)})`); + } + + // Step 8: ANALYZE + const analyzeStart = performance.now(); + await db.execute(sql`ANALYZE entries`); + await db.execute(sql`ANALYZE user_entries`); + await db.execute(sql`ANALYZE feeds`); + await db.execute(sql`ANALYZE subscriptions`); + await db.execute(sql`ANALYZE subscription_feeds`); + await db.execute(sql`ANALYZE subscription_tags`); + await db.execute(sql`ANALYZE tags`); + console.log(` ANALYZE complete (${elapsed(analyzeStart)})`); + + const totalTime = ((performance.now() - startTime) / 1000).toFixed(1); + console.log(`\nSeeding complete in ${totalTime}s`); + + return { userId: benchmarkUserId }; +} + +function elapsed(start: number): string { + return `${((performance.now() - start) / 1000).toFixed(1)}s`; +} + +// Run directly +if (process.argv[1]?.endsWith("seed.ts")) { + seed() + .then(({ userId }) => { + console.log(`Benchmark user ID: ${userId}`); + process.exit(0); + }) + .catch((err) => { + console.error("Seeding failed:", err); + process.exit(1); + }); +} diff --git a/package.json b/package.json index c2db9da0..15ebd85f 100644 --- a/package.json +++ b/package.json @@ -36,7 +36,10 @@ "test:integration": "pnpm db:migrate:test && pnpm db:schema:test && dotenv -o -e .env.test -- 
vitest run tests/integration", "worker": "tsx scripts/worker.ts", "mcp:serve": "dotenv -- tsx src/server/mcp/bin.ts", - "discord-bot": "dotenv -- tsx scripts/discord-bot.ts" + "discord-bot": "dotenv -- tsx scripts/discord-bot.ts", + "benchmark": "dotenv -- tsx benchmarks/run.ts", + "benchmark:seed": "dotenv -- tsx benchmarks/seed.ts", + "benchmark:run": "dotenv -- tsx benchmarks/run.ts --skip-seed" }, "lint-staged": { "*.{ts,tsx,js,jsx,mjs,cjs}": [