Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 24 additions & 1 deletion apps/web/app/api/presets/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ type WizardStateV1 = {
modules: {
extractors: string[];
connectors: string[];
batteries?: string[];
};
defaults: {
chunkSize: number;
Expand Down Expand Up @@ -61,6 +62,7 @@ type PresetPayloadV1 = {
modules: {
extractors: string[];
connectors: string[];
batteries: string[];
};
config: {
defaults: {
Expand Down Expand Up @@ -93,6 +95,9 @@ function isWizardStateV1(x: unknown): x is WizardStateV1 {
const o = x as any;
if (o.v !== 1) return false;
if (!o.install || !o.modules || !o.defaults || !o.embedding || !o.storage) return false;
if (o.modules && "batteries" in o.modules && o.modules.batteries != null && !Array.isArray(o.modules.batteries)) {
return false;
}
return true;
}

Expand All @@ -107,6 +112,9 @@ function normalizeWizardState(input: WizardStateV1): WizardStateV1 {
const connectors = Array.isArray(input.modules.connectors)
? input.modules.connectors.map(String).filter(Boolean)
: [];
const batteries = Array.isArray((input.modules as any).batteries)
? (input.modules as any).batteries.map(String).filter(Boolean)
: [];

const chunkSize = Number(input.defaults.chunkSize) || 200;
const chunkOverlap = Number(input.defaults.chunkOverlap) || 40;
Expand Down Expand Up @@ -140,7 +148,7 @@ function normalizeWizardState(input: WizardStateV1): WizardStateV1 {
return {
v: 1,
install: { installDir, storeAdapter, aliasBase },
modules: { extractors, connectors },
modules: { extractors, connectors, batteries },
defaults: { chunkSize, chunkOverlap, topK },
embedding: { type: embeddingType, provider: embeddingProvider, model, timeoutMs },
storage: { storeChunkContent, storeDocumentContent },
Expand All @@ -160,6 +168,7 @@ function makePresetFromWizard(state: WizardStateV1): PresetPayloadV1 {
modules: {
extractors: state.modules.extractors,
connectors: state.modules.connectors,
batteries: (state.modules.batteries ?? []).map(String).filter(Boolean),
},
config: {
defaults: {
Expand Down Expand Up @@ -242,6 +251,11 @@ export async function POST(req: NextRequest) {
.filter((c: any) => c.status === "available")
.map((c: any) => String(c.id))
);
const allowedBatteries = new Set(
(manifest.batteries ?? [])
.filter((b: any) => b.status === "available")
.map((b: any) => String(b.id))
);

const unknownExtractors = state.modules.extractors.filter((x) => !allowedExtractors.has(x));
if (unknownExtractors.length > 0) {
Expand All @@ -259,6 +273,15 @@ export async function POST(req: NextRequest) {
);
}

const batteryIds = (state.modules.batteries ?? []).map(String).filter(Boolean);
const unknownBatteries = batteryIds.filter((x) => !allowedBatteries.has(x));
if (unknownBatteries.length > 0) {
return NextResponse.json(
{ error: "Unknown or unavailable batteries", unknownBatteries },
{ status: 400 }
);
}

const preset = makePresetFromWizard(state);
const id = newPresetId();
const key = `unrag:preset:${id}`;
Expand Down
11 changes: 11 additions & 0 deletions apps/web/app/docs/[[...slug]]/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,27 @@ import { notFound } from 'next/navigation';
import { getMDXComponents } from '@/mdx-components';
import type { Metadata } from 'next';
import { createRelativeLink } from 'fumadocs-ui/mdx';
import SystemBanner from '@/components/ui/system-banner';

export default async function Page(props: PageProps<'/docs/[[...slug]]'>) {
const params = await props.params;
const page = source.getPage(params.slug);
if (!page) notFound();

const MDX = page.data.body;
const slug = params.slug ?? [];
const isExperimentalFeature =
slug[0] === 'eval'
|| (slug[0] === 'batteries' && slug[1] === 'eval');

return (
<DocsPage toc={page.data.toc} full={page.data.full}>
<SystemBanner
text="Experimental Feature"
color="bg-amber-600"
size="xs"
show={isExperimentalFeature}
/>
<DocsTitle>{page.data.title}</DocsTitle>
<DocsDescription>{page.data.description}</DocsDescription>
<DocsBody>
Expand Down
25 changes: 17 additions & 8 deletions apps/web/app/install/install-wizard-client.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -984,6 +984,7 @@ function BatteryCard({
onToggle: () => void;
}) {
const isAvailable = status === 'available';
const isExperimental = id === 'eval';

return (
<ClickableCard
Expand Down Expand Up @@ -1015,6 +1016,11 @@ function BatteryCard({
>
{isAvailable ? 'available' : 'coming soon'}
</span>
{isExperimental ? (
<span className="text-[10px] px-2 py-0.5 rounded-full border bg-amber-500/10 text-amber-400/80 border-amber-500/20">
experimental
</span>
) : null}
{isAvailable && docsHref ? (
<div className="ml-auto">
<DocsIconLink href={docsHref} label={`${displayName || id} docs`} />
Expand Down Expand Up @@ -1197,6 +1203,16 @@ export default function InstallWizardClient() {
}));
}, [state.embedding.type, state.embedding.provider, state.embedding.model, selectedEmbeddingModelOption, setState]);

// Returns the command prefix used to execute a package for the selected
// package manager. Anything that is not bun, pnpm, or yarn falls back to `npx`.
function pmExecBase(pm: 'bun' | 'npm' | 'pnpm' | 'yarn') {
  switch (pm) {
    case 'bun':
      return 'bunx';
    case 'pnpm':
      return 'pnpm dlx';
    case 'yarn':
      return 'yarn dlx';
    default:
      return 'npx';
  }
}

const commandPreview = useMemo(() => {
if (presetId) {
return `bunx unrag@latest init --yes --preset ${presetId}`;
Expand All @@ -1216,14 +1232,7 @@ export default function InstallWizardClient() {

const installCommand = useMemo(() => {
if (!presetId) return null;
const base =
pkgManager === 'bun'
? 'bunx'
: pkgManager === 'pnpm'
? 'pnpm dlx'
: pkgManager === 'yarn'
? 'yarn dlx'
: 'npx';
const base = pmExecBase(pkgManager);
return `${base} unrag@latest init --yes --preset ${presetId}`;
}, [pkgManager, presetId]);

Expand Down
50 changes: 50 additions & 0 deletions apps/web/components/ui/system-banner.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/** Props accepted by the SystemBanner component. Every prop is optional. */
interface SystemBannerProps {
/** Label rendered inside the banner's badge. */
text?: string;
/**
 * Banner color. A value starting with "#" is applied as an inline
 * background color; any other value is appended to the class list as-is.
 */
color?: string;
/** Selects the badge's text-size/padding classes from `sizeClasses`. */
size?: "xs" | "sm" | "md" | "lg";
/** When false, the component renders nothing. */
show?: boolean;
}

/**
 * Class-name strings (text size plus horizontal/vertical padding) for the
 * banner badge, keyed by every allowed value of the `size` prop.
 */
const sizeClasses: Record<NonNullable<SystemBannerProps["size"]>, string> = {
xs: "text-[10px] px-1 py-0.5",
sm: "text-xs px-2 py-0.5",
md: "text-sm px-3 py-1",
lg: "text-base px-4 py-1.5"
};

/**
 * Renders a thin bar fixed to the top of the viewport with a small text badge
 * positioned just beneath it.
 *
 * @param text  Label shown in the badge. Defaults to "Development Mode".
 * @param color Either a class name or a color string starting with "#".
 *              Defaults to "bg-orange-500".
 * @param size  Key into `sizeClasses` selecting the badge's text size and
 *              padding. Defaults to "xs".
 * @param show  When false, nothing is rendered. Defaults to true.
 * @returns The banner element, or `null` when `show` is false.
 */
export default function SystemBanner({
  text = "Development Mode",
  color = "bg-orange-500",
  size = "xs",
  show = true
}: SystemBannerProps) {
  if (!show) return null;

  // A "#..." value cannot be used as a class name, so it is applied as an
  // inline background color instead. Decide once and reuse for both elements.
  // The typeof guard keeps untyped callers passing a non-string from throwing.
  const isHexColor = typeof color === "string" && color.startsWith("#");
  const colorClass = isHexColor ? "" : color;
  const colorStyle = isHexColor ? { backgroundColor: color } : undefined;

  // Join only non-empty parts so the rendered class attribute carries no stray
  // whitespace and never contains the literal text "undefined".
  const barClassName = [
    "fixed top-0 left-0 w-full h-0.5 z-50 flex justify-center",
    colorClass
  ]
    .filter(Boolean)
    .join(" ");
  const badgeClassName = [
    "absolute -bottom-4 text-white font-bold rounded shadow-md",
    sizeClasses[size],
    colorClass
  ]
    .filter(Boolean)
    .join(" ");

  return (
    <div className={barClassName} style={colorStyle}>
      <span className={badgeClassName} style={colorStyle}>
        {text}
      </span>
    </div>
  );
}
96 changes: 96 additions & 0 deletions apps/web/content/docs/batteries/eval.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
---
title: Eval Harness
description: Measure and improve retrieval quality with deterministic evaluation, metrics, and CI integration.
---

<Callout type="warn">
Evaluation is currently **experimental**. It’s safe to use, but expect some CLI flags, report fields, and defaults to change as the harness matures.
</Callout>

The eval harness is a battery that adds retrieval evaluation capabilities to your Unrag installation. It gives you a structured way to define test datasets, run your retrieval pipeline against them, compute standard metrics (hit@k, recall@k, precision@k, MRR@k), and track quality changes over time.

Unlike the reranker battery, which adds a new method to your engine, the eval harness is primarily a development and CI tool. You use it to measure how well your retrieval works, catch regressions before they reach production, and make informed decisions when tuning chunking or embeddings, or when adding reranking.

## Installing the eval battery

```bash
bunx unrag@latest add battery eval
```

This creates several files:

<Files>
<Folder name="lib/unrag" defaultOpen>
<Folder name="eval">
<File name="index.ts" />
<File name="dataset.ts" />
<File name="metrics.ts" />
<File name="runner.ts" />
<File name="report.ts" />
</Folder>
</Folder>
<Folder name=".unrag/eval" defaultOpen>
<Folder name="datasets" defaultOpen>
<File name="sample.json" />
</Folder>
<File name="config.json" />
</Folder>
<Folder name="scripts" defaultOpen>
<File name="unrag-eval.ts" />
</Folder>
</Files>

It also adds two npm scripts to your `package.json`:

```json
{
"scripts": {
"unrag:eval": "bun run scripts/unrag-eval.ts -- --dataset .unrag/eval/datasets/sample.json",
"unrag:eval:ci": "bun run scripts/unrag-eval.ts -- --dataset .unrag/eval/datasets/sample.json --ci"
}
}
```

## Running your first eval

After installation, run the sample evaluation:

```bash
bun run unrag:eval
```

The harness will ingest the sample documents, run the test queries, and write report files. You'll see output like:

```
[unrag:eval] Wrote report: .unrag/eval/runs/<timestamp>-sample/report.json
[unrag:eval] Wrote summary: .unrag/eval/runs/<timestamp>-sample/summary.md
[unrag:eval] Thresholds: pass
```

## Full documentation

The eval harness is a substantial feature with its own documentation section covering everything from dataset design to CI integration:

<Cards>
<Card title="Evaluation Overview" href="/docs/eval">
Why retrieval evaluation matters and how the harness works
</Card>
<Card title="Getting Started" href="/docs/eval/getting-started">
Complete setup guide with your first evaluation
</Card>
<Card title="Dataset Format" href="/docs/eval/datasets">
How to structure documents, queries, and ground truth
</Card>
<Card title="Understanding Metrics" href="/docs/eval/metrics">
What each metric measures and how to interpret results
</Card>
<Card title="Running Evals" href="/docs/eval/running-evals">
All configuration options and CLI flags
</Card>
<Card title="CI Integration" href="/docs/eval/ci-integration">
Automated quality gates and threshold checking
</Card>
<Card title="Comparing Runs" href="/docs/eval/comparing-runs">
Baseline diffs and tracking changes over time
</Card>
</Cards>
3 changes: 2 additions & 1 deletion apps/web/content/docs/batteries/index.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ The difference is scope. Extractors transform content types (PDFs into text). Co
| Battery | Description | Status |
|---------|-------------|--------|
| [Reranker](/docs/batteries/reranker) | Second-stage reranking using Cohere or custom models | Available |
| [Eval Harness](/docs/batteries/eval) | Deterministic retrieval evaluation with metrics and CI integration | Experimental |

## Installing a battery

Expand All @@ -29,7 +30,7 @@ After installation, you wire the battery into your engine configuration. Each ba

The core Unrag engine handles the fundamental RAG operations: chunking text, generating embeddings, storing vectors, and running similarity search. These operations cover most use cases and keep the default installation small.

But production RAG systems often need more. Reranking can significantly improve precision by reordering initial retrieval results using a more expensive relevance model. Evaluation harnesses help you measure and improve retrieval quality. Hybrid search combines vector similarity with keyword matching.
But production RAG systems often need more. Reranking can significantly improve precision by reordering initial retrieval results using a more expensive relevance model. Hybrid search combines vector similarity with keyword matching.

Rather than bundling these features into the core (adding complexity and dependencies everyone pays for), Unrag provides them as optional batteries. Install what you need, skip what you don't. The code is vendored into your project, so you can read it, understand it, and modify it if your requirements differ from the defaults.

Expand Down
2 changes: 1 addition & 1 deletion apps/web/content/docs/batteries/meta.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
"title": "Batteries",
"badge": "New",
"description": "Optional modules that add capabilities like reranking to your Unrag installation.",
"pages": ["index", "reranker"]
"pages": ["index", "reranker", "eval"]
}
1 change: 0 additions & 1 deletion apps/web/content/docs/batteries/reranker.mdx
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
---
title: Reranker
new: true
description: Improve retrieval precision with second-stage reranking using Cohere or custom models.
---

Expand Down
Loading