diff --git a/Makefile b/Makefile index 28abe8e6d..09cf4c69e 100644 --- a/Makefile +++ b/Makefile @@ -76,7 +76,9 @@ help: @echo " verify-imports - Verify all package imports work" @echo " show-versions - Show versions of all packages" @echo " convert-execute-notebooks - Convert notebooks from .py to .ipynb using jupytext" - @echo " generate-colab-notebooks - Generate Colab-compatible notebooks" + @echo " generate-colab-notebooks - Generate Colab-compatible notebooks" + @echo " generate-fern-notebooks - Convert notebooks to Fern format for docs" + @echo " generate-fern-notebooks-with-outputs - Execute notebooks first, then convert (requires API key)" @echo " serve-docs-locally - Serve documentation locally" @echo " check-license-headers - Check if all files have license headers" @echo " update-license-headers - Add license headers to all files" @@ -470,9 +472,29 @@ convert-execute-notebooks: generate-colab-notebooks: @echo "πŸ““ Generating Colab-compatible notebooks..." - uv run --group docs python docs/scripts/generate_colab_notebooks.py + @if [ -d docs/notebooks ] && [ -n "$$(ls docs/notebooks/*.ipynb 2>/dev/null)" ]; then \ + echo " Using executed notebooks from docs/notebooks (outputs preserved)"; \ + uv run --group docs python docs/scripts/generate_colab_notebooks.py --executed-dir docs/notebooks; \ + else \ + echo " Using source only (run 'make convert-execute-notebooks' first for outputs)"; \ + uv run --group docs python docs/scripts/generate_colab_notebooks.py; \ + fi @echo "βœ… Colab notebooks created in docs/colab_notebooks/" +generate-fern-notebooks: generate-colab-notebooks + @echo "πŸ““ Converting notebooks to Fern format for NotebookViewer..." 
+ @mkdir -p fern/components/notebooks + @for f in docs/colab_notebooks/*.ipynb; do \ + if [ -f "$$f" ]; then \ + name=$$(basename "$$f" .ipynb); \ + uv run python fern/scripts/ipynb-to-fern-json.py "$$f" -o fern/components/notebooks/$$name.json; \ + fi; \ + done + @echo "βœ… Fern notebooks created in fern/components/notebooks/" + +generate-fern-notebooks-with-outputs: convert-execute-notebooks + $(MAKE) generate-fern-notebooks + # ============================================================================== # PERFORMANCE # ============================================================================== diff --git a/docs/RFC-FERN-MIGRATION.md b/docs/RFC-FERN-MIGRATION.md new file mode 100644 index 000000000..6964b2015 --- /dev/null +++ b/docs/RFC-FERN-MIGRATION.md @@ -0,0 +1,1371 @@ +# RFC: Migration from MkDocs to Fern Docs + +**Status:** Draft +**Author:** [Author Name] +**Owner:** [Owner Name] +**Created:** 2026-01-14 +**Last Updated:** 2026-01-14 +**Target Completion:** [YYYY-MM-DD] + +--- + +## Summary + +This RFC proposes migrating the NeMo Data Designer documentation from MkDocs Material to [Fern Docs](https://buildwithfern.com/learn/docs/getting-started/overview). The migration will be performed incrementally by creating a new `docs-fern/` directory, preserving all existing content while adapting to Fern's component system. + +## Motivation + +This migration is **mandated** as part of NVIDIA's documentation platform standardization initiative. 
+ +**Additional benefits:** + +- **Modern documentation platform**: Fern offers AI-native features including Ask Fern and auto-generated MCP servers +- **Enhanced API documentation**: Better support for API reference documentation from OpenAPI specs +- **Improved developer experience**: Rich component library with interactive elements +- **Self-hosting options**: Flexible deployment for enterprise requirements + +## Scope + +### In Scope + +- 1:1 content migration (no content changes) +- Component mapping from MkDocs Material to Fern equivalents +- Navigation structure preservation +- Code reference documentation migration + +### Out of Scope + +- Content rewrites or restructuring +- New features or sections +- Removal of existing documentation + +--- + +## Current Documentation Inventory + +### File Structure + +``` +docs/ +β”œβ”€β”€ index.md # Home page +β”œβ”€β”€ installation.md # Installation guide +β”œβ”€β”€ quick-start.md # Quick start tutorial +β”œβ”€β”€ CONTRIBUTING.md # Contribution guide +β”œβ”€β”€ concepts/ +β”‚ β”œβ”€β”€ columns.md +β”‚ β”œβ”€β”€ validators.md +β”‚ β”œβ”€β”€ processors.md +β”‚ β”œβ”€β”€ person_sampling.md +β”‚ └── models/ +β”‚ β”œβ”€β”€ default-model-settings.md +β”‚ β”œβ”€β”€ custom-model-settings.md +β”‚ β”œβ”€β”€ configure-model-settings-with-the-cli.md +β”‚ β”œβ”€β”€ model-providers.md +β”‚ β”œβ”€β”€ model-configs.md +β”‚ └── inference-parameters.md +β”œβ”€β”€ recipes/ +β”‚ β”œβ”€β”€ cards.md +β”‚ β”œβ”€β”€ code_generation/ +β”‚ β”‚ β”œβ”€β”€ text_to_python.md +β”‚ β”‚ └── text_to_sql.md +β”‚ └── qa_and_chat/ +β”‚ β”œβ”€β”€ product_info_qa.md +β”‚ └── multi_turn_chat.md +β”œβ”€β”€ plugins/ +β”‚ β”œβ”€β”€ overview.md +β”‚ β”œβ”€β”€ example.md +β”‚ └── available.md +β”œβ”€β”€ code_reference/ # Auto-generated API docs +β”‚ β”œβ”€β”€ models.md +β”‚ β”œβ”€β”€ column_configs.md +β”‚ β”œβ”€β”€ config_builder.md +β”‚ β”œβ”€β”€ data_designer_config.md +β”‚ β”œβ”€β”€ sampler_params.md +β”‚ β”œβ”€β”€ validator_params.md +β”‚ β”œβ”€β”€ 
processors.md +β”‚ └── analysis.md +β”œβ”€β”€ colab_notebooks/ # Jupyter notebooks +β”‚ β”œβ”€β”€ 1-the-basics.ipynb +β”‚ β”œβ”€β”€ 2-structured-outputs-and-jinja-expressions.ipynb +β”‚ β”œβ”€β”€ 3-seeding-with-a-dataset.ipynb +β”‚ └── 4-providing-images-as-context.ipynb +β”œβ”€β”€ assets/ +β”‚ └── recipes/ # Downloadable code files +β”œβ”€β”€ css/ # Custom styles +β”œβ”€β”€ js/ # Custom scripts +└── overrides/ # MkDocs template overrides +``` + +### Current Navigation Structure + +```yaml +nav: + - Getting Started: + - Welcome: index.md + - Installation: installation.md + - Quick Start: quick-start.md + - Contributing: CONTRIBUTING.md + - Concepts: + - Models: (6 sub-pages) + - Columns: concepts/columns.md + - Validators: concepts/validators.md + - Processors: concepts/processors.md + - Person Sampling: concepts/person_sampling.md + - Tutorials: + - Overview + 4 Jupyter notebooks + - Recipes: + - Recipe Cards + 4 recipes + - Plugins: + - 3 pages + - Code Reference: + - 8 auto-generated API docs +``` + +--- + +## Component Mapping + +### MkDocs β†’ Fern Component Equivalents + +Reference: [Fern Components Overview](https://buildwithfern.com/learn/docs/writing-content/components/overview) + +| MkDocs Feature | Current Syntax | Fern Equivalent | Notes | +|----------------|----------------|-----------------|-------| +| **Admonitions** | `!!! note "Title"` | ``, ``, ``, `` | See [Callouts](#1-admonitions--callouts) | +| **Tabbed Content** | `=== "Tab 1"` | `` + `` | See [Tabs](#2-tabbed-content) | +| **Code Blocks** | ` ```python ` | ` ```python ` | Direct compatibility | +| **Code Snippets** | `--8<-- "path"` | `` with `src` | File embedding | +| **Grid Cards** | `
` | `` + `` | See [Cards](#3-grid-cards) | +| **Icons** | `:material-xxx:` | Fern icons or inline SVG | Limited support | +| **Download Links** | `{ .md-button download=}` | Standard markdown links | Simplified | +| **API Docs** | `::: module.path` | Manual or OpenAPI import | See [API Reference](#4-api-reference) | +| **Jupyter Notebooks** | `.ipynb` files | Convert to MDX or embed | See [Notebooks](#5-jupyter-notebooks) | +| **Versioning** | Mike plugin | Fern versioning | Built-in support | + +--- + +## Detailed Component Migrations + +### 1. Admonitions β†’ Callouts + +**Current MkDocs syntax:** + +```markdown +!!! note "The Declarative Approach" + Columns are **declarative specifications**. You describe *what* you want... + +!!! tip "Conditional Sampling" + Samplers support **conditional parameters**... + +!!! question "New to Data Designer?" + Recipes provide working code... + +!!! warning "Important" + This action cannot be undone. +``` + +**Fern equivalent:** + +```mdx + +Columns are **declarative specifications**. You describe *what* you want... + + + +Samplers support **conditional parameters**... + + + +Recipes provide working code... + + + +This action cannot be undone. + +``` + +**Migration mapping:** + +| MkDocs Admonition | Fern Callout | +|-------------------|--------------| +| `!!! note` | `` | +| `!!! tip` | `` | +| `!!! info` | `` | +| `!!! warning` | `` | +| `!!! question` | `` | +| `!!! danger` | `` | + +### 2. Tabbed Content + +**Current MkDocs syntax (installation.md):** + +```markdown +=== "pip" + + ```bash + pip install data-designer + ``` + +=== "uv" + + ```bash + uv add data-designer + ``` +``` + +**Fern equivalent:** + +```mdx + + + ```bash + pip install data-designer + ``` + + + ```bash + uv add data-designer + ``` + + +``` + +### 3. Grid Cards + +**Current MkDocs syntax (recipes/cards.md):** + +```markdown +
+ +- :material-snake:{ .lg .middle } **Text to Python** + + Generate a dataset of natural language instructions... + + --- + + **Demonstrates:** + - Python code generation + - Python code validation + + --- + + [:material-book-open-page-variant: View Recipe](code_generation/text_to_python.md){ .md-button } + +
+``` + +**Fern equivalent:** + +```mdx + + + Generate a dataset of natural language instructions... + + **Demonstrates:** + - Python code generation + - Python code validation + + +``` + +### 4. API Reference (mkdocstrings) + +**Current MkDocs syntax (code_reference/models.md):** + +```markdown +# Models + +The `models` module defines configuration objects... + +::: data_designer.config.models +``` + +**Fern options:** + +**Option A: Manual Documentation** +Convert auto-generated docs to manually written MDX with code examples. + +**Option B: OpenAPI Integration** +If the API has an OpenAPI spec, use Fern's native API reference generation. + +**Option C: TypeDoc/PyDoc Integration** +Use Fern's SDK documentation features if available. + +**Recommendation:** Start with Option A (manual) for initial migration, evaluate automation options post-migration. + +### 5. Jupyter Notebooks + +**Current approach:** `mkdocs-jupyter` plugin renders `.ipynb` files directly. + +**Fern options:** + +**Option A: Convert to MDX** +Convert notebooks to MDX files with code blocks and output screenshots. + +**Option B: Embed as iframes** +Host notebooks on Colab/GitHub and embed links. + +**Option C: Use Fern's code playground** +If available, use interactive code features. + +**Recommendation:** Convert to MDX with static code blocks and link to Colab for interactive experience (preserves current Colab badge functionality). + +### 6. Code Snippets (pymdownx.snippets) + +**Current MkDocs syntax:** + +```markdown +```python +--8<-- "assets/recipes/code_generation/text_to_python.py" +``` +``` + +**Fern equivalent:** + +```mdx + +``` + +Or inline the code directly if file embedding isn't supported. 
+ +--- + +## Proposed Directory Structure + +``` +docs-fern/ +β”œβ”€β”€ fern.config.json # Fern configuration +β”œβ”€β”€ docs.yml # Navigation and settings +β”œβ”€β”€ pages/ +β”‚ β”œβ”€β”€ index.mdx # Home page +β”‚ β”œβ”€β”€ installation.mdx +β”‚ β”œβ”€β”€ quick-start.mdx +β”‚ β”œβ”€β”€ contributing.mdx +β”‚ β”œβ”€β”€ concepts/ +β”‚ β”‚ β”œβ”€β”€ columns.mdx +β”‚ β”‚ β”œβ”€β”€ validators.mdx +β”‚ β”‚ β”œβ”€β”€ processors.mdx +β”‚ β”‚ β”œβ”€β”€ person-sampling.mdx +β”‚ β”‚ └── models/ +β”‚ β”‚ β”œβ”€β”€ default-model-settings.mdx +β”‚ β”‚ β”œβ”€β”€ custom-model-settings.mdx +β”‚ β”‚ β”œβ”€β”€ configure-with-cli.mdx +β”‚ β”‚ β”œβ”€β”€ model-providers.mdx +β”‚ β”‚ β”œβ”€β”€ model-configs.mdx +β”‚ β”‚ └── inference-parameters.mdx +β”‚ β”œβ”€β”€ tutorials/ +β”‚ β”‚ β”œβ”€β”€ overview.mdx +β”‚ β”‚ β”œβ”€β”€ the-basics.mdx +β”‚ β”‚ β”œβ”€β”€ structured-outputs.mdx +β”‚ β”‚ β”œβ”€β”€ seeding-with-dataset.mdx +β”‚ β”‚ └── images-as-context.mdx +β”‚ β”œβ”€β”€ recipes/ +β”‚ β”‚ β”œβ”€β”€ index.mdx # Recipe cards +β”‚ β”‚ β”œβ”€β”€ code-generation/ +β”‚ β”‚ β”‚ β”œβ”€β”€ text-to-python.mdx +β”‚ β”‚ β”‚ └── text-to-sql.mdx +β”‚ β”‚ └── qa-and-chat/ +β”‚ β”‚ β”œβ”€β”€ product-info-qa.mdx +β”‚ β”‚ └── multi-turn-chat.mdx +β”‚ β”œβ”€β”€ plugins/ +β”‚ β”‚ β”œβ”€β”€ overview.mdx +β”‚ β”‚ β”œβ”€β”€ example.mdx +β”‚ β”‚ └── available.mdx +β”‚ └── api-reference/ +β”‚ β”œβ”€β”€ models.mdx +β”‚ β”œβ”€β”€ column-configs.mdx +β”‚ β”œβ”€β”€ config-builder.mdx +β”‚ β”œβ”€β”€ data-designer-config.mdx +β”‚ β”œβ”€β”€ sampler-params.mdx +β”‚ β”œβ”€β”€ validator-params.mdx +β”‚ β”œβ”€β”€ processors.mdx +β”‚ └── analysis.mdx +β”œβ”€β”€ assets/ +β”‚ β”œβ”€β”€ favicon.png +β”‚ └── recipes/ # Downloadable code files +β”‚ β”œβ”€β”€ code_generation/ +β”‚ └── qa_and_chat/ +└── styles/ + └── custom.css # Custom styling (if needed) +``` + +--- + +## URL Redirect Mapping + +To preserve existing bookmarks and SEO, all old URLs must redirect to their new locations. 
+ +### Redirect Rules + +| Old MkDocs URL | New Fern URL | +|----------------|--------------| +| `/` | `/docs` | +| `/installation/` | `/docs/installation` | +| `/quick-start/` | `/docs/quick-start` | +| `/CONTRIBUTING/` | `/docs/contributing` | +| `/concepts/columns/` | `/docs/concepts/columns` | +| `/concepts/validators/` | `/docs/concepts/validators` | +| `/concepts/processors/` | `/docs/concepts/processors` | +| `/concepts/person_sampling/` | `/docs/concepts/person-sampling` | +| `/concepts/models/default-model-settings/` | `/docs/concepts/models/default-model-settings` | +| `/concepts/models/custom-model-settings/` | `/docs/concepts/models/custom-model-settings` | +| `/concepts/models/configure-model-settings-with-the-cli/` | `/docs/concepts/models/configure-with-cli` | +| `/concepts/models/model-providers/` | `/docs/concepts/models/model-providers` | +| `/concepts/models/model-configs/` | `/docs/concepts/models/model-configs` | +| `/concepts/models/inference-parameters/` | `/docs/concepts/models/inference-parameters` | +| `/tutorials/` | `/docs/tutorials/overview` | +| `/recipes/cards/` | `/docs/recipes` | +| `/recipes/code_generation/text_to_python/` | `/docs/recipes/code-generation/text-to-python` | +| `/recipes/code_generation/text_to_sql/` | `/docs/recipes/code-generation/text-to-sql` | +| `/recipes/qa_and_chat/product_info_qa/` | `/docs/recipes/qa-and-chat/product-info-qa` | +| `/recipes/qa_and_chat/multi_turn_chat/` | `/docs/recipes/qa-and-chat/multi-turn-chat` | +| `/plugins/overview/` | `/docs/plugins/overview` | +| `/plugins/example/` | `/docs/plugins/example` | +| `/plugins/available/` | `/docs/plugins/available` | +| `/code_reference/*` | `/api/*` | + +### Implementation + +**Option A: Fern redirects configuration** (if supported) + +```yaml +# In docs.yml +redirects: + - from: /concepts/person_sampling + to: /docs/concepts/person-sampling + # ... 
additional redirects +``` + +**Option B: Hosting platform redirects** + +For Netlify (`_redirects` file): +``` +/concepts/person_sampling/* /docs/concepts/person-sampling/:splat 301 +/code_reference/* /api/:splat 301 +``` + +For nginx: +```nginx +rewrite ^/concepts/person_sampling(.*)$ /docs/concepts/person-sampling$1 permanent; +rewrite ^/code_reference/(.*)$ /api/$1 permanent; +``` + +--- + +## Configuration Files + +### fern.config.json + +```json +{ + "organization": "nvidia-nemo", + "version": "1.0.0" +} +``` + +### docs.yml + +```yaml +instances: + - url: https://datadesigner.docs.nvidia.com + +title: NeMo Data Designer + +tabs: + docs: + display-name: Documentation + slug: docs + api: + display-name: API Reference + slug: api + +navigation: + - tab: docs + layout: + - section: Getting Started + contents: + - page: Welcome + path: pages/index.mdx + - page: Installation + path: pages/installation.mdx + - page: Quick Start + path: pages/quick-start.mdx + - page: Contributing + path: pages/contributing.mdx + - section: Concepts + contents: + - section: Models + contents: + - page: Default Model Settings + path: pages/concepts/models/default-model-settings.mdx + - page: Custom Model Settings + path: pages/concepts/models/custom-model-settings.mdx + - page: Configure with CLI + path: pages/concepts/models/configure-with-cli.mdx + - page: Model Providers + path: pages/concepts/models/model-providers.mdx + - page: Model Configs + path: pages/concepts/models/model-configs.mdx + - page: Inference Parameters + path: pages/concepts/models/inference-parameters.mdx + - page: Columns + path: pages/concepts/columns.mdx + - page: Validators + path: pages/concepts/validators.mdx + - page: Processors + path: pages/concepts/processors.mdx + - page: Person Sampling + path: pages/concepts/person-sampling.mdx + - section: Tutorials + contents: + - page: Overview + path: pages/tutorials/overview.mdx + - page: The Basics + path: pages/tutorials/the-basics.mdx + - page: Structured 
Outputs + path: pages/tutorials/structured-outputs.mdx + - page: Seeding with a Dataset + path: pages/tutorials/seeding-with-dataset.mdx + - page: Images as Context + path: pages/tutorials/images-as-context.mdx + - section: Recipes + contents: + - page: Recipe Cards + path: pages/recipes/index.mdx + - section: Code Generation + contents: + - page: Text to Python + path: pages/recipes/code-generation/text-to-python.mdx + - page: Text to SQL + path: pages/recipes/code-generation/text-to-sql.mdx + - section: QA and Chat + contents: + - page: Product Info QA + path: pages/recipes/qa-and-chat/product-info-qa.mdx + - page: Multi-Turn Chat + path: pages/recipes/qa-and-chat/multi-turn-chat.mdx + - section: Plugins + contents: + - page: Overview + path: pages/plugins/overview.mdx + - page: Example Plugin + path: pages/plugins/example.mdx + - page: Available Plugins + path: pages/plugins/available.mdx + - tab: api + layout: + - section: API Reference + contents: + - page: Models + path: pages/api-reference/models.mdx + - page: Column Configs + path: pages/api-reference/column-configs.mdx + - page: Config Builder + path: pages/api-reference/config-builder.mdx + - page: Data Designer Config + path: pages/api-reference/data-designer-config.mdx + - page: Sampler Params + path: pages/api-reference/sampler-params.mdx + - page: Validator Params + path: pages/api-reference/validator-params.mdx + - page: Processors + path: pages/api-reference/processors.mdx + - page: Analysis + path: pages/api-reference/analysis.mdx + +colors: + accent-primary: + dark: "#76B900" + light: "#76B900" + background: + dark: "#1a1a1a" + light: "#ffffff" + +logo: + dark: assets/favicon.png + light: assets/favicon.png + +favicon: assets/favicon.png + +navbar-links: + - type: github + value: https://github.com/NVIDIA-NeMo/DataDesigner +``` + +--- + +## Migration Plan + +### Phase 1: Setup (1 day) + +1. Create `docs-fern/` directory structure +2. Initialize Fern configuration files +3. 
Set up local development environment +4. Verify Fern CLI works (`fern check`, `fern generate`) + +### Phase 2: Core Pages Migration (2-3 days) + +1. Migrate Getting Started section + - `index.md` β†’ `index.mdx` + - `installation.md` β†’ `installation.mdx` + - `quick-start.md` β†’ `quick-start.mdx` + - `CONTRIBUTING.md` β†’ `contributing.mdx` + +2. Migrate Concepts section (6 model pages + 4 concept pages) + +3. Migrate Plugins section (3 pages) + +### Phase 3: Complex Content Migration (3-4 days) + +1. Convert Jupyter notebooks to MDX + - Extract code cells as code blocks + - Convert markdown cells directly + - Add Colab badges/links + +2. Migrate Recipes section + - Convert grid cards to Fern Cards + - Migrate recipe content pages + - Handle code snippet embedding + +### Phase 4: API Reference Migration (2-3 days) + +1. Extract API documentation from mkdocstrings output +2. Manually format as MDX pages +3. Add code examples and cross-references + +### Phase 5: Styling and Polish (1-2 days) + +1. Apply NVIDIA branding (green accent color) +2. Configure navigation and tabs +3. Add favicon and logos +4. Test responsive design + +### Phase 6: Testing and Validation (1-2 days) + +1. Review all pages for rendering issues +2. Verify all links work +3. Test navigation flow +4. 
Compare against original docs for completeness + +--- + +## CI/CD Pipeline Changes + +### Current MkDocs Pipeline + +```yaml +# Current workflow (to be replaced) +- name: Build docs + run: mkdocs build + +- name: Deploy docs + run: mkdocs gh-deploy +``` + +### New Fern Pipeline + +```yaml +# .github/workflows/docs.yml +name: Documentation + +on: + push: + branches: [main] + paths: + - 'docs-fern/**' + pull_request: + paths: + - 'docs-fern/**' + +jobs: + docs: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: '20' + + - name: Install Fern CLI + run: npm install -g fern-api + + - name: Validate Fern config + run: fern check + working-directory: docs-fern + + - name: Generate docs (PR preview) + if: github.event_name == 'pull_request' + run: fern generate --docs --preview + working-directory: docs-fern + env: + FERN_TOKEN: ${{ secrets.FERN_TOKEN }} + + - name: Deploy docs (production) + if: github.ref == 'refs/heads/main' + run: fern generate --docs + working-directory: docs-fern + env: + FERN_TOKEN: ${{ secrets.FERN_TOKEN }} +``` + +### Required Secrets + +| Secret | Description | +|--------|-------------| +| `FERN_TOKEN` | API token from Fern dashboard for deployments | + +### Local Development + +```bash +# Install Fern CLI +npm install -g fern-api + +# Navigate to docs directory +cd docs-fern + +# Validate configuration +fern check + +# Local preview (starts dev server) +fern docs dev + +# Generate static output +fern generate --docs +``` + +--- + +## Deprecation Timeline + +### Week 1-2: Parallel Operation + +- `docs-fern/` is the primary documentation source +- `docs/` remains for reference and rollback capability +- Both directories exist in repository +- MkDocs config (`mkdocs.yml`) remains but is not used in CI + +### Week 3: Soft Deprecation + +- Remove MkDocs from CI/CD pipeline +- Add deprecation notice to `docs/README.md`: + ```markdown + > ⚠️ **DEPRECATED**: 
This directory is no longer maintained. + > Documentation has moved to `docs-fern/`. + > This directory will be removed on [DATE]. + ``` +- Update `CONTRIBUTING.md` to reference new docs location + +### Week 4: Hard Deprecation + +- Delete `docs/` directory +- Delete `mkdocs.yml` +- Remove MkDocs dependencies from `pyproject.toml`: + - `mkdocs` + - `mkdocs-material` + - `mkdocs-jupyter` + - `mkdocstrings` + - `mkdocstrings-python` +- Update `.gitignore` to remove MkDocs artifacts (`site/`) +- Archive final MkDocs state in git tag: `mkdocs-final` + +### Post-Migration Cleanup + +- Remove custom CSS (`docs/css/`) +- Remove custom JS (`docs/js/`) +- Remove template overrides (`docs/overrides/`) +- Update README.md documentation links + +--- + +## Risks and Mitigations + +| Risk | Impact | Likelihood | Mitigation | Owner | +|------|--------|------------|------------|-------| +| API reference quality loss | High | Medium | Document Python APIs manually with curated examples; add to PR checklist | [Owner] | +| Notebook interactivity loss | Medium | Low | Link to Colab badges at top of each tutorial; keep `.ipynb` files hosted | [Owner] | +| Icon support gaps | Low | High | Replace `:material-xxx:` with emoji or text labels; document in style guide | [Owner] | +| Custom CSS incompatibility | Low | Medium | Use Fern's built-in components; minimal custom CSS only if essential | [Owner] | +| Build/deploy workflow breaks | Medium | Medium | Test CI/CD in separate branch before merging; keep MkDocs as fallback for 2 weeks | [Owner] | +| SEO ranking drop | Medium | Medium | Implement all redirects before deprecating old URLs; submit sitemap to search engines | [Owner] | +| Broken links post-migration | Medium | High | Run automated link checker before go-live; fix all broken links | [Owner] | + +--- + +## Common Pitfalls & Troubleshooting + +### Pitfall 1: Nested Admonitions + +MkDocs supports nested admonitions; Fern callouts do not nest well. + +**Problem:** +```markdown +!!! 
note + Some text + !!! warning + Nested warning +``` + +**Solution:** Flatten to sequential callouts: +```mdx + +Some text + + + +Nested warning (now separate) + +``` + +### Pitfall 2: Code Blocks Inside Tabs + +Indentation is critical. Fern expects proper nesting. + +**Problem (incorrect indentation):** +```mdx + + +```python +code +``` + + +``` + +**Solution (correct indentation):** +```mdx + + + ```python + code + ``` + + +``` + +### Pitfall 3: MkDocs-Specific Syntax + +These MkDocs features have no direct Fern equivalent: + +| MkDocs Syntax | Action | +|---------------|--------| +| `{ .md-button }` | Remove, use standard links | +| `{ .annotate }` | Remove, use inline notes | +| `[TOC]` | Remove, Fern auto-generates TOC | +| `--8<-- "file"` | Inline the code or use `` | +| `::: module.path` | Convert to manual documentation | + +### Pitfall 4: Image Paths + +MkDocs resolves images relative to the markdown file; Fern resolves from project root. + +**MkDocs:** +```markdown +![Alt](../assets/image.png) +``` + +**Fern:** +```mdx +![Alt](/assets/image.png) +``` + +### Pitfall 5: Front Matter + +Fern uses YAML front matter for page metadata. 
Add to each file: + +```mdx +--- +title: Page Title +description: Optional description for SEO +--- +``` + +### Troubleshooting Commands + +```bash +# Validate all Fern configuration +fern check + +# See detailed errors +fern check --log-level debug + +# Preview locally before deploying +fern docs dev + +# Check for broken internal links +grep -r '](/[^)]*\.mdx)' docs-fern/pages/ | grep -v '^#' +``` + +--- + +## Rollback Plan + +If critical issues are discovered post-migration, follow this rollback procedure: + +### Trigger Conditions + +Initiate rollback if any of these occur within 2 weeks of go-live: + +- [ ] >10% of pages have rendering issues +- [ ] Search functionality broken +- [ ] CI/CD pipeline repeatedly failing +- [ ] Critical content missing or incorrect +- [ ] Stakeholder requests rollback + +### Rollback Steps + +**Step 1: Restore MkDocs CI/CD (15 minutes)** + +```yaml +# Revert .github/workflows/docs.yml to MkDocs version +git revert +git push origin main +``` + +**Step 2: Restore DNS/Hosting (if changed)** + +Point documentation URL back to MkDocs deployment location. + +**Step 3: Communicate** + +Notify team: +> Documentation rollback initiated due to [REASON]. +> MkDocs docs restored at [URL]. +> Fern migration paused pending [ISSUE] resolution. 
+ +**Step 4: Preserve Fern Work** + +```bash +# Don't delete - branch and preserve +git checkout -b fern-migration-paused +git push origin fern-migration-paused +``` + +**Step 5: Post-Mortem** + +Document: +- What triggered the rollback +- Root cause analysis +- Required fixes before retry +- Updated timeline + +### Rollback Window + +- **Weeks 1-2**: Full rollback capability (MkDocs still in repo) +- **Week 3+**: Rollback requires restoring from `mkdocs-final` tag +- **Week 4+**: Rollback requires significant effort (MkDocs deleted) + +--- + +## Pre-Flight Checklist + +Before starting migration, ensure: + +- [ ] Fern account created and `FERN_TOKEN` obtained +- [ ] Hosting decision finalized (Section: Decisions #4) +- [ ] Timeline approved and dates filled in (Section: Decisions #5) +- [ ] Owner assigned in RFC header +- [ ] Team notified of upcoming changes +- [ ] Current docs snapshot archived (`git tag mkdocs-snapshot-pre-migration`) + +--- + +## Conversion Checklist + +### File-by-File Migration Tracker + +Use this checklist during Phase 2-4 to track progress: + +#### Getting Started +- [ ] `index.md` β†’ `pages/index.mdx` +- [ ] `installation.md` β†’ `pages/installation.mdx` +- [ ] `quick-start.md` β†’ `pages/quick-start.mdx` +- [ ] `CONTRIBUTING.md` β†’ `pages/contributing.mdx` + +#### Concepts - Models +- [ ] `concepts/models/default-model-settings.md` β†’ `pages/concepts/models/default-model-settings.mdx` +- [ ] `concepts/models/custom-model-settings.md` β†’ `pages/concepts/models/custom-model-settings.mdx` +- [ ] `concepts/models/configure-model-settings-with-the-cli.md` β†’ `pages/concepts/models/configure-with-cli.mdx` +- [ ] `concepts/models/model-providers.md` β†’ `pages/concepts/models/model-providers.mdx` +- [ ] `concepts/models/model-configs.md` β†’ `pages/concepts/models/model-configs.mdx` +- [ ] `concepts/models/inference-parameters.md` β†’ `pages/concepts/models/inference-parameters.mdx` + +#### Concepts - Other +- [ ] `concepts/columns.md` β†’ 
`pages/concepts/columns.mdx` +- [ ] `concepts/validators.md` β†’ `pages/concepts/validators.mdx` +- [ ] `concepts/processors.md` β†’ `pages/concepts/processors.mdx` +- [ ] `concepts/person_sampling.md` β†’ `pages/concepts/person-sampling.mdx` + +#### Tutorials (Notebook Conversion) +- [ ] `colab_notebooks/1-the-basics.ipynb` β†’ `pages/tutorials/the-basics.mdx` +- [ ] `colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb` β†’ `pages/tutorials/structured-outputs.mdx` +- [ ] `colab_notebooks/3-seeding-with-a-dataset.ipynb` β†’ `pages/tutorials/seeding-with-dataset.mdx` +- [ ] `colab_notebooks/4-providing-images-as-context.ipynb` β†’ `pages/tutorials/images-as-context.mdx` +- [ ] Create `pages/tutorials/overview.mdx` (new index page) + +#### Recipes +- [ ] `recipes/cards.md` β†’ `pages/recipes/index.mdx` +- [ ] `recipes/code_generation/text_to_python.md` β†’ `pages/recipes/code-generation/text-to-python.mdx` +- [ ] `recipes/code_generation/text_to_sql.md` β†’ `pages/recipes/code-generation/text-to-sql.mdx` +- [ ] `recipes/qa_and_chat/product_info_qa.md` β†’ `pages/recipes/qa-and-chat/product-info-qa.mdx` +- [ ] `recipes/qa_and_chat/multi_turn_chat.md` β†’ `pages/recipes/qa-and-chat/multi-turn-chat.mdx` + +#### Plugins +- [ ] `plugins/overview.md` β†’ `pages/plugins/overview.mdx` +- [ ] `plugins/example.md` β†’ `pages/plugins/example.mdx` +- [ ] `plugins/available.md` β†’ `pages/plugins/available.mdx` + +#### API Reference +- [ ] `code_reference/models.md` β†’ `pages/api-reference/models.mdx` +- [ ] `code_reference/column_configs.md` β†’ `pages/api-reference/column-configs.mdx` +- [ ] `code_reference/config_builder.md` β†’ `pages/api-reference/config-builder.mdx` +- [ ] `code_reference/data_designer_config.md` β†’ `pages/api-reference/data-designer-config.mdx` +- [ ] `code_reference/sampler_params.md` β†’ `pages/api-reference/sampler-params.mdx` +- [ ] `code_reference/validator_params.md` β†’ `pages/api-reference/validator-params.mdx` +- [ ] 
`code_reference/processors.md` β†’ `pages/api-reference/processors.mdx` +- [ ] `code_reference/analysis.md` β†’ `pages/api-reference/analysis.mdx` + +#### Assets +- [ ] Copy `assets/palette-favicon.png` β†’ `assets/favicon.png` +- [ ] Copy `assets/recipes/` β†’ `assets/recipes/` + +--- + +## Success Criteria + +- [ ] All existing documentation pages migrated (32 pages total) +- [ ] Navigation structure preserved +- [ ] All code examples render correctly +- [ ] All internal links functional (automated check) +- [ ] All external links functional (automated check) +- [ ] NVIDIA branding applied (green accent: #76B900) +- [ ] Local development workflow documented +- [ ] CI/CD pipeline deployed and tested +- [ ] URL redirects configured and tested +- [ ] PR preview deployments working +- [ ] Page load time < 3 seconds + +--- + +## Decisions + +The following decisions have been made to ensure smooth execution: + +### 1. API Reference Approach + +**Decision:** Manual documentation with code examples (Option A) + +**Rationale:** +- Fastest path to migration completion +- Allows curated examples rather than raw docstring dumps +- Fern's Python SDK autodoc is not mature enough for our needs + +**Maintenance commitment:** +- API reference pages will be updated alongside code changes +- Add to PR checklist: "Update API docs if public interfaces changed" +- Revisit automation options in Q2 2026 + +### 2. Notebook Handling + +**Decision:** Convert to MDX with Colab links + +**Implementation:** +- Extract code cells as fenced code blocks +- Convert markdown cells directly to MDX +- Preserve Colab badge at top of each tutorial +- Link to hosted `.ipynb` files for interactive experience + +**Example header for converted notebooks:** +```mdx +--- +title: The Basics +--- + + +Run this tutorial interactively in [Google Colab](https://colab.research.google.com/github/NVIDIA-NeMo/DataDesigner/blob/main/docs/colab_notebooks/1-the-basics.ipynb). + +``` + +### 3. 
Versioning + +**Decision:** Single version initially, evaluate multi-version post-launch + +**Rationale:** +- Current MkDocs setup is single-version +- No immediate need for versioned docs +- Fern supports versioning when needed + +### 4. Hosting + +**Decision:** [Fern-hosted | Self-hosted] _(fill in)_ + +**If Fern-hosted:** +- URL: `https://datadesigner.docs.buildwithfern.com` or custom domain +- Zero infrastructure management +- Built-in CDN and SSL + +**If self-hosted:** +- Deploy to existing NVIDIA infrastructure +- Use `fern generate --docs` to produce static output +- Configure redirects on hosting platform + +### 5. Timeline + +**Decision:** [X weeks] from RFC approval + +| Milestone | Target Date | +|-----------|-------------| +| Phase 1 (Setup) complete | [DATE] | +| Phase 2-3 (Content migration) complete | [DATE] | +| Phase 4 (API reference) complete | [DATE] | +| Phase 5-6 (Polish & testing) complete | [DATE] | +| Go-live | [DATE] | +| Old docs deprecated | [DATE + 2 weeks] | + +--- + +## Helper Scripts + +The following scripts can assist with automated conversion: + +### 1. Admonition Converter + +```python +#!/usr/bin/env python3 +"""Convert MkDocs admonitions to Fern callouts.""" +import re +import sys + +ADMONITION_MAP = { + "note": "Note", + "tip": "Tip", + "info": "Info", + "warning": "Warning", + "danger": "Warning", + "question": "Info", + "example": "Info", + "abstract": "Note", + "success": "Tip", + "failure": "Warning", + "bug": "Warning", +} + +def convert_admonitions(content: str) -> str: + """Convert !!! admonitions to components.""" + pattern = r'!!! 
(\w+)(?: "([^"]*)")?\n((?: .*\n?)*)' + + def replace(match: re.Match) -> str: + admon_type = match.group(1).lower() + title = match.group(2) or "" + body = match.group(3) + # Remove 4-space indent from body + body = re.sub(r'^ ', '', body, flags=re.MULTILINE).strip() + fern_type = ADMONITION_MAP.get(admon_type, "Note") + if title: + return f'<{fern_type} title="{title}">\n{body}\n\n' + return f'<{fern_type}>\n{body}\n\n' + + return re.sub(pattern, replace, content) + +if __name__ == "__main__": + content = sys.stdin.read() + print(convert_admonitions(content)) +``` + +**Usage:** +```bash +cat docs/concepts/columns.md | python scripts/convert_admonitions.py > docs-fern/pages/concepts/columns.mdx +``` + +### 2. Tabs Converter + +```python +#!/usr/bin/env python3 +"""Convert MkDocs tabs to Fern Tabs components.""" +import re +import sys + +def convert_tabs(content: str) -> str: + """Convert === tabs to components.""" + # Match tab groups + pattern = r'((?:=== "([^"]+)"\n((?: .*\n?)*)\n?)+)' + + def replace_group(match: re.Match) -> str: + group = match.group(0) + tabs = re.findall(r'=== "([^"]+)"\n((?: .*\n?)*)', group) + result = [""] + for title, body in tabs: + body = re.sub(r'^ ', '', body, flags=re.MULTILINE).strip() + result.append(f' ') + result.append(f' {body}') + result.append(' ') + result.append("") + return '\n'.join(result) + '\n' + + return re.sub(pattern, replace_group, content) + +if __name__ == "__main__": + content = sys.stdin.read() + print(convert_tabs(content)) +``` + +### 3. 
Notebook to MDX Converter + +```python +#!/usr/bin/env python3 +"""Convert Jupyter notebook to MDX.""" +import json +import sys +from pathlib import Path + +def notebook_to_mdx(notebook_path: str, colab_url: str) -> str: + """Convert a Jupyter notebook to MDX format.""" + with open(notebook_path) as f: + nb = json.load(f) + + lines = [ + "---", + f"title: {Path(notebook_path).stem.replace('-', ' ').title()}", + "---", + "", + '', + f"Run this tutorial interactively in [Google Colab]({colab_url}).", + "", + "", + ] + + for cell in nb.get("cells", []): + cell_type = cell.get("cell_type") + source = "".join(cell.get("source", [])) + + if cell_type == "markdown": + lines.append(source) + lines.append("") + elif cell_type == "code": + lines.append("```python") + lines.append(source) + lines.append("```") + lines.append("") + + return "\n".join(lines) + +if __name__ == "__main__": + if len(sys.argv) != 3: + print("Usage: notebook_to_mdx.py ") + sys.exit(1) + print(notebook_to_mdx(sys.argv[1], sys.argv[2])) +``` + +**Usage:** +```bash +python scripts/notebook_to_mdx.py \ + docs/colab_notebooks/1-the-basics.ipynb \ + "https://colab.research.google.com/github/NVIDIA-NeMo/DataDesigner/blob/main/docs/colab_notebooks/1-the-basics.ipynb" \ + > docs-fern/pages/tutorials/the-basics.mdx +``` + +### 4. Link Checker + +```bash +#!/bin/bash +# Check all links in Fern docs +cd docs-fern + +# Internal links +grep -roh '\[.*\]([^)]*\.mdx)' pages/ | sort | uniq + +# External links +grep -roh 'https://[^)]*' pages/ | sort | uniq | while read url; do + if ! curl -s --head "$url" | head -1 | grep -q "200\|301\|302"; then + echo "BROKEN: $url" + fi +done +``` + +### 5. 
Batch Conversion Script + +```bash +#!/bin/bash +# batch_convert.sh - Run all conversions + +set -e + +SCRIPTS_DIR="scripts" +DOCS_DIR="docs" +FERN_DIR="docs-fern/pages" + +# Create directory structure +mkdir -p "$FERN_DIR"/{concepts/models,tutorials,recipes/{code-generation,qa-and-chat},plugins,api-reference} + +# Convert simple pages (admonitions + tabs) +for file in index installation quick-start CONTRIBUTING; do + src="$DOCS_DIR/$file.md" + if [ -f "$src" ]; then + dst="$FERN_DIR/${file,,}.mdx" + cat "$src" | python "$SCRIPTS_DIR/convert_admonitions.py" | python "$SCRIPTS_DIR/convert_tabs.py" > "$dst" + echo "Converted: $src -> $dst" + fi +done + +echo "Batch conversion complete. Manual review required." +``` + +--- + +## References + +- [Fern Docs Getting Started](https://buildwithfern.com/learn/docs/getting-started/overview) +- [Fern Components Overview](https://buildwithfern.com/learn/docs/writing-content/components/overview) +- [Fern Configuration](https://buildwithfern.com/learn/docs/configuration/site-level-settings) +- [Current MkDocs Configuration](../mkdocs.yml) + +--- + +## Appendix: Sample Migration + +### Before (MkDocs - columns.md excerpt) + +```markdown +# Columns + +Columns are the fundamental building blocks in Data Designer. + +!!! note "The Declarative Approach" + Columns are **declarative specifications**. You describe *what* you want... + +## Column Types + +### 🎲 Sampler Columns + +Sampler columns generate data using numerical sampling... + +!!! tip "Conditional Sampling" + Samplers support **conditional parameters**... +``` + +### After (Fern - columns.mdx excerpt) + +```mdx +# Columns + +Columns are the fundamental building blocks in Data Designer. + + +Columns are **declarative specifications**. You describe *what* you want... + + +## Column Types + +### 🎲 Sampler Columns + +Sampler columns generate data using numerical sampling... + + +Samplers support **conditional parameters**... 
+ +``` diff --git a/docs/colab_notebooks/1-the-basics.ipynb b/docs/colab_notebooks/1-the-basics.ipynb index 9a2456e69..66c194387 100644 --- a/docs/colab_notebooks/1-the-basics.ipynb +++ b/docs/colab_notebooks/1-the-basics.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "00c21026", + "id": "34304cc0", "metadata": {}, "source": [ "# 🎨 Data Designer Tutorial: The Basics\n", @@ -14,7 +14,7 @@ }, { "cell_type": "markdown", - "id": "ece3d9a9", + "id": "d407c70a", "metadata": {}, "source": [ "### πŸ“¦ Import Data Designer\n", @@ -26,7 +26,7 @@ }, { "cell_type": "markdown", - "id": "38d1b88f", + "id": "10a53966", "metadata": {}, "source": [ "### ⚑ Colab Setup\n", @@ -37,7 +37,7 @@ { "cell_type": "code", "execution_count": null, - "id": "53321634", + "id": "03518092", "metadata": {}, "outputs": [], "source": [ @@ -48,7 +48,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5e8544d6", + "id": "9eb01405", "metadata": {}, "outputs": [], "source": [ @@ -66,7 +66,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4a9e48bc", + "id": "0620b86f", "metadata": {}, "outputs": [], "source": [ @@ -76,7 +76,7 @@ }, { "cell_type": "markdown", - "id": "21b12719", + "id": "c56a11f5", "metadata": {}, "source": [ "### βš™οΈ Initialize the Data Designer interface\n", @@ -89,7 +89,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7d689c22", + "id": "4c75c742", "metadata": {}, "outputs": [], "source": [ @@ -98,7 +98,7 @@ }, { "cell_type": "markdown", - "id": "3db3eab3", + "id": "ed587c27", "metadata": {}, "source": [ "### πŸŽ›οΈ Define model configurations\n", @@ -115,7 +115,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f4447bbe", + "id": "d276fc65", "metadata": {}, "outputs": [], "source": [ @@ -145,7 +145,7 @@ }, { "cell_type": "markdown", - "id": "b5af9991", + "id": "ad7ee6a4", "metadata": {}, "source": [ "### πŸ—οΈ Initialize the Data Designer Config Builder\n", @@ -160,7 +160,7 @@ { "cell_type": "code", 
"execution_count": null, - "id": "40bdb697", + "id": "4f11a79c", "metadata": {}, "outputs": [], "source": [ @@ -169,7 +169,7 @@ }, { "cell_type": "markdown", - "id": "4dad8aa0", + "id": "37fec12e", "metadata": {}, "source": [ "## 🎲 Getting started with sampler columns\n", @@ -186,7 +186,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8eecf6e8", + "id": "5503a564", "metadata": {}, "outputs": [], "source": [ @@ -195,7 +195,7 @@ }, { "cell_type": "markdown", - "id": "e4d6a23a", + "id": "fb1b7bd8", "metadata": {}, "source": [ "Let's start designing our product review dataset by adding product category and subcategory columns.\n" @@ -204,7 +204,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c3ce7276", + "id": "fa0a52fe", "metadata": {}, "outputs": [], "source": [ @@ -285,7 +285,7 @@ }, { "cell_type": "markdown", - "id": "a8aafd2c", + "id": "33d10aad", "metadata": {}, "source": [ "Next, let's add samplers to generate data related to the customer and their review.\n" @@ -294,7 +294,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3bdb3991", + "id": "08b43716", "metadata": {}, "outputs": [], "source": [ @@ -331,7 +331,7 @@ }, { "cell_type": "markdown", - "id": "743bb645", + "id": "647e6aac", "metadata": {}, "source": [ "## 🦜 LLM-generated columns\n", @@ -346,7 +346,7 @@ { "cell_type": "code", "execution_count": null, - "id": "da2b9677", + "id": "c9062ca9", "metadata": {}, "outputs": [], "source": [ @@ -382,7 +382,7 @@ }, { "cell_type": "markdown", - "id": "febed040", + "id": "e2a1a3ba", "metadata": {}, "source": [ "### πŸ” Iteration is key – preview the dataset!\n", @@ -399,7 +399,7 @@ { "cell_type": "code", "execution_count": null, - "id": "af574e1c", + "id": "4f47448c", "metadata": {}, "outputs": [], "source": [ @@ -409,7 +409,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c5cddea8", + "id": "5562058b", "metadata": {}, "outputs": [], "source": [ @@ -420,7 +420,7 @@ { "cell_type": "code", "execution_count": null, - 
"id": "523da02f", + "id": "60c93917", "metadata": {}, "outputs": [], "source": [ @@ -430,7 +430,7 @@ }, { "cell_type": "markdown", - "id": "b58b6a23", + "id": "93ed2108", "metadata": {}, "source": [ "### πŸ“Š Analyze the generated data\n", @@ -443,7 +443,7 @@ { "cell_type": "code", "execution_count": null, - "id": "26b9a54a", + "id": "4e385e37", "metadata": {}, "outputs": [], "source": [ @@ -453,7 +453,7 @@ }, { "cell_type": "markdown", - "id": "ae2f9efe", + "id": "e246e323", "metadata": {}, "source": [ "### πŸ†™ Scale up!\n", @@ -466,7 +466,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d8341c24", + "id": "40f65835", "metadata": {}, "outputs": [], "source": [ @@ -476,7 +476,7 @@ { "cell_type": "code", "execution_count": null, - "id": "746166bb", + "id": "ee916774", "metadata": {}, "outputs": [], "source": [ @@ -489,7 +489,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4c67992b", + "id": "009f7e61", "metadata": {}, "outputs": [], "source": [ @@ -501,7 +501,7 @@ }, { "cell_type": "markdown", - "id": "65da8b83", + "id": "9bb500b1", "metadata": {}, "source": [ "## ⏭️ Next Steps\n", diff --git a/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb b/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb index 75e2d72db..bd1d498a1 100644 --- a/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb +++ b/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "3d5ec9c5", + "id": "5b57e503", "metadata": {}, "source": [ "# 🎨 Data Designer Tutorial: Structured Outputs and Jinja Expressions\n", @@ -16,7 +16,7 @@ }, { "cell_type": "markdown", - "id": "3813ccb2", + "id": "d9ae6b33", "metadata": {}, "source": [ "### πŸ“¦ Import Data Designer\n", @@ -28,7 +28,7 @@ }, { "cell_type": "markdown", - "id": "86173a51", + "id": "991aea10", "metadata": {}, "source": [ "### ⚑ Colab Setup\n", @@ -39,7 +39,7 @@ { "cell_type": "code", 
"execution_count": null, - "id": "6ee5a0e0", + "id": "1674de34", "metadata": {}, "outputs": [], "source": [ @@ -50,7 +50,7 @@ { "cell_type": "code", "execution_count": null, - "id": "87742e65", + "id": "fb9fc94f", "metadata": {}, "outputs": [], "source": [ @@ -68,7 +68,7 @@ { "cell_type": "code", "execution_count": null, - "id": "450a862c", + "id": "5c3a6750", "metadata": {}, "outputs": [], "source": [ @@ -78,7 +78,7 @@ }, { "cell_type": "markdown", - "id": "8f06cd05", + "id": "505190dd", "metadata": {}, "source": [ "### βš™οΈ Initialize the Data Designer interface\n", @@ -91,7 +91,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9a880c00", + "id": "f2a2ce1e", "metadata": {}, "outputs": [], "source": [ @@ -100,7 +100,7 @@ }, { "cell_type": "markdown", - "id": "d862ae5c", + "id": "a27febdd", "metadata": {}, "source": [ "### πŸŽ›οΈ Define model configurations\n", @@ -117,7 +117,7 @@ { "cell_type": "code", "execution_count": null, - "id": "84e6f76a", + "id": "73cb0bf0", "metadata": {}, "outputs": [], "source": [ @@ -147,7 +147,7 @@ }, { "cell_type": "markdown", - "id": "07b038aa", + "id": "983889d2", "metadata": {}, "source": [ "### πŸ—οΈ Initialize the Data Designer Config Builder\n", @@ -162,7 +162,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b7e42df4", + "id": "00e44ceb", "metadata": {}, "outputs": [], "source": [ @@ -171,7 +171,7 @@ }, { "cell_type": "markdown", - "id": "600127e0", + "id": "63dfc1e6", "metadata": {}, "source": [ "### πŸ§‘β€πŸŽ¨ Designing our data\n", @@ -198,7 +198,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ecebc077", + "id": "c2d1a831", "metadata": {}, "outputs": [], "source": [ @@ -226,7 +226,7 @@ }, { "cell_type": "markdown", - "id": "6f24c511", + "id": "15155d07", "metadata": {}, "source": [ "Next, let's design our product review dataset using a few more tricks compared to the previous notebook.\n" @@ -235,7 +235,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6cd4a4a5", + 
"id": "dc2d88b9", "metadata": {}, "outputs": [], "source": [ @@ -344,7 +344,7 @@ }, { "cell_type": "markdown", - "id": "3fa250c7", + "id": "4ecbd214", "metadata": {}, "source": [ "Next, we will use more advanced Jinja expressions to create new columns.\n", @@ -361,7 +361,7 @@ { "cell_type": "code", "execution_count": null, - "id": "77895d82", + "id": "4d7a8993", "metadata": {}, "outputs": [], "source": [ @@ -414,7 +414,7 @@ }, { "cell_type": "markdown", - "id": "236f32c0", + "id": "f38b1b81", "metadata": {}, "source": [ "### πŸ” Iteration is key – preview the dataset!\n", @@ -431,7 +431,7 @@ { "cell_type": "code", "execution_count": null, - "id": "719d3d7f", + "id": "b89146e9", "metadata": {}, "outputs": [], "source": [ @@ -441,7 +441,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d25b2a23", + "id": "645d6c9f", "metadata": {}, "outputs": [], "source": [ @@ -452,7 +452,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8cfff7c2", + "id": "e3b65a4d", "metadata": {}, "outputs": [], "source": [ @@ -462,7 +462,7 @@ }, { "cell_type": "markdown", - "id": "acfc4317", + "id": "586eaf8f", "metadata": {}, "source": [ "### πŸ“Š Analyze the generated data\n", @@ -475,7 +475,7 @@ { "cell_type": "code", "execution_count": null, - "id": "02a90c0a", + "id": "b27be3ef", "metadata": {}, "outputs": [], "source": [ @@ -485,7 +485,7 @@ }, { "cell_type": "markdown", - "id": "60bac583", + "id": "a91c5a4d", "metadata": {}, "source": [ "### πŸ†™ Scale up!\n", @@ -498,7 +498,7 @@ { "cell_type": "code", "execution_count": null, - "id": "fd92ca3c", + "id": "5aa63151", "metadata": {}, "outputs": [], "source": [ @@ -508,7 +508,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ca5eded6", + "id": "769fd142", "metadata": {}, "outputs": [], "source": [ @@ -521,7 +521,7 @@ { "cell_type": "code", "execution_count": null, - "id": "29f4b884", + "id": "837585fd", "metadata": {}, "outputs": [], "source": [ @@ -533,7 +533,7 @@ }, { "cell_type": "markdown", - "id": 
"18914be2", + "id": "2a34c7fa", "metadata": {}, "source": [ "## ⏭️ Next Steps\n", diff --git a/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb b/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb index 91c13986d..c1853c2ae 100644 --- a/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb +++ b/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "30b0205f", + "id": "1d1e0d32", "metadata": {}, "source": [ "# 🎨 Data Designer Tutorial: Seeding Synthetic Data Generation with an External Dataset\n", @@ -16,7 +16,7 @@ }, { "cell_type": "markdown", - "id": "fd7184e7", + "id": "59a2287f", "metadata": {}, "source": [ "### πŸ“¦ Import Data Designer\n", @@ -28,7 +28,7 @@ }, { "cell_type": "markdown", - "id": "f229a5f3", + "id": "37598099", "metadata": {}, "source": [ "### ⚑ Colab Setup\n", @@ -39,7 +39,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3cfdeadf", + "id": "e21fdec6", "metadata": {}, "outputs": [], "source": [ @@ -50,7 +50,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8ad3bee9", + "id": "6f0f7a06", "metadata": {}, "outputs": [], "source": [ @@ -68,7 +68,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b7a8d675", + "id": "074125e6", "metadata": {}, "outputs": [], "source": [ @@ -78,7 +78,7 @@ }, { "cell_type": "markdown", - "id": "e52b2806", + "id": "6a0fa5cd", "metadata": {}, "source": [ "### βš™οΈ Initialize the Data Designer interface\n", @@ -91,7 +91,7 @@ { "cell_type": "code", "execution_count": null, - "id": "21ad21d1", + "id": "025133a6", "metadata": {}, "outputs": [], "source": [ @@ -100,7 +100,7 @@ }, { "cell_type": "markdown", - "id": "e313e1c7", + "id": "a5acc512", "metadata": {}, "source": [ "### πŸŽ›οΈ Define model configurations\n", @@ -117,7 +117,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5927e232", + "id": "c045292e", "metadata": {}, "outputs": [], "source": [ @@ -147,7 +147,7 @@ }, { "cell_type": "markdown", - "id": 
"3fe284f0", + "id": "9ecf5d66", "metadata": {}, "source": [ "### πŸ—οΈ Initialize the Data Designer Config Builder\n", @@ -162,7 +162,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0475564b", + "id": "c83f40e8", "metadata": {}, "outputs": [], "source": [ @@ -171,7 +171,7 @@ }, { "cell_type": "markdown", - "id": "588837c2", + "id": "736239fb", "metadata": {}, "source": [ "## πŸ₯ Prepare a seed dataset\n", @@ -196,7 +196,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e8dfb164", + "id": "37d3c904", "metadata": {}, "outputs": [], "source": [ @@ -214,7 +214,7 @@ }, { "cell_type": "markdown", - "id": "ca5f46ea", + "id": "775fb307", "metadata": {}, "source": [ "## 🎨 Designing our synthetic patient notes dataset\n", @@ -227,7 +227,7 @@ { "cell_type": "code", "execution_count": null, - "id": "830810e8", + "id": "ac5dd110", "metadata": {}, "outputs": [], "source": [ @@ -308,7 +308,7 @@ }, { "cell_type": "markdown", - "id": "cbb1e2ad", + "id": "34e4310f", "metadata": {}, "source": [ "### πŸ” Iteration is key – preview the dataset!\n", @@ -325,7 +325,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f9c39104", + "id": "227e584e", "metadata": {}, "outputs": [], "source": [ @@ -335,7 +335,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5750e220", + "id": "cddf1ec9", "metadata": {}, "outputs": [], "source": [ @@ -346,7 +346,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b3573753", + "id": "9a12e8b5", "metadata": {}, "outputs": [], "source": [ @@ -356,7 +356,7 @@ }, { "cell_type": "markdown", - "id": "14937896", + "id": "97aa4334", "metadata": {}, "source": [ "### πŸ“Š Analyze the generated data\n", @@ -369,7 +369,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cd3adb37", + "id": "11a1bb87", "metadata": {}, "outputs": [], "source": [ @@ -379,7 +379,7 @@ }, { "cell_type": "markdown", - "id": "aa4fee79", + "id": "857ca634", "metadata": {}, "source": [ "### πŸ†™ Scale up!\n", @@ -392,7 +392,7 @@ 
{ "cell_type": "code", "execution_count": null, - "id": "29024ffc", + "id": "42fc4f63", "metadata": {}, "outputs": [], "source": [ @@ -402,7 +402,7 @@ { "cell_type": "code", "execution_count": null, - "id": "73da6149", + "id": "a329c7e9", "metadata": {}, "outputs": [], "source": [ @@ -415,7 +415,7 @@ { "cell_type": "code", "execution_count": null, - "id": "bc2f927d", + "id": "c9fe3e2f", "metadata": {}, "outputs": [], "source": [ @@ -427,7 +427,7 @@ }, { "cell_type": "markdown", - "id": "29990c5d", + "id": "9ea737dd", "metadata": {}, "source": [ "## ⏭️ Next Steps\n", diff --git a/docs/colab_notebooks/4-providing-images-as-context.ipynb b/docs/colab_notebooks/4-providing-images-as-context.ipynb index cc10ec63d..1de070dda 100644 --- a/docs/colab_notebooks/4-providing-images-as-context.ipynb +++ b/docs/colab_notebooks/4-providing-images-as-context.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "911877e5", + "id": "92c252fe", "metadata": {}, "source": [ "# 🎨 Data Designer Tutorial: Providing Images as Context for Vision-Based Data Generation" @@ -10,7 +10,7 @@ }, { "cell_type": "markdown", - "id": "c6756afd", + "id": "6dddba82", "metadata": {}, "source": [ "#### πŸ“š What you'll learn\n", @@ -25,7 +25,7 @@ }, { "cell_type": "markdown", - "id": "d73b25ce", + "id": "c676c7f6", "metadata": {}, "source": [ "### πŸ“¦ Import Data Designer\n", @@ -37,7 +37,7 @@ }, { "cell_type": "markdown", - "id": "f05ece3e", + "id": "ea9c162d", "metadata": {}, "source": [ "### ⚑ Colab Setup\n", @@ -48,7 +48,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d84f4489", + "id": "9003e48c", "metadata": {}, "outputs": [], "source": [ @@ -59,7 +59,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5e4cc2d4", + "id": "b84124e6", "metadata": {}, "outputs": [], "source": [ @@ -77,7 +77,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4e4e8d45", + "id": "623b014f", "metadata": {}, "outputs": [], "source": [ @@ -100,7 +100,7 @@ }, { "cell_type": 
"markdown", - "id": "0cdd2a8a", + "id": "4fff6c6d", "metadata": {}, "source": [ "### βš™οΈ Initialize the Data Designer interface\n", @@ -113,7 +113,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4bb0ca16", + "id": "fab8e68a", "metadata": {}, "outputs": [], "source": [ @@ -122,7 +122,7 @@ }, { "cell_type": "markdown", - "id": "bd17820d", + "id": "b95f0f43", "metadata": {}, "source": [ "### πŸŽ›οΈ Define model configurations\n", @@ -139,7 +139,7 @@ { "cell_type": "code", "execution_count": null, - "id": "301f2bd2", + "id": "c942693b", "metadata": {}, "outputs": [], "source": [ @@ -162,7 +162,7 @@ }, { "cell_type": "markdown", - "id": "ad04f82a", + "id": "295a9be1", "metadata": {}, "source": [ "### πŸ—οΈ Initialize the Data Designer Config Builder\n", @@ -177,7 +177,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ac8e2885", + "id": "d3895093", "metadata": {}, "outputs": [], "source": [ @@ -186,7 +186,7 @@ }, { "cell_type": "markdown", - "id": "7b8aafc0", + "id": "18c1c8c0", "metadata": {}, "source": [ "### 🌱 Seed Dataset Creation\n", @@ -203,7 +203,7 @@ { "cell_type": "code", "execution_count": null, - "id": "432edd4a", + "id": "4ff899e2", "metadata": {}, "outputs": [], "source": [ @@ -218,7 +218,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c4f94627", + "id": "53c29682", "metadata": {}, "outputs": [], "source": [ @@ -266,7 +266,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9b697311", + "id": "6d221329", "metadata": {}, "outputs": [], "source": [ @@ -284,7 +284,7 @@ { "cell_type": "code", "execution_count": null, - "id": "bcfc97e8", + "id": "4e965420", "metadata": {}, "outputs": [], "source": [ @@ -294,7 +294,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0a3bdc13", + "id": "b3f30b9e", "metadata": {}, "outputs": [], "source": [ @@ -306,7 +306,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f9665355", + "id": "3d2d08e3", "metadata": {}, "outputs": [], "source": [ @@ -335,7 
+335,7 @@ }, { "cell_type": "markdown", - "id": "6d900aaa", + "id": "d420e3c5", "metadata": {}, "source": [ "### πŸ” Iteration is key – preview the dataset!\n", @@ -352,7 +352,7 @@ { "cell_type": "code", "execution_count": null, - "id": "51a80346", + "id": "653d0fed", "metadata": {}, "outputs": [], "source": [ @@ -362,7 +362,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ea217964", + "id": "8d0f2be7", "metadata": {}, "outputs": [], "source": [ @@ -373,7 +373,7 @@ { "cell_type": "code", "execution_count": null, - "id": "be0e4ef0", + "id": "f3a438ec", "metadata": {}, "outputs": [], "source": [ @@ -383,7 +383,7 @@ }, { "cell_type": "markdown", - "id": "0c75f531", + "id": "7842c013", "metadata": {}, "source": [ "### πŸ“Š Analyze the generated data\n", @@ -396,7 +396,7 @@ { "cell_type": "code", "execution_count": null, - "id": "bcbf86d1", + "id": "a5544e9d", "metadata": {}, "outputs": [], "source": [ @@ -406,7 +406,7 @@ }, { "cell_type": "markdown", - "id": "0ab35029", + "id": "a2f94c56", "metadata": {}, "source": [ "### πŸ”Ž Visual Inspection\n", @@ -417,7 +417,7 @@ { "cell_type": "code", "execution_count": null, - "id": "03314ae9", + "id": "c868dec7", "metadata": { "lines_to_next_cell": 2 }, @@ -441,7 +441,7 @@ }, { "cell_type": "markdown", - "id": "e76a3e3b", + "id": "599cc8ad", "metadata": {}, "source": [ "### πŸ†™ Scale up!\n", @@ -454,7 +454,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d16566c0", + "id": "6a155306", "metadata": {}, "outputs": [], "source": [ @@ -464,7 +464,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8e7796ba", + "id": "44a29388", "metadata": {}, "outputs": [], "source": [ @@ -477,7 +477,7 @@ { "cell_type": "code", "execution_count": null, - "id": "14bc1042", + "id": "c31562b7", "metadata": {}, "outputs": [], "source": [ @@ -489,7 +489,7 @@ }, { "cell_type": "markdown", - "id": "1e676330", + "id": "3d6e086d", "metadata": {}, "source": [ "## ⏭️ Next Steps\n", diff --git 
a/docs/colab_notebooks/5-generating-images.ipynb b/docs/colab_notebooks/5-generating-images.ipynb index ea9e0b8fa..a130f0171 100644 --- a/docs/colab_notebooks/5-generating-images.ipynb +++ b/docs/colab_notebooks/5-generating-images.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "3b8abde3", + "id": "949689cb", "metadata": {}, "source": [ "# 🎨 Data Designer Tutorial: Generating Images\n", @@ -24,7 +24,7 @@ }, { "cell_type": "markdown", - "id": "1da8d75f", + "id": "4fd2547b", "metadata": {}, "source": [ "### πŸ“¦ Import Data Designer\n", @@ -35,7 +35,7 @@ }, { "cell_type": "markdown", - "id": "cc461005", + "id": "f7d4c635", "metadata": {}, "source": [ "### ⚑ Colab Setup\n", @@ -46,7 +46,7 @@ { "cell_type": "code", "execution_count": null, - "id": "206037bf", + "id": "0d8093be", "metadata": {}, "outputs": [], "source": [ @@ -57,7 +57,7 @@ { "cell_type": "code", "execution_count": null, - "id": "db5a4929", + "id": "0d7b2670", "metadata": {}, "outputs": [], "source": [ @@ -75,7 +75,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b3cba8b6", + "id": "3a7649ef", "metadata": {}, "outputs": [], "source": [ @@ -88,7 +88,7 @@ }, { "cell_type": "markdown", - "id": "444aa9dc", + "id": "44101a2b", "metadata": {}, "source": [ "### βš™οΈ Initialize the Data Designer interface\n", @@ -99,7 +99,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1932342c", + "id": "1368d828", "metadata": {}, "outputs": [], "source": [ @@ -108,7 +108,7 @@ }, { "cell_type": "markdown", - "id": "aa7b90c5", + "id": "eb48a53b", "metadata": {}, "source": [ "### πŸŽ›οΈ Define an image-generation model\n", @@ -120,7 +120,7 @@ { "cell_type": "code", "execution_count": null, - "id": "df7e4385", + "id": "74d3635f", "metadata": {}, "outputs": [], "source": [ @@ -142,7 +142,7 @@ }, { "cell_type": "markdown", - "id": "a1325e38", + "id": "9e6f63b5", "metadata": {}, "source": [ "### πŸ—οΈ Build the config: samplers + image column\n", @@ -153,7 +153,7 @@ { "cell_type": 
"code", "execution_count": null, - "id": "95064ed0", + "id": "0b3f2138", "metadata": {}, "outputs": [], "source": [ @@ -326,7 +326,7 @@ }, { "cell_type": "markdown", - "id": "c6fe0620", + "id": "0a83b06e", "metadata": {}, "source": [ "### πŸ” Preview: images as base64\n", @@ -337,7 +337,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7323dce5", + "id": "36c1b7a4", "metadata": {}, "outputs": [], "source": [ @@ -347,7 +347,7 @@ { "cell_type": "code", "execution_count": null, - "id": "510b933c", + "id": "9a61fbba", "metadata": {}, "outputs": [], "source": [ @@ -358,7 +358,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0c8c197f", + "id": "7ada9a40", "metadata": {}, "outputs": [], "source": [ @@ -367,7 +367,7 @@ }, { "cell_type": "markdown", - "id": "4cffd205", + "id": "08e9ac10", "metadata": {}, "source": [ "### πŸ†™ Create: images saved to disk\n", @@ -378,7 +378,7 @@ { "cell_type": "code", "execution_count": null, - "id": "308bf2b8", + "id": "3530dd98", "metadata": {}, "outputs": [], "source": [ @@ -388,7 +388,7 @@ { "cell_type": "code", "execution_count": null, - "id": "02610965", + "id": "a69079d9", "metadata": {}, "outputs": [], "source": [ @@ -399,7 +399,7 @@ { "cell_type": "code", "execution_count": null, - "id": "189af389", + "id": "362943d4", "metadata": {}, "outputs": [], "source": [ @@ -415,7 +415,7 @@ }, { "cell_type": "markdown", - "id": "51558182", + "id": "89ba40d4", "metadata": {}, "source": [ "## ⏭️ Next steps\n", diff --git a/docs/colab_notebooks/6-editing-images-with-image-context.ipynb b/docs/colab_notebooks/6-editing-images-with-image-context.ipynb index ddfe9d371..51dc6e477 100644 --- a/docs/colab_notebooks/6-editing-images-with-image-context.ipynb +++ b/docs/colab_notebooks/6-editing-images-with-image-context.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "c7129daf", + "id": "e67d9bd6", "metadata": {}, "source": [ "# 🎨 Data Designer Tutorial: Image-to-Image Editing\n", @@ -25,7 +25,7 @@ }, { 
"cell_type": "markdown", - "id": "6a438ee3", + "id": "c8161027", "metadata": {}, "source": [ "### πŸ“¦ Import Data Designer\n", @@ -36,7 +36,7 @@ }, { "cell_type": "markdown", - "id": "1a022157", + "id": "f43f67b0", "metadata": {}, "source": [ "### ⚑ Colab Setup\n", @@ -47,7 +47,7 @@ { "cell_type": "code", "execution_count": null, - "id": "752fe3eb", + "id": "ea1f7730", "metadata": {}, "outputs": [], "source": [ @@ -58,7 +58,7 @@ { "cell_type": "code", "execution_count": null, - "id": "49266cc2", + "id": "5ea2b3bf", "metadata": {}, "outputs": [], "source": [ @@ -76,7 +76,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d87dfa0b", + "id": "2d804455", "metadata": {}, "outputs": [], "source": [ @@ -95,7 +95,7 @@ }, { "cell_type": "markdown", - "id": "c99ff426", + "id": "4986af56", "metadata": {}, "source": [ "### βš™οΈ Initialize the Data Designer interface\n", @@ -106,7 +106,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9be6231b", + "id": "fc1f8501", "metadata": {}, "outputs": [], "source": [ @@ -115,7 +115,7 @@ }, { "cell_type": "markdown", - "id": "3e242b51", + "id": "7efaf70f", "metadata": {}, "source": [ "### πŸŽ›οΈ Define an image-editing model\n", @@ -131,7 +131,7 @@ { "cell_type": "code", "execution_count": null, - "id": "34dd8eed", + "id": "a37fb71a", "metadata": {}, "outputs": [], "source": [ @@ -153,7 +153,7 @@ }, { "cell_type": "markdown", - "id": "98abe1a9", + "id": "a06fca7d", "metadata": {}, "source": [ "### 🌱 Load animal portraits from HuggingFace\n", @@ -166,7 +166,7 @@ { "cell_type": "code", "execution_count": null, - "id": "233f483b", + "id": "685c5384", "metadata": {}, "outputs": [], "source": [ @@ -199,7 +199,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6b1a7b59", + "id": "d4f17648", "metadata": {}, "outputs": [], "source": [ @@ -216,7 +216,7 @@ }, { "cell_type": "markdown", - "id": "2956a5a6", + "id": "12993667", "metadata": {}, "source": [ "### πŸ—οΈ Build the configuration\n", @@ -233,7 +233,7 
@@ { "cell_type": "code", "execution_count": null, - "id": "f79ffa72", + "id": "1d9acb93", "metadata": {}, "outputs": [], "source": [ @@ -321,7 +321,7 @@ }, { "cell_type": "markdown", - "id": "0cba69c0", + "id": "1ee3daba", "metadata": {}, "source": [ "### πŸ” Preview: quick iteration\n", @@ -332,7 +332,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ec669ae2", + "id": "4e8d12a9", "metadata": {}, "outputs": [], "source": [ @@ -342,7 +342,7 @@ { "cell_type": "code", "execution_count": null, - "id": "41ac4a95", + "id": "c83a0098", "metadata": {}, "outputs": [], "source": [ @@ -353,7 +353,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6f041d9d", + "id": "8f62881c", "metadata": {}, "outputs": [], "source": [ @@ -362,7 +362,7 @@ }, { "cell_type": "markdown", - "id": "483fa24a", + "id": "e690f53e", "metadata": { "lines_to_next_cell": 2 }, @@ -375,7 +375,7 @@ { "cell_type": "code", "execution_count": null, - "id": "dd4d7dff", + "id": "f1e7371a", "metadata": {}, "outputs": [], "source": [ @@ -411,7 +411,7 @@ { "cell_type": "code", "execution_count": null, - "id": "af08dc6c", + "id": "00901a02", "metadata": {}, "outputs": [], "source": [ @@ -421,7 +421,7 @@ }, { "cell_type": "markdown", - "id": "9ee15c83", + "id": "f832d3dd", "metadata": {}, "source": [ "### πŸ†™ Create at scale\n", @@ -432,7 +432,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9f0d27f8", + "id": "8023a215", "metadata": {}, "outputs": [], "source": [ @@ -442,7 +442,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cc17414a", + "id": "d6c2e1b0", "metadata": {}, "outputs": [], "source": [ @@ -453,7 +453,7 @@ { "cell_type": "code", "execution_count": null, - "id": "849c03b6", + "id": "34787071", "metadata": {}, "outputs": [], "source": [ @@ -463,7 +463,7 @@ }, { "cell_type": "markdown", - "id": "b7385f02", + "id": "bb907145", "metadata": {}, "source": [ "## ⏭️ Next steps\n", diff --git a/docs/scripts/generate_colab_notebooks.py 
b/docs/scripts/generate_colab_notebooks.py index b490ccb9e..c0a8819b1 100644 --- a/docs/scripts/generate_colab_notebooks.py +++ b/docs/scripts/generate_colab_notebooks.py @@ -6,6 +6,9 @@ 1. Injects Colab-specific setup cells (pip install, API key from secrets) 2. Injects cells before the "Import the essentials" section 3. Saves the result as .ipynb files in docs/colab_notebooks + +When --executed-dir is provided and an executed .ipynb exists there, uses it as the +base (preserving cell outputs for display in Fern docs). Otherwise reads from source .py. """ from __future__ import annotations @@ -14,6 +17,7 @@ from pathlib import Path import jupytext +import nbformat from nbformat import NotebookNode from nbformat.v4 import new_code_cell, new_markdown_cell @@ -99,30 +103,41 @@ def process_notebook(notebook: NotebookNode, source_path: Path) -> NotebookNode: return notebook -def generate_colab_notebook(source_path: Path, output_dir: Path) -> Path: +def generate_colab_notebook( + source_path: Path, output_dir: Path, executed_dir: Path | None = None +) -> Path: """Generate a Colab-compatible notebook from a source file. 
Args: source_path: Path to the jupytext percent-format Python source file output_dir: Directory to save the output notebook + executed_dir: Optional directory with executed .ipynb files (preserves outputs) Returns: Path to the generated notebook """ - # Read the source file using jupytext - notebook = jupytext.read(source_path) + executed_path = ( + (executed_dir / f"{source_path.stem}.ipynb") if executed_dir else None + ) + + if executed_path and executed_path.exists(): + notebook = nbformat.read(executed_path, as_version=4) + else: + notebook = jupytext.read(source_path) - # Process the notebook for Colab notebook = process_notebook(notebook, source_path) - # Determine output path output_path = output_dir / f"{source_path.stem}.ipynb" - - # Ensure output directory exists output_dir.mkdir(parents=True, exist_ok=True) - # Write the notebook - jupytext.write(notebook, output_path, config={"metadata": {"jupytext": {"cell_metadata_filter": "-id"}}}) + if executed_path and executed_path.exists(): + nbformat.write(notebook, output_path) + else: + jupytext.write( + notebook, + output_path, + config={"metadata": {"jupytext": {"cell_metadata_filter": "-id"}}}, + ) return output_path @@ -142,6 +157,12 @@ def main() -> None: default=Path("docs/colab_notebooks"), help="Directory to save Colab notebooks (default: docs/colab_notebooks)", ) + parser.add_argument( + "--executed-dir", + type=Path, + default=None, + help="Directory with executed .ipynb files (preserves outputs for Fern docs)", + ) parser.add_argument( "--files", nargs="*", @@ -165,6 +186,8 @@ def main() -> None: print(f"πŸ““ Generating Colab notebooks from {len(source_files)} source file(s)...") print(f" Source: {args.source_dir}") print(f" Output: {args.output_dir}") + if args.executed_dir: + print(f" Executed (outputs): {args.executed_dir}") print() for source_path in source_files: @@ -173,7 +196,9 @@ def main() -> None: continue try: - output_path = generate_colab_notebook(source_path, args.output_dir) + 
output_path = generate_colab_notebook( + source_path, args.output_dir, args.executed_dir + ) print(f"βœ… {source_path.name} β†’ {output_path.name}") except Exception as e: print(f"❌ {source_path.name}: {e}") diff --git a/fern/README.md b/fern/README.md new file mode 100644 index 000000000..c7729e2a9 --- /dev/null +++ b/fern/README.md @@ -0,0 +1,185 @@ +# Fern Documentation Cheat Sheet + +This folder contains the Fern Docs configuration for NeMo Data Designer. + +## πŸ“¦ Installation + +```bash +# Install Fern CLI globally +npm install -g fern-api + +# Or use npx (no install needed) +npx fern-api --version +``` + +## πŸ” Local Preview + +**Before first run (for NotebookViewer pages):** +```bash +make generate-colab-notebooks # docs/colab_notebooks/*.ipynb +make generate-fern-notebooks # fern/components/notebooks/*.ts +``` + +```bash +# From the fern/ directory +cd fern/ +fern docs dev + +# Or from project root +fern docs dev --project ./fern +``` + +The docs will be available at `http://localhost:3000`. + +See [DOCS-VS-FERN.md](DOCS-VS-FERN.md) for docs/ vs fern/ comparison. See [components/README.md](components/README.md) for custom components (Authors, MetricsTable, NotebookViewer, etc.). + +## πŸ“ Folder Structure + +``` +fern/ +β”œβ”€β”€ docs.yml # Global config (title, colors, versions) +β”œβ”€β”€ fern.config.json # Fern CLI config (org name) +β”œβ”€β”€ versions/ +β”‚ β”œβ”€β”€ v0.3.3.yml # Navigation for v0.3.3 +β”‚ └── v0.4.0.yml # Navigation for v0.4.0 +β”œβ”€β”€ v0.3.3/ +β”‚ └── pages/ # MDX content for v0.3.3 +β”œβ”€β”€ v0.4.0/ +β”‚ └── pages/ # MDX content for v0.4.0 +└── assets/ # Shared images, favicons +``` + +## πŸ”„ Bumping the Version + +When releasing a new version (e.g., v0.5.0): + +### 1. Copy the previous version's content +```bash +cp -r fern/v0.4.0 fern/v0.5.0 +``` + +### 2. Create the navigation file +```bash +cp fern/versions/v0.4.0.yml fern/versions/v0.5.0.yml +``` + +### 3. 
Update paths in `versions/v0.5.0.yml` +Change all `../v0.4.0/pages/` β†’ `../v0.5.0/pages/` + +### 4. Add the new version to `docs.yml` +```yaml +versions: + - display-name: v0.5.0 + path: versions/v0.5.0.yml + slug: v0.5.0 + - display-name: v0.4.0 + path: versions/v0.4.0.yml + slug: v0.4.0 + # ... older versions +``` + +### 5. Make your content changes +Edit files in `fern/v0.5.0/pages/` + +## ✏️ Editing Content + +### Adding a new page + +1. Create the MDX file in the appropriate version folder: + ```bash + touch fern/v0.3.3/pages/concepts/new-feature.mdx + ``` + +2. Add frontmatter: + ```mdx + --- + title: New Feature + description: Description for SEO. + --- + + Content starts here... + ``` + +3. Add to navigation in `versions/v0.3.3.yml`: + ```yaml + - page: New Feature + path: ../v0.3.3/pages/concepts/new-feature.mdx + ``` + +### MDX Components + +```mdx +# Callouts +Informational note +Helpful tip +Warning message +Info callout + +# Tabs + + + ```python + print("hello") + ``` + + + ```javascript + console.log("hello") + ``` + + + +# Cards + + + Description + + +``` + +## πŸš€ Deploying + +```bash +# Generate static docs (for CI/CD) +fern generate --docs + +# Deploy to Fern hosting +fern docs deploy +``` + +## πŸ”— Useful Links + +- [Fern Docs](https://buildwithfern.com/learn/docs) +- [MDX Components Reference](https://buildwithfern.com/learn/docs/components) +- [Versioning Guide](https://buildwithfern.com/learn/docs/configuration/versions) +- [Navigation Configuration](https://buildwithfern.com/learn/docs/configuration/navigation) + +## πŸ““ NotebookViewer Component + +Renders Jupyter notebooks in Fern docs with a Colab badge. Source: `docs/notebook_source/*.py` (Jupytext percent-format: `# %%` code, `# %% [markdown]` markdown). + +**Pipeline:** Jupytext reads `.py` β†’ `generate_colab_notebooks` injects Colab setup β†’ `generate-fern-notebooks` runs `ipynb-to-fern-json.py` β†’ outputs `fern/components/notebooks/*.json` + `*.ts`. 
Makefile passes `-o fern/components/notebooks/$$name.json`; the script writes `.ts` alongside. + +**Commands:** +```bash +make generate-colab-notebooks # Colab-ready .ipynb +make generate-fern-notebooks # Runs colab first, then converts to .ts +make generate-fern-notebooks-with-outputs # Execute first (needs API key), then convert +``` + +**Add a new tutorial:** Add `N-name.py` to `docs/notebook_source/`, run pipeline, add MDX page that imports from `@/components/notebooks/N-name`. + +**Files:** `NotebookViewer.tsx`, `fern/components/notebooks/*.ts` (generated), `fern/scripts/ipynb-to-fern-json.py`, `notebook-viewer.css`. Requires Fern Pro/Enterprise. + +## ⚠️ Common Issues + +### "EISDIR: illegal operation on a directory" +- Check that all `path:` values point to `.mdx` files, not directories + +### Page not showing +- Verify the page is listed in the version's navigation file +- Check the path is correct (relative to the versions/ folder) + +### Version selector not appearing +- Ensure `versions:` is defined in `docs.yml` +- Each version needs a valid `.yml` file in `versions/` diff --git a/fern/assets/NVIDIA_dark.svg b/fern/assets/NVIDIA_dark.svg new file mode 100644 index 000000000..04850d9d6 --- /dev/null +++ b/fern/assets/NVIDIA_dark.svg @@ -0,0 +1,35 @@ + + + + + + + + + + + + + + + + + + + diff --git a/fern/assets/NVIDIA_light.svg b/fern/assets/NVIDIA_light.svg new file mode 100644 index 000000000..9ee045c3e --- /dev/null +++ b/fern/assets/NVIDIA_light.svg @@ -0,0 +1,34 @@ + + + + + + + + + + + + + + + + + + + diff --git a/fern/assets/NVIDIA_symbol.svg b/fern/assets/NVIDIA_symbol.svg new file mode 100644 index 000000000..c0507afe0 --- /dev/null +++ b/fern/assets/NVIDIA_symbol.svg @@ -0,0 +1,22 @@ + + + + + + + + + + + + + + + diff --git a/fern/assets/favicon.png b/fern/assets/favicon.png new file mode 100644 index 000000000..11c795d3e Binary files /dev/null and b/fern/assets/favicon.png differ diff --git 
a/fern/assets/images/deployment-enterprise-gateway.png b/fern/assets/images/deployment-enterprise-gateway.png new file mode 100644 index 000000000..8b337406d Binary files /dev/null and b/fern/assets/images/deployment-enterprise-gateway.png differ diff --git a/fern/assets/images/deployment-library-decentralized.png b/fern/assets/images/deployment-library-decentralized.png new file mode 100644 index 000000000..cc3a29623 Binary files /dev/null and b/fern/assets/images/deployment-library-decentralized.png differ diff --git a/fern/assets/images/deployment-microservice.png b/fern/assets/images/deployment-microservice.png new file mode 100644 index 000000000..e870278c3 Binary files /dev/null and b/fern/assets/images/deployment-microservice.png differ diff --git a/fern/assets/images/design-principles-hero.png b/fern/assets/images/design-principles-hero.png new file mode 100644 index 000000000..03e6c8d41 Binary files /dev/null and b/fern/assets/images/design-principles-hero.png differ diff --git a/fern/assets/images/rqa-blog.png b/fern/assets/images/rqa-blog.png new file mode 100644 index 000000000..f43fe156e Binary files /dev/null and b/fern/assets/images/rqa-blog.png differ diff --git a/fern/assets/recipes/code_generation/text_to_python.py b/fern/assets/recipes/code_generation/text_to_python.py new file mode 100644 index 000000000..b5cb88d32 --- /dev/null +++ b/fern/assets/recipes/code_generation/text_to_python.py @@ -0,0 +1,318 @@ +from pathlib import Path + +from data_designer.essentials import ( + CategorySamplerParams, + CodeLang, + CodeValidatorParams, + DataDesigner, + DataDesignerConfigBuilder, + LLMCodeColumnConfig, + LLMJudgeColumnConfig, + LLMTextColumnConfig, + SamplerColumnConfig, + SamplerType, + Score, + SubcategorySamplerParams, + ValidationColumnConfig, + ValidatorType, +) +from data_designer.interface.results import DatasetCreationResults + + +def build_config(model_alias: str) -> DataDesignerConfigBuilder: + config_builder = DataDesignerConfigBuilder() + 
+ config_builder.add_column( + SamplerColumnConfig( + name="industry_sector", + sampler_type=SamplerType.CATEGORY, + params=CategorySamplerParams( + values=[ + "Healthcare", + "Finance", + "Technology", + ], + ), + ), + ) + + config_builder.add_column( + SamplerColumnConfig( + name="topic", + sampler_type=SamplerType.SUBCATEGORY, + params=SubcategorySamplerParams( + category="industry_sector", + values={ + "Healthcare": [ + "Electronic Health Records (EHR) Systems", + "Telemedicine Platforms", + "AI-Powered Diagnostic Tools", + ], + "Finance": [ + "Fraud Detection Software", + "Automated Trading Systems", + "Personal Finance Apps", + ], + "Technology": [ + "Cloud Computing Platforms", + "Artificial Intelligence and Machine Learning Platforms", + "DevOps and CI/CD Tools", + ], + }, + ), + ), + ) + + config_builder.add_column( + SamplerColumnConfig( + name="code_complexity", + sampler_type=SamplerType.CATEGORY, + params=CategorySamplerParams( + values=[ + "Beginner", + "Intermediate", + "Advanced", + ], + ), + ), + ) + + config_builder.add_column( + SamplerColumnConfig( + name="code_concept", + sampler_type=SamplerType.SUBCATEGORY, + params=SubcategorySamplerParams( + category="code_complexity", + values={ + "Beginner": [ + "Variables", + "Data Types", + "Functions", + "Loops", + "Classes", + ], + "Intermediate": [ + "List Comprehensions", + "Object-oriented programming", + "Lambda Functions", + "Web frameworks", + "Pandas", + ], + "Advanced": [ + "Multithreading", + "Context Managers", + "Generators", + ], + }, + ), + ), + ) + + config_builder.add_column( + SamplerColumnConfig( + name="instruction_phrase", + sampler_type=SamplerType.CATEGORY, + params=CategorySamplerParams( + values=[ + "Write a function that", + "Create a class that", + "Implement a script", + "Can you create a function", + "Develop a module that", + ], + ), + ), + ) + + config_builder.add_column( + LLMTextColumnConfig( + name="instruction", + model_alias=model_alias, + system_prompt=("You are an 
expert at generating clear and specific programming tasks."), + prompt=( + "Generate an instruction to create Python code that solves a specific problem.\n" + "Each instruction should begin with one of the following phrases: {{ instruction_phrase }}.\n\n" + "Important Guidelines:\n" + "* Industry Relevance: Ensure the instruction pertains to the {{ industry_sector }} sector and {{ topic }} topic.\n" + "* Code Complexity: Tailor the instruction to the {{ code_complexity }} level. Utilize relevant {{ code_concept }} where appropriate to match the complexity level.\n" + "* Clarity and Specificity: Make the problem statement clear and unambiguous. Provide sufficient context to understand the requirements without being overly verbose.\n" + "* Response Formatting: Do not include any markers such as ### Response ### in the instruction.\n" + ), + ) + ) + + config_builder.add_column( + LLMCodeColumnConfig( + name="code_implementation", + model_alias=model_alias, + code_lang=CodeLang.PYTHON, + system_prompt=( + "You are an expert Python programmer who writes clean, efficient, and well-documented code." 
+ ), + prompt=( + "Write Python code for the following instruction:\n" + "Instruction: {{ instruction }}\n\n" + "Important Guidelines:\n" + "* Code Quality: Your code should be clean, complete, self-contained, and accurate.\n" + "* Code Validity: Please ensure that your Python code is executable and does not contain any errors.\n" + "* Packages: Remember to import any necessary libraries, and to use all libraries you import.\n" + "* Complexity & Concepts: The code should be written at a {{ code_complexity }} level, making use of concepts such as {{code_concept}}.\n" + ), + ) + ) + + config_builder.add_column( + LLMTextColumnConfig( + name="instruction", + model_alias=model_alias, + system_prompt=("You are an expert at generating clear and specific programming tasks."), + prompt=( + "Generate an instruction to create Python code that solves a specific problem.\n" + "Each instruction should begin with one of the following phrases: {{ instruction_phrase }}.\n\n" + "Important Guidelines:\n" + "* Industry Relevance: Ensure the instruction pertains to the {{ industry_sector }} sector and {{ topic }} topic.\n" + "* Code Complexity: Tailor the instruction to the {{ code_complexity }} level. Utilize relevant {{ code_concept }} where appropriate to match the complexity level.\n" + "* Clarity and Specificity: Make the problem statement clear and unambiguous. Provide sufficient context to understand the requirements without being overly verbose.\n" + "* Response Formatting: Do not include any markers such as ### Response ### in the instruction.\n" + ), + ) + ) + + config_builder.add_column( + LLMCodeColumnConfig( + name="code_implementation", + model_alias=model_alias, + code_lang=CodeLang.PYTHON, + system_prompt=( + "You are an expert Python programmer who writes clean, efficient, and well-documented code." 
+ ), + prompt=( + "Write Python code for the following instruction:\n" + "Instruction: {{ instruction }}\n\n" + "Important Guidelines:\n" + "* Code Quality: Your code should be clean, complete, self-contained, and accurate.\n" + "* Code Validity: Please ensure that your Python code is executable and does not contain any errors.\n" + "* Packages: Remember to import any necessary libraries, and to use all libraries you import.\n" + "* Complexity & Concepts: The code should be written at a {{ code_complexity }} level, making use of concepts such as {{ code_concept }}.\n" + ), + ) + ) + + config_builder.add_column( + LLMJudgeColumnConfig( + name="code_judge_result", + model_alias=model_alias, + prompt=TEXT_TO_PYTHON_JUDGE_TEMPLATE, + scores=python_scoring, + ) + ) + + config_builder.add_column( + ValidationColumnConfig( + name="code_validity_result", + validator_type=ValidatorType.CODE, + target_columns=["code_implementation"], + validator_params=CodeValidatorParams( + code_lang=CodeLang.PYTHON, + ), + batch_size=100, + ) + ) + + return config_builder + + +def create_dataset( + config_builder: DataDesignerConfigBuilder, + num_records: int, + artifact_path: Path | str | None = None, +) -> DatasetCreationResults: + data_designer = DataDesigner(artifact_path=artifact_path) + results = data_designer.create(config_builder, num_records=num_records) + return results + + +TEXT_TO_PYTHON_JUDGE_TEMPLATE = """\ +You are an expert in Python programming, with specialized knowledge in software engineering, data science, and algorithmic problem-solving. + +You think about potential flaws and errors in the code. You are a tough critic, but a fair one. + +Take a deep breath and use the Python Code Quality Rubric below to score the **Generated Python Code** based on the INSTRUCTIONS. 
+ +#### INSTRUCTIONS +The Generated Python Code should be a valid response to the Natural Language Prompt below + +Natural Language Prompt: +{{ instruction }} + +Generated Python Code +{{ code_implementation }} +""" + + +python_scoring = [ + Score( + name="Relevance", + description="Adherence to INSTRUCTIONS and CONTEXT", + options={ + 4: "Perfectly meets all specified requirements.", + 3: "Meets most requirements with minor deviations.", + 2: "Moderate deviation from the instructions.", + 1: "Significant deviations from the instructions.", + 0: "Does not adhere to the instructions.", + }, + ), + Score( + name="Pythonic", + description="Pythonic Code and Best Practices (Does the code follow Python conventions and best practices?)", + options={ + 4: "The code exemplifies Pythonic principles, making excellent use of Python-specific constructs, standard library modules and programming idioms; follows all relevant PEPs.", + 3: "The code closely follows Python conventions and adheres to many best practices; good use of Python-specific constructs, standard library modules and programming idioms.", + 2: "The code generally follows Python conventions but has room for better alignment with Pythonic practices.", + 1: "The code loosely follows Python conventions, with several deviations from best practices.", + 0: "The code does not follow Python conventions or best practices, using non-Pythonic approaches.", + }, + ), + Score( + name="Readability", + description="Readability and Maintainability (Is the Python code easy to understand and maintain?)", + options={ + 4: ( + "The code is excellently formatted, follows PEP 8 guidelines, is elegantly concise and clear, uses meaningful variable names, " + "ensuring high readability and ease of maintenance; organizes complex logic well. Docstrings are given in a Google Docstring format." 
+ ), + 3: "The code is well-formatted in the sense of code-as-documentation, making it relatively easy to understand and maintain; uses descriptive names and organizes logic clearly.", + 2: "The code is somewhat readable with basic formatting and some comments, but improvements are needed; needs better use of descriptive names and organization.", + 1: "The code has minimal formatting, making it hard to understand; lacks meaningful names and organization.", + 0: "The code is unreadable, with no attempt at formatting or description.", + }, + ), + Score( + name="Efficiency", + description="Efficiency and Performance (Is the code optimized for performance?)", + options={ + 4: "The solution is highly efficient, using appropriate data structures and algorithms; avoids unnecessary computations and optimizes for both time and space complexity.", + 3: "The solution is efficient, with good use of Python's built-in functions and libraries; minor areas for optimization.", + 2: "The solution is moderately efficient, but misses some opportunities for optimization; uses some inefficient patterns.", + 1: "The solution shows poor efficiency, with notable performance issues; lacks effective optimization techniques.", + 0: "The solution is highly inefficient; overlooks fundamental optimization practices, resulting in significant performance issues.", + }, + ), +] + + +if __name__ == "__main__": + from argparse import ArgumentParser + + parser = ArgumentParser() + parser.add_argument("--model-alias", type=str, default="openai-text") + parser.add_argument("--num-records", type=int, default=5) + parser.add_argument("--artifact-path", type=str, default=None) + args = parser.parse_args() + + config_builder = build_config(model_alias=args.model_alias) + results = create_dataset(config_builder, num_records=args.num_records, artifact_path=args.artifact_path) + + print(f"Dataset saved to: {results.artifact_storage.final_dataset_path}") + + results.load_analysis().to_report() diff --git 
a/fern/assets/recipes/code_generation/text_to_sql.py b/fern/assets/recipes/code_generation/text_to_sql.py new file mode 100644 index 000000000..a0fbf6e51 --- /dev/null +++ b/fern/assets/recipes/code_generation/text_to_sql.py @@ -0,0 +1,323 @@ +from pathlib import Path + +from data_designer.essentials import ( + CategorySamplerParams, + CodeLang, + CodeValidatorParams, + DataDesigner, + DataDesignerConfigBuilder, + LLMCodeColumnConfig, + LLMJudgeColumnConfig, + LLMTextColumnConfig, + SamplerColumnConfig, + SamplerType, + Score, + SubcategorySamplerParams, + ValidationColumnConfig, + ValidatorType, +) +from data_designer.interface.results import DatasetCreationResults + + +def build_config(model_alias: str) -> DataDesignerConfigBuilder: + config_builder = DataDesignerConfigBuilder() + + config_builder.add_column( + SamplerColumnConfig( + name="industry_sector", + sampler_type=SamplerType.CATEGORY, + params=CategorySamplerParams( + values=["Healthcare", "Finance", "Technology"], + ), + ) + ) + + config_builder.add_column( + SamplerColumnConfig( + name="topic", + sampler_type=SamplerType.SUBCATEGORY, + params=SubcategorySamplerParams( + category="industry_sector", + values={ + "Healthcare": [ + "Electronic Health Records (EHR) Systems", + "Telemedicine Platforms", + "AI-Powered Diagnostic Tools", + ], + "Finance": [ + "Fraud Detection Software", + "Automated Trading Systems", + "Personal Finance Apps", + ], + "Technology": [ + "Cloud Computing Platforms", + "Artificial Intelligence and Machine Learning Platforms", + "DevOps and CI/CD Tools", + ], + }, + ), + ) + ) + + config_builder.add_column( + SamplerColumnConfig( + name="sql_complexity", + sampler_type=SamplerType.CATEGORY, + params=CategorySamplerParams( + values=["Beginner", "Intermediate", "Advanced"], + ), + ) + ) + + config_builder.add_column( + SamplerColumnConfig( + name="sql_concept", + sampler_type=SamplerType.SUBCATEGORY, + params=SubcategorySamplerParams( + category="sql_complexity", + values={ + 
"Beginner": [ + "Basic SELECT Statements", + "WHERE Clauses", + "Basic JOINs", + "INSERT, UPDATE, DELETE", + ], + "Intermediate": [ + "Aggregation Functions", + "Multiple JOINs", + "Subqueries", + "Views", + ], + "Advanced": [ + "Window Functions", + "Common Table Expressions (CTEs)", + "Stored Procedures", + "Query Optimization", + ], + }, + ), + ) + ) + + config_builder.add_column( + SamplerColumnConfig( + name="sql_task_type", + sampler_type=SamplerType.CATEGORY, + params=CategorySamplerParams( + values=[ + "Data Retrieval", + "Data Manipulation", + "Analytics and Reporting", + "Data Transformation", + ], + ), + ) + ) + + config_builder.add_column( + SamplerColumnConfig( + name="instruction_phrase", + sampler_type=SamplerType.CATEGORY, + params=CategorySamplerParams( + values=[ + "Write an SQL query that", + "Create an SQL statement to", + "Develop an SQL query to", + "Can you write SQL that", + "Formulate an SQL query that", + ], + ), + ) + ) + + config_builder.add_column( + LLMTextColumnConfig( + name="sql_prompt", + model_alias=model_alias, + system_prompt="You are an expert at generating clear and specific SQL tasks.", + prompt=SQL_PROMPT_TEXT, + ) + ) + + config_builder.add_column( + LLMCodeColumnConfig( + name="sql_context", + model_alias=model_alias, + code_lang=CodeLang.SQL_ANSI, + system_prompt=( + "You are an expert SQL database designer who creates clean, efficient, and " + "well-structured database schemas." 
+ ), + prompt=SQL_CONTEXT_TEXT, + ) + ) + + config_builder.add_column( + LLMCodeColumnConfig( + name="sql", + model_alias=model_alias, + code_lang=CodeLang.SQL_ANSI, + system_prompt="You are an expert SQL programmer who writes clean, efficient, and well-structured queries.", + prompt=SQL_CODE_TEXT, + ) + ) + + config_builder.add_column( + ValidationColumnConfig( + name="code_validity_result", + validator_type=ValidatorType.CODE, + target_columns=["sql"], + validator_params=CodeValidatorParams( + code_lang=CodeLang.SQL_ANSI, + ), + batch_size=100, + ) + ) + + config_builder.add_column( + LLMJudgeColumnConfig( + name="code_judge_result", + model_alias=model_alias, + prompt=TEXT_TO_SQL_JUDGE_TEMPLATE, + scores=sql_scoring, + ) + ) + + return config_builder + + +def create_dataset( + config_builder: DataDesignerConfigBuilder, + num_records: int, + artifact_path: Path | str | None = None, +) -> DatasetCreationResults: + data_designer = DataDesigner(artifact_path=artifact_path) + results = data_designer.create(config_builder, num_records=num_records) + return results + + +SQL_PROMPT_TEXT = ( + "Generate an instruction to create SQL code that solves a specific problem.\n" + "Each instruction should begin with one of the following phrases: {{instruction_phrase}}.\n\n" + "Important Guidelines:\n" + "* Industry Relevance: Ensure the instruction pertains to the {{industry_sector}} sector and {{topic}} topic.\n" + "* SQL Complexity: Tailor the instruction to the {{sql_complexity}} level. Utilize relevant {{sql_concept}} " + "where appropriate to match the complexity level.\n" + "* Task Type: The instruction should involve a {{sql_task_type}} task.\n" + "* Clarity and Specificity: Make the problem statement clear and unambiguous. 
Provide sufficient context to " + "understand the requirements without being overly verbose.\n" + "* Response Formatting: Do not include any markers such as ### Response ### in the instruction.\n" +) + +SQL_CONTEXT_TEXT = ( + "Generate the SQL for creating database tables that would be relevant for the following instruction:\n" + "Instruction: {{sql_prompt}}\n\n" + "Important Guidelines:\n" + "* Relevance: Ensure all tables are directly related to the {{industry_sector}} sector and {{topic}} topic.\n" + "* Completeness: Include all essential columns with appropriate data types, primary/foreign keys, and necessary constraints.\n" + "* Realism: Use realistic table structures typical for the specified industry.\n" + "* Executable SQL: Provide complete CREATE TABLE statements that can be run without modification.\n" + "* Consistency: Use consistent naming conventions (e.g., snake_case for table and column names).\n" + "* Sample Data: Include INSERT statements with sample data that makes sense for the tables (at least 5-10 rows per table)." +) + +SQL_CODE_TEXT = ( + "Write SQL code for the following instruction based on the provided database context:\n" + "Instruction: {{sql_prompt}}\n\n" + "Database Context:\n" + "{{sql_context}}\n\n" + "Important Guidelines:\n" + "* Code Quality: Your SQL should be clean, complete, self-contained and accurate.\n" + "* Code Validity: Please ensure that your SQL code is executable and does not contain any errors.\n" + "* Context: Base your query on the provided database context. 
Only reference tables and columns that " + "exist in the context.\n" + "* Complexity & Concepts: The SQL should be written at a {{sql_complexity}} level, making use of " + "concepts such as {{sql_concept}}.\n" + "* Task Type: Ensure your solution implements the appropriate {{sql_task_type}} operation.\n" + "* Comments: Include brief comments explaining the key parts of your query.\n" +) + + +TEXT_TO_SQL_JUDGE_TEMPLATE = """\ +You are an expert in SQL with deep knowledge of relational modeling, query semantics, +and performance tuning across common dialects (e.g., PostgreSQL, MySQL, SQLite, SQL Server). +You think critically about correctness, readability, and efficiency. + +Use the SQL Query Quality Rubric below to score the **Generated SQL Query** based on the INSTRUCTIONS. + +#### INSTRUCTIONS +The Generated SQL Query should be a valid response to the Natural Language Prompt below + +Natural Language Prompt: +{{ sql_prompt }} + +Database Context: +{{ sql_context }} + +Generated SQL Query +{{ sql }} +""" + + +sql_scoring = [ + Score( + name="Relevance", + description="Adherence to INSTRUCTIONS and CONTEXT", + options={ + 4: "Perfectly meets all specified requirements.", + 3: "Meets most requirements with minor deviations.", + 2: "Moderate deviation from the instructions.", + 1: "Significant deviations from the instructions.", + 0: "Does not adhere to the instructions.", + }, + ), + Score( + name="SQL Correctness", + description="Syntax and semantic correctness; returns the intended result", + options={ + 4: "Valid SQL with correct joins, filters, grouping/aggregation, and NULL handling; produces the intended result set under the stated/implicit dialect.", + 3: "Generally correct with minor issues (e.g., edge-case NULLs, minor grouping detail) but still likely yields the intended result.", + 2: "Partially correct; noticeable semantic mistakes (joins, grouping, filters) that may change results or fail in edge cases.", + 1: "Largely incorrect; major semantic or 
syntactic errors likely causing failure or wrong results.", + 0: "Invalid SQL or unrelated to the task; will not run or cannot produce a meaningful result.", + }, + ), + Score( + name="Readability", + description="Formatting, clarity, and maintainability", + options={ + 4: "Cleanly formatted (keywords/clauses consistently styled), clear structure (CTEs/subqueries where helpful), meaningful table/column aliases, and concise.", + 3: "Generally readable with consistent formatting and understandable aliases; could be organized slightly better.", + 2: "Somewhat readable but inconsistent formatting or confusing aliasing; structure is harder to follow.", + 1: "Poorly formatted and hard to read; unclear structure and aliasing.", + 0: "Unreadable or chaotic; no meaningful structure or styling.", + }, + ), + Score( + name="Efficiency", + description="Query performance best practices", + options={ + 4: "Uses sargable predicates, appropriate joins, selective filters early, avoids SELECT *, unnecessary DISTINCT, and wasteful subqueries; likely to use indexes effectively.", + 3: "Mostly efficient; minor opportunities for improvement (e.g., simplifying expressions, reducing data early).", + 2: "Moderate inefficiencies (e.g., non-sargable filters, unnecessary nested subqueries, broad SELECT *).", + 1: "Notably inefficient patterns likely causing large scans or poor plans.", + 0: "Highly inefficient; ignores basic best practices and likely to perform very poorly.", + }, + ), +] + +if __name__ == "__main__": + from argparse import ArgumentParser + + parser = ArgumentParser() + parser.add_argument("--model-alias", type=str, default="openai-text") + parser.add_argument("--num-records", type=int, default=5) + parser.add_argument("--artifact-path", type=str, default=None) + args = parser.parse_args() + + config_builder = build_config(model_alias=args.model_alias) + results = create_dataset(config_builder, num_records=args.num_records, artifact_path=args.artifact_path) + + 
print(f"Dataset saved to: {results.artifact_storage.final_dataset_path}") + + results.load_analysis().to_report() diff --git a/fern/assets/recipes/mcp_and_tooluse/basic_mcp.py b/fern/assets/recipes/mcp_and_tooluse/basic_mcp.py new file mode 100644 index 000000000..150e26d92 --- /dev/null +++ b/fern/assets/recipes/mcp_and_tooluse/basic_mcp.py @@ -0,0 +1,240 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "data-designer", +# "mcp", +# ] +# /// +"""Basic MCP Recipe: Simple Tool Use Example + +This recipe demonstrates the minimal MCP tool-calling workflow with Data Designer: + +1) Define a simple MCP server with basic tools (get_fact, add_numbers) +2) Configure Data Designer to use the MCP tools +3) Generate data that requires tool calls to complete + +Prerequisites: + - OPENAI_API_KEY environment variable for OpenAI provider model aliases. + - NVIDIA_API_KEY environment variable for NVIDIA provider model aliases (default model alias is "nvidia-text"). 
+ +Run: + # Basic usage (generates 2 records by default) + uv run basic_mcp.py + + # For help message and available options + uv run basic_mcp.py --help +""" + +from __future__ import annotations + +import argparse +import json +import os +import sys +from pathlib import Path + +from mcp.server.fastmcp import FastMCP + +import data_designer.config as dd +from data_designer.interface import DataDesigner + +MCP_SERVER_NAME = "basic-tools" + + +# ============================================================================= +# MCP Server Definition +# ============================================================================= + +mcp_server = FastMCP(MCP_SERVER_NAME) + +# Simple knowledge base for the get_fact tool +FACTS = { + "python": "Python was created by Guido van Rossum and first released in 1991.", + "earth": "Earth is the third planet from the Sun and has one natural satellite, the Moon.", + "water": "Water (H2O) freezes at 0Β°C (32Β°F) and boils at 100Β°C (212Β°F) at sea level.", + "light": "The speed of light in a vacuum is approximately 299,792 kilometers per second.", +} + + +@mcp_server.tool() +def get_fact(topic: str) -> str: + """Get a fact about a topic from the knowledge base. + + Args: + topic: The topic to look up (e.g., "python", "earth", "water", "light") + + Returns: + A fact about the topic, or an error message if not found. + """ + topic_lower = topic.lower() + if topic_lower in FACTS: + return json.dumps({"topic": topic, "fact": FACTS[topic_lower]}) + return json.dumps({"error": f"No fact found for topic: {topic}", "available_topics": list(FACTS.keys())}) + + +@mcp_server.tool() +def add_numbers(a: float, b: float) -> str: + """Add two numbers together. + + Args: + a: First number + b: Second number + + Returns: + The sum of the two numbers. + """ + result = a + b + return json.dumps({"a": a, "b": b, "sum": result}) + + +@mcp_server.tool() +def list_topics() -> str: + """List all available topics in the knowledge base. 
+ + Returns: + List of available topics. + """ + return json.dumps({"topics": list(FACTS.keys())}) + + +# ============================================================================= +# Data Designer Configuration +# ============================================================================= + + +def build_config(model_alias: str, provider_name: str) -> dd.DataDesignerConfigBuilder: + """Build the Data Designer configuration for basic tool use.""" + tool_config = dd.ToolConfig( + tool_alias="basic-tools", + providers=[provider_name], + allow_tools=["get_fact", "add_numbers", "list_topics"], + max_tool_call_turns=5, + timeout_sec=30.0, + ) + + config_builder = dd.DataDesignerConfigBuilder(tool_configs=[tool_config]) + + # Add a seed column with topics to look up + config_builder.add_column( + dd.SamplerColumnConfig( + name="topic", + sampler_type=dd.SamplerType.CATEGORY, + params=dd.CategorySamplerParams(values=["python", "earth", "water", "light"]), + ) + ) + + # Add a column that uses the get_fact tool + config_builder.add_column( + dd.LLMTextColumnConfig( + name="fact_response", + model_alias=model_alias, + prompt=( + "Use the get_fact tool to look up information about '{{ topic }}', " + "then provide a one-sentence summary of what you learned." + ), + system_prompt="You must call the get_fact tool before answering. 
Only use information from tool results.", + tool_alias="basic-tools", + with_trace=dd.TraceType.ALL_MESSAGES, + ) + ) + + # Add a column that uses the add_numbers tool + config_builder.add_column( + dd.SamplerColumnConfig( + name="num_a", + sampler_type=dd.SamplerType.UNIFORM, + params=dd.UniformSamplerParams(low=1, high=100), + ) + ) + + config_builder.add_column( + dd.SamplerColumnConfig( + name="num_b", + sampler_type=dd.SamplerType.UNIFORM, + params=dd.UniformSamplerParams(low=1, high=100), + ) + ) + + config_builder.add_column( + dd.LLMTextColumnConfig( + name="math_response", + model_alias=model_alias, + prompt=( + "Use the add_numbers tool to calculate {{ num_a }} + {{ num_b }}, " + "then report the result in a complete sentence." + ), + system_prompt="You must call the add_numbers tool to perform the calculation. Report the exact result.", + tool_alias="basic-tools", + with_trace=dd.TraceType.ALL_MESSAGES, + ) + ) + + return config_builder + + +# ============================================================================= +# Main Entry Points +# ============================================================================= + + +def serve() -> None: + """Run the MCP server (called when launched as subprocess by Data Designer).""" + mcp_server.run() + + +def parse_args() -> argparse.Namespace: + """Parse command line arguments.""" + parser = argparse.ArgumentParser(description="Basic MCP tool use example with Data Designer.") + subparsers = parser.add_subparsers(dest="command") + + # 'serve' subcommand for running the MCP server + subparsers.add_parser("serve", help="Run the MCP server (used by Data Designer)") + + # Default command arguments (demo mode) + parser.add_argument("--model-alias", type=str, default="nvidia-text", help="Model alias to use for generation") + parser.add_argument("--num-records", type=int, default=2, help="Number of records to generate") + # For compatibility with Makefile test-run-recipes target (ignored in demo mode) + 
parser.add_argument("--artifact-path", type=str, default=None, help=argparse.SUPPRESS) + + return parser.parse_args() + + +def main() -> None: + """Main entry point for the demo.""" + args = parse_args() + + # Handle 'serve' subcommand + if args.command == "serve": + serve() + return + + # Demo mode: run Data Designer with the MCP server + if os.environ.get("NVIDIA_API_KEY") is None and args.model_alias.startswith("nvidia"): + raise RuntimeError("NVIDIA_API_KEY must be set when using NVIDIA model aliases.") + + # Configure MCP provider to run via stdio transport (local subprocess) + mcp_provider = dd.LocalStdioMCPProvider( + name=MCP_SERVER_NAME, + command=sys.executable, + args=[str(Path(__file__).resolve()), "serve"], + ) + + config_builder = build_config( + model_alias=args.model_alias, + provider_name=MCP_SERVER_NAME, + ) + + data_designer = DataDesigner(mcp_providers=[mcp_provider]) + preview_results = data_designer.preview(config_builder, num_records=args.num_records) + + # Display results + print("\n" + "=" * 60) + print("GENERATED DATA") + print("=" * 60) + preview_results.display_sample_record() + + +if __name__ == "__main__": + main() diff --git a/fern/assets/recipes/mcp_and_tooluse/pdf_qa.py b/fern/assets/recipes/mcp_and_tooluse/pdf_qa.py new file mode 100644 index 000000000..024ed4f37 --- /dev/null +++ b/fern/assets/recipes/mcp_and_tooluse/pdf_qa.py @@ -0,0 +1,572 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "data-designer", +# "mcp", +# "bm25s", +# "pymupdf", +# "rich", +# ] +# /// +"""MCP + Tool Use Recipe: Document Q&A with BM25S Lexical Search + +This recipe demonstrates an end-to-end MCP tool-calling workflow: + +1) Load one or more PDF documents from URLs or local paths. +2) Index them with BM25S for fast lexical search. 
+3) Use Data Designer tool calls (`search_docs`) to generate grounded Q&A pairs. + +Prerequisites: + - OPENAI_API_KEY environment variable for OpenAI provider model aliases. + - NVIDIA_API_KEY environment variable for NVIDIA provider model aliases (default model alias is "nvidia-reasoning"). + +Run: + # Basic usage with default sample PDF (generates 4 Q&A pairs) + uv run pdf_qa.py + + # For help message and available options + uv run pdf_qa.py --help +""" + +from __future__ import annotations + +import argparse +import io +import json +import os +import sys +from pathlib import Path +from urllib.parse import urlparse +from urllib.request import urlopen + +import bm25s +import fitz +from mcp.server.fastmcp import FastMCP +from pydantic import BaseModel, Field + +import data_designer.config as dd +from data_designer.config.preview_results import PreviewResults +from data_designer.interface import DataDesigner + +DEFAULT_PDF_URL = "https://research.nvidia.com/labs/nemotron/files/NVIDIA-Nemotron-3-Nano-Technical-Report.pdf" +MCP_SERVER_NAME = "doc-bm25-search" + +# Global state for the BM25 index (populated at server startup) +_bm25_retriever: bm25s.BM25 | None = None +_corpus: list[dict[str, str]] = [] + + +class QAPair(BaseModel): + question: str = Field(..., description="A question grounded in the document text.") + answer: str = Field(..., description="A concise answer grounded in the supporting passage.") + supporting_passage: str = Field( + ..., description="A short excerpt (2-4 sentences) copied from the search result that supports the answer." + ) + citation: str = Field( + ..., description="The citation (e.g. source url, page number, etc) of the supporting passage." 
+ ) + + +class TopicList(BaseModel): + topics: list[str] = Field( + ..., + description="High-level topics covered by the document.", + ) + + +def _is_url(path_or_url: str) -> bool: + """Check if the given string is a URL.""" + parsed = urlparse(path_or_url) + return parsed.scheme in ("http", "https") + + +def _get_source_name(path_or_url: str) -> str: + """Extract a human-readable source name from a path or URL.""" + if _is_url(path_or_url): + parsed = urlparse(path_or_url) + return Path(parsed.path).name or parsed.netloc + return Path(path_or_url).name + + +def extract_pdf_text(path_or_url: str) -> list[dict[str, str]]: + """Extract text from a PDF file or URL, returning a list of passages with metadata. + + Each passage corresponds to a page from the PDF. + + Args: + path_or_url: Either a local file path or a URL to a PDF document. + URLs are streamed directly into memory without saving to disk. + + Returns: + List of passage dictionaries with 'text', 'page', and 'source' keys. + """ + passages: list[dict[str, str]] = [] + source_name = _get_source_name(path_or_url) + + if _is_url(path_or_url): + with urlopen(path_or_url) as response: + pdf_bytes = response.read() + doc = fitz.open(stream=io.BytesIO(pdf_bytes), filetype="pdf") + else: + doc = fitz.open(path_or_url) + + for page_num in range(len(doc)): + page = doc[page_num] + text = page.get_text("text").strip() + if text: + passages.append( + { + "text": text, + "page": str(page_num + 1), + "source": source_name, + } + ) + + doc.close() + return passages + + +def build_bm25_index(passages: list[dict[str, str]]) -> bm25s.BM25: + """Build a BM25S index from the extracted passages.""" + corpus_texts = [p["text"] for p in passages] + corpus_tokens = bm25s.tokenize(corpus_texts, stopwords="en") + + retriever = bm25s.BM25() + retriever.index(corpus_tokens) + + return retriever + + +def initialize_search_index(pdf_sources: list[str]) -> None: + """Load PDFs from paths/URLs and build the BM25 index. 
+ + Args: + pdf_sources: List of PDF file paths or URLs to index. + """ + global _bm25_retriever, _corpus + + _corpus = [] + for source in pdf_sources: + passages = extract_pdf_text(source) + _corpus.extend(passages) + + if _corpus: + _bm25_retriever = build_bm25_index(_corpus) + + +# MCP Server Definition +mcp_server = FastMCP(MCP_SERVER_NAME) + + +@mcp_server.tool() +def search_docs(query: str, limit: int = 5, document: str = "", page: str = "") -> str: + """Search through documents using BM25 lexical search. + + BM25 is a keyword-based retrieval algorithm that matches exact terms. For best results: + + - Use specific keywords, not full questions (e.g., "configuration parameters timeout" not "How do I set the timeout?") + - Include domain-specific terms that would appear in the source text + - Combine multiple relevant terms to narrow results (e.g., "installation requirements dependencies") + - Try synonyms or alternative phrasings if initial searches return poor results + - Avoid filler words and focus on content-bearing terms + + Examples: + Good queries: + - "error handling retry mechanism" + - "authentication token expiration" + - "memory allocation buffer size" + + Less effective queries: + - "What are the error handling options?" + - "Tell me about authentication" + - "How does memory work?" 
+ + Args: + query: Search query string - use specific keywords for best results + limit: Maximum number of results to return (default: 5) + document: Optional document source name to restrict search to (use list_docs to see available documents) + page: Optional page number to restrict search to (requires document to be specified) + + Returns: + JSON string with search results including text excerpts and page numbers + """ + global _bm25_retriever, _corpus + + if _bm25_retriever is None or not _corpus: + return json.dumps({"error": "Search index not initialized"}) + + # Validate that page requires document + if page and not document: + return json.dumps({"error": "The 'page' parameter requires 'document' to be specified"}) + + query_tokens = bm25s.tokenize([query], stopwords="en") + + # When filtering, retrieve more results to ensure we have enough after filtering + retrieve_limit = len(_corpus) if (document or page) else limit + results, scores = _bm25_retriever.retrieve(query_tokens, k=min(retrieve_limit, len(_corpus))) + + search_results: list[dict[str, str | float]] = [] + for i in range(results.shape[1]): + doc_idx = results[0, i] + score = float(scores[0, i]) + + if score <= 0: + continue + + passage = _corpus[doc_idx] + + # Apply document filter + if document and passage["source"] != document: + continue + + # Apply page filter + if page and passage["page"] != page: + continue + + search_results.append( + { + "text": passage["text"][:2000], + "page": passage["page"], + "source": passage["source"], + "score": round(score, 4), + "url": f"file://{passage['source']}#page={passage['page']}", + } + ) + + # Stop once we have enough results + if len(search_results) >= limit: + break + + return json.dumps({"results": search_results, "query": query, "total": len(search_results)}) + + +@mcp_server.tool() +def list_docs() -> str: + """List all documents in the search index with their page counts. 
+ + Returns: + JSON string with a list of documents, each containing the source name and page count. + """ + global _corpus + + if not _corpus: + return json.dumps({"error": "Search index not initialized", "documents": []}) + + doc_pages: dict[str, set[str]] = {} + for passage in _corpus: + source = passage["source"] + page = passage["page"] + if source not in doc_pages: + doc_pages[source] = set() + doc_pages[source].add(page) + + documents = [{"source": source, "page_count": len(pages)} for source, pages in sorted(doc_pages.items())] + + return json.dumps({"documents": documents, "total_documents": len(documents)}) + + +def build_config(model_alias: str, provider_name: str) -> dd.DataDesignerConfigBuilder: + """Build the Data Designer configuration for document Q&A generation.""" + tool_config = dd.ToolConfig( + tool_alias="doc-search", + providers=[provider_name], + allow_tools=["list_docs", "search_docs"], + max_tool_call_turns=100, + timeout_sec=30.0, + ) + + config_builder = dd.DataDesignerConfigBuilder(tool_configs=[tool_config]) + config_builder.add_column( + dd.SamplerColumnConfig( + name="seed_id", + sampler_type=dd.SamplerType.UUID, + params=dd.UUIDSamplerParams(), + drop=True, + ) + ) + + config_builder.add_column( + dd.LLMStructuredColumnConfig( + name="topic_candidates", + model_alias=model_alias, + prompt="Extract a high-level list of all topics covered by documents our knowledge base.", + system_prompt=( + "You must call tools before answering. " + "Do not use outside knowledge; only use tool results. " + "You can use as many tool calls as required to answer the user query." 
+ ), + output_format=TopicList, + tool_alias="doc-search", + with_trace=dd.TraceType.ALL_MESSAGES, # Enable trace to capture tool call history + ) + ) + + config_builder.add_column( + dd.ExpressionColumnConfig( + name="topic", + expr="{{ topic_candidates.topics | random }}", + ) + ) + + qa_prompt = """\ +Create a question-answer pair on the topic "{{topic}}", with supporting text and citation. +The supporting_passage must be a 2-4 sentence excerpt copied from the tool result that demonstrates +why the answer is correct. +""" + + config_builder.add_column( + dd.LLMStructuredColumnConfig( + name="qa_pair", + model_alias=model_alias, + prompt=qa_prompt, + system_prompt=( + "You must call tools before answering. " + "Do not use outside knowledge; only use tool results. " + "You can use as many tool calls as required to answer the user query." + ), + output_format=QAPair, + tool_alias="doc-search", + with_trace=dd.TraceType.ALL_MESSAGES, # Enable trace to capture tool call history + extract_reasoning_content=True, + ) + ) + + config_builder.add_column( + dd.ExpressionColumnConfig( + name="question", + expr="{{ qa_pair.question }}", + ) + ) + config_builder.add_column( + dd.ExpressionColumnConfig( + name="answer", + expr="{{ qa_pair.answer }}", + ) + ) + config_builder.add_column( + dd.ExpressionColumnConfig( + name="supporting_passage", + expr="{{ qa_pair.supporting_passage }}", + ) + ) + config_builder.add_column( + dd.ExpressionColumnConfig( + name="citation", + expr="{{ qa_pair.citation }}", + ) + ) + return config_builder + + +def generate_preview( + config_builder: dd.DataDesignerConfigBuilder, + num_records: int, + mcp_provider: dd.LocalStdioMCPProvider, +) -> PreviewResults: + """Run Data Designer preview with the MCP provider.""" + data_designer = DataDesigner(mcp_providers=[mcp_provider]) + # Traces are enabled per-column via with_trace=True on LLM column configs + return data_designer.preview(config_builder, num_records=num_records) + + +def _truncate(text: 
str, max_length: int = 100) -> str: + """Truncate text to max_length, adding ellipsis if needed.""" + text = text.replace("\n", " ").strip() + if len(text) <= max_length: + return text + return text[: max_length - 3] + "..." + + +def _summarize_content(content: object) -> str: + """Summarize ChatML-style content blocks for display.""" + if isinstance(content, list): + parts: list[str] = [] + for block in content: + if isinstance(block, dict): + block_type = block.get("type", "block") + if block_type == "text": + text = str(block.get("text", "")) + if text: + parts.append(text) + elif block_type == "image_url": + parts.append("[image]") + else: + parts.append(f"[{block_type}]") + else: + parts.append(str(block)) + return " ".join(parts) + return str(content) + + +def _format_trace_step(msg: dict[str, object]) -> str: + """Format a single trace message as a concise one-liner.""" + role = msg.get("role", "unknown") + content = _summarize_content(msg.get("content", "")) + reasoning = msg.get("reasoning_content") + tool_calls = msg.get("tool_calls") + tool_call_id = msg.get("tool_call_id") + + if role == "system": + return f"[bold cyan]system[/]({_truncate(str(content))})" + + if role == "user": + return f"[bold green]user[/]({_truncate(str(content))})" + + if role == "assistant": + parts: list[str] = [] + if reasoning: + parts.append(f"[bold magenta]reasoning[/]({_truncate(str(reasoning))})") + if tool_calls and isinstance(tool_calls, list): + for tc in tool_calls: + if isinstance(tc, dict): + func = tc.get("function", {}) + if isinstance(func, dict): + name = func.get("name", "?") + args = func.get("arguments", "") + parts.append(f"[bold yellow]tool_call[/]({name}: {_truncate(str(args), 60)})") + if content: + parts.append(f"[bold blue]content[/]({_truncate(str(content))})") + return "\n".join(parts) if parts else "[bold blue]assistant[/](empty)" + + if role == "tool": + tool_id = str(tool_call_id or "?")[:8] + return f"[bold red]tool_response[/]([{tool_id}] 
{_truncate(str(content), 80)})" + + return f"[dim]{role}[/]({_truncate(str(content))})" + + +def _display_column_trace(column_name: str, trace: list[dict[str, object]]) -> None: + """Display a trace for a single column using Rich Panel.""" + from rich.console import Console + from rich.panel import Panel + + console = Console() + lines: list[str] = [] + + for msg in trace: + if not isinstance(msg, dict): + continue + formatted = _format_trace_step(msg) + for line in formatted.split("\n"): + lines.append(f" * {line}") + + trace_content = "\n".join(lines) if lines else " (no trace messages)" + panel = Panel( + trace_content, + title=f"[bold]Column Trace: {column_name}[/]", + border_style="blue", + padding=(0, 1), + ) + console.print(panel) + + +def display_preview_record(preview_results: PreviewResults) -> None: + """Display a sample record from the preview results with trace visualization.""" + from rich.console import Console + + console = Console() + dataset = preview_results.dataset + + if dataset is None or dataset.empty: + console.print("[red]No preview records generated.[/]") + return + + record = dataset.iloc[0].to_dict() + + # Find trace columns and their base column names + trace_columns = [col for col in dataset.columns if col.endswith("__trace")] + + # Display non-trace columns as summary + non_trace_record = {k: v for k, v in record.items() if not k.endswith("__trace")} + console.print("\n[bold]Sample Record (data columns):[/]") + console.print(json.dumps(non_trace_record, indent=2, default=str)) + + # Display each trace column in its own panel + if trace_columns: + console.print("\n[bold]Generation Traces:[/]") + for trace_col in trace_columns: + base_name = trace_col.replace("__trace", "") + trace_data = record.get(trace_col) + if isinstance(trace_data, list): + _display_column_trace(base_name, trace_data) + + preview_results.display_sample_record() + + +def serve() -> None: + """Run the MCP server (called when launched as subprocess by Data 
Designer).""" + pdf_sources_json = os.environ.get("PDF_SOURCES", "[]") + pdf_sources = json.loads(pdf_sources_json) + if not pdf_sources: + pdf_sources = [DEFAULT_PDF_URL] + initialize_search_index(pdf_sources) + mcp_server.run() + + +def parse_args() -> argparse.Namespace: + """Parse command line arguments.""" + parser = argparse.ArgumentParser(description="Generate document Q&A pairs using MCP tool calls with BM25S search.") + subparsers = parser.add_subparsers(dest="command") + + # 'serve' subcommand for running the MCP server + subparsers.add_parser("serve", help="Run the MCP server (used by Data Designer)") + + # Default command arguments (demo mode) + parser.add_argument("--model-alias", type=str, default="nvidia-reasoning", help="Model alias to use for generation") + parser.add_argument("--num-records", type=int, default=4, help="Number of Q&A pairs to generate") + parser.add_argument( + "--pdf", + type=str, + action="append", + dest="pdfs", + metavar="PATH_OR_URL", + help="PDF file path or URL to index (can be specified multiple times). 
Defaults to a sample PDF if not provided.", + ) + # For compatibility with Makefile test-run-recipes target (ignored in demo mode) + parser.add_argument("--artifact-path", type=str, default=None, help=argparse.SUPPRESS) + + return parser.parse_args() + + +def main() -> None: + """Main entry point for the demo.""" + args = parse_args() + + # Handle 'serve' subcommand + if args.command == "serve": + serve() + return + + # Demo mode: run Data Designer with the BM25S MCP server + if os.environ.get("NVIDIA_API_KEY") is None and args.model_alias.startswith("nvidia"): + raise RuntimeError("NVIDIA_API_KEY must be set when using NVIDIA model aliases.") + + # Use provided PDFs or fall back to default + pdf_sources = args.pdfs if args.pdfs else [DEFAULT_PDF_URL] + + # Configure MCP provider to run via stdio transport (local subprocess) + mcp_provider = dd.LocalStdioMCPProvider( + name=MCP_SERVER_NAME, + command=sys.executable, + args=[str(Path(__file__).resolve()), "serve"], + env={"PDF_SOURCES": json.dumps(pdf_sources)}, + ) + + config_builder = build_config( + model_alias=args.model_alias, + provider_name=MCP_SERVER_NAME, + ) + + preview_results = generate_preview( + config_builder=config_builder, + num_records=args.num_records, + mcp_provider=mcp_provider, + ) + + display_preview_record(preview_results) + + +if __name__ == "__main__": + main() diff --git a/fern/assets/recipes/qa_and_chat/multi_turn_chat.py b/fern/assets/recipes/qa_and_chat/multi_turn_chat.py new file mode 100644 index 000000000..b4debed74 --- /dev/null +++ b/fern/assets/recipes/qa_and_chat/multi_turn_chat.py @@ -0,0 +1,204 @@ +from pathlib import Path +from typing import Literal + +from pydantic import BaseModel, Field + +from data_designer.essentials import ( + CategorySamplerParams, + DataDesigner, + DataDesignerConfigBuilder, + LLMJudgeColumnConfig, + LLMStructuredColumnConfig, + LLMTextColumnConfig, + SamplerColumnConfig, + SamplerType, + Score, + SubcategorySamplerParams, +) +from 
data_designer.interface.results import DatasetCreationResults + + +def build_config(model_alias: str) -> DataDesignerConfigBuilder: + config_builder = DataDesignerConfigBuilder() + + config_builder.add_column( + SamplerColumnConfig( + name="domain", + sampler_type=SamplerType.CATEGORY, + params=CategorySamplerParams(values=["Tech Support", "Personal Finances", "Educational Guidance"]), + ) + ) + + config_builder.add_column( + SamplerColumnConfig( + name="topic", + sampler_type=SamplerType.SUBCATEGORY, + params=SubcategorySamplerParams( + category="domain", + values={ + "Tech Support": [ + "Troubleshooting a Laptop", + "Setting Up a Home Wi-Fi Network", + "Installing Software Updates", + ], + "Personal Finances": [ + "Budgeting Advice", + "Understanding Taxes", + "Investment Strategies", + ], + "Educational Guidance": [ + "Choosing a College Major", + "Effective Studying Techniques", + "Learning a New Language", + ], + }, + ), + ) + ) + + config_builder.add_column( + SamplerColumnConfig( + name="complexity", + sampler_type=SamplerType.CATEGORY, + params=CategorySamplerParams(values=["Basic", "Intermediate", "Advanced"]), + ) + ) + + config_builder.add_column( + SamplerColumnConfig( + name="conversation_length", + sampler_type=SamplerType.CATEGORY, + params=CategorySamplerParams(values=[2, 4, 6, 8]), + ) + ) + + config_builder.add_column( + SamplerColumnConfig( + name="user_mood", + sampler_type=SamplerType.CATEGORY, + params=CategorySamplerParams(values=["happy", "silly", "sarcastic", "combative", "disappointed", "toxic"]), + ) + ) + + config_builder.add_column( + LLMTextColumnConfig( + name="assistant_system_prompt", + prompt=( + "Write a reasonable system prompt for a helpful AI assistant with expertise in " + "{{domain}} and {{topic}}. The AI assistant must not engage in harmful behaviors." 
+ ), + model_alias=model_alias, + ) + ) + + config_builder.add_column( + LLMTextColumnConfig( + name="user_task", + prompt="Define a simple task related to {{topic}} of {{complexity}} complexity for the user.", + model_alias=model_alias, + ) + ) + + config_builder.add_column( + LLMStructuredColumnConfig( + name="conversation", + prompt=( + "\n{{user_task}}\n\n\n" + "{{assistant_system_prompt}}\n\n" + "Generate a conversation between a user and an AI assistant with about .\n" + "User is asking the assistant for advice and is in a {{user_mood}} mood.\n" + "The conversation must be {{conversation_length}} messages in length.\n" + "The conversation must come to a natural end in {{conversation_length}} messages and if the assistant is unable " + "to solve the user's needs by then, they should offer to continue the conversation later or redirect them to additional resources.\n" + "The conversation must be realistic and natural, incorporating details from both the user's and assistant's profiles.\n" + "The AI assistant role has the system prompt defined in . 
All of its responses must conform to :\n" + ), + output_format=ChatConversation, + model_alias=model_alias, + ) + ) + + config_builder.add_column( + LLMJudgeColumnConfig( + name="toxicity_evaluation", + prompt=TOXICITY_PROMPT_TEMPLATE, + scores=[toxicity_score], + model_alias=model_alias, + ) + ) + + return config_builder + + +def create_dataset( + config_builder: DataDesignerConfigBuilder, + num_records: int, + artifact_path: Path | str | None = None, +) -> DatasetCreationResults: + data_designer = DataDesigner(artifact_path=artifact_path) + results = data_designer.create(config_builder, num_records=num_records) + return results + + +toxicity_score = Score( + name="Toxicity", + description="Evaluation of harmful, harassing, or hateful language in conversations", + options={ + "4": "No toxicity detected; conversation is respectful and appropriate.", + "3": "Mild toxicity; slightly rude or sarcastic but not hateful or harmful.", + "2": "Moderate toxicity; contains disrespectful or harassing language.", + "1": "Severe toxicity; contains overt hate, harassment, or harmful content.", + "0": "Extreme toxicity; contains dangerous, threatening, or abusive content.", + }, +) + +TOXICITY_PROMPT_TEMPLATE = ( + "You are an expert in content moderation and digital communication ethics. " + "You are evaluating the toxicity level in user messages.\n" + "Take a deep breath and carefully analyze the conversation provided. 
\n" + "Use the rubric to score the level of toxicity in the USER'S messages only.\n" + "Consider:\n" + "- Focus primarily on harassment, hate, or harmful language\n" + "- Evaluate the full context of the conversation\n" + "- Be objective and consistent in your assessment\n" + "- If any single message exhibits toxicity, it should influence the final score\n" + "## CONVERSATION\n" + "{{ conversation }}" +) + + +class Message(BaseModel): + """A single message turn in the conversation.""" + + role: Literal["user", "assistant"] = Field(..., description="Which role is writing the message.") + content: str = Field(..., description="Message contents.") + + +class ChatConversation(BaseModel): + """A chat conversation between a specific user and an AI assistant. + * All conversations are initiated by the user role. + * The assistant role always responds to the user message. + * Turns alternate between user and assistant roles. + * The last message is always from the assistant role. + * Message content can be long or short. + * All assistant messages are faithful responses and must be answered fully. 
+ """ + + conversation: list[Message] = Field(..., description="List of all messages in the conversation.") + + +if __name__ == "__main__": + from argparse import ArgumentParser + + parser = ArgumentParser() + parser.add_argument("--model-alias", type=str, default="openai-text") + parser.add_argument("--num-records", type=int, default=5) + parser.add_argument("--artifact-path", type=str, default=None) + args = parser.parse_args() + + config_builder = build_config(model_alias=args.model_alias) + results = create_dataset(config_builder, num_records=args.num_records, artifact_path=args.artifact_path) + + print(f"Dataset saved to: {results.artifact_storage.final_dataset_path}") + + results.load_analysis().to_report() diff --git a/fern/assets/recipes/qa_and_chat/product_info_qa.py b/fern/assets/recipes/qa_and_chat/product_info_qa.py new file mode 100644 index 000000000..59a0110f6 --- /dev/null +++ b/fern/assets/recipes/qa_and_chat/product_info_qa.py @@ -0,0 +1,224 @@ +import string +from pathlib import Path + +from pydantic import BaseModel, Field + +from data_designer.essentials import ( + BernoulliSamplerParams, + CategorySamplerParams, + DataDesigner, + DataDesignerConfigBuilder, + ExpressionColumnConfig, + LLMJudgeColumnConfig, + LLMStructuredColumnConfig, + LLMTextColumnConfig, + SamplerColumnConfig, + SamplerType, + Score, + UniformSamplerParams, +) +from data_designer.interface.results import DatasetCreationResults + + +def build_config(model_alias: str) -> DataDesignerConfigBuilder: + config_builder = DataDesignerConfigBuilder() + config_builder.add_column( + SamplerColumnConfig( + name="category", + sampler_type=SamplerType.CATEGORY, + params=CategorySamplerParams( + values=[ + "Electronics", + "Clothing", + "Home Appliances", + "Groceries", + "Toiletries", + "Sports Equipment", + "Toys", + "Books", + "Pet Supplies", + "Tools & Home Improvement", + "Beauty", + "Health & Wellness", + "Outdoor Gear", + "Automotive", + "Jewelry", + "Watches", + "Office Supplies", 
+ "Gifts", + "Arts & Crafts", + "Baby & Kids", + "Music", + "Video Games", + "Movies", + "Software", + "Tech Devices", + ] + ), + ) + ) + + config_builder.add_column( + SamplerColumnConfig( + name="price_tens_of_dollars", + sampler_type=SamplerType.UNIFORM, + params=UniformSamplerParams(low=1, high=200), + ) + ) + + config_builder.add_column( + ExpressionColumnConfig( + name="product_price", + expr="{{ (price_tens_of_dollars * 10) - 0.01 | round(2) }}", + dtype="float", + ) + ) + + config_builder.add_column( + SamplerColumnConfig( + name="first_letter", + sampler_type=SamplerType.CATEGORY, + params=CategorySamplerParams(values=list(string.ascii_uppercase)), + ) + ) + + config_builder.add_column( + SamplerColumnConfig( + name="is_hallucination", + sampler_type=SamplerType.BERNOULLI, + params=BernoulliSamplerParams(p=0.5), + ) + ) + + config_builder.add_column( + LLMStructuredColumnConfig( + name="product_info", + model_alias=model_alias, + prompt=( + "Generate a realistic product description for a product in the {{ category }} " + "category that costs {{ product_price }}.\n" + "The name of the product MUST start with the letter {{ first_letter }}.\n" + ), + output_format=ProductInfo, + ) + ) + + config_builder.add_column( + LLMTextColumnConfig( + name="question", + model_alias=model_alias, + prompt=("Ask a question about the following product:\n\n {{ product_info }}"), + ) + ) + + config_builder.add_column( + LLMTextColumnConfig( + name="answer", + model_alias=model_alias, + prompt=( + "{%- if is_hallucination == 0 -%}\n" + "\n" + "{{ product_info }}\n" + "\n" + "{%- endif -%}\n" + "User Question: {{ question }}\n" + "Directly and succinctly answer the user's question.\n" + "{%- if is_hallucination == 1 -%}\n" + "Make up whatever information you need to in order to answer the user's request.\n" + "{%- endif -%}" + ), + ) + ) + + # Evaluate answer quality + config_builder.add_column( + LLMJudgeColumnConfig( + name="llm_answer_metrics", + model_alias=model_alias, + 
prompt=( + "\n" + "{{ product_info }}\n" + "\n" + "User Question: {{question }}\n" + "AI Assistant Answer: {{ answer }}\n" + "Judge the AI assistant's response to the user's question about the product described in ." + ), + scores=answer_quality_scores, + ) + ) + + config_builder.add_column( + ExpressionColumnConfig( + name="completeness_result", + expr="{{ llm_answer_metrics.Completeness.score }}", + ) + ) + + config_builder.add_column( + ExpressionColumnConfig( + name="accuracy_result", + expr="{{ llm_answer_metrics.Accuracy.score }}", + ) + ) + + return config_builder + + +def create_dataset( + config_builder: DataDesignerConfigBuilder, + num_records: int, + artifact_path: Path | str | None = None, +) -> DatasetCreationResults: + data_designer = DataDesigner(artifact_path=artifact_path) + results = data_designer.create(config_builder, num_records=num_records) + return results + + +class ProductInfo(BaseModel): + product_name: str = Field(..., description="A realistic product name for the market.") + key_features: list[str] = Field(..., min_length=1, max_length=3, description="Key product features.") + description: str = Field( + ..., + description="A short, engaging description of what the product does, highlighting a unique but believable feature.", + ) + price_usd: float = Field(..., description="The price of the product", ge=10, le=1000, decimal_places=2) + + +completeness_score = Score( + name="Completeness", + description="Evaluation of AI assistant's thoroughness in addressing all aspects of the user's query.", + options={ + "Complete": "The response thoroughly covers all key points requested in the question, providing sufficient detail to satisfy the user's information needs.", + "PartiallyComplete": "The response addresses the core question but omits certain important details or fails to elaborate on relevant aspects that were requested.", + "Incomplete": "The response significantly lacks necessary information, missing major components of what was asked 
and leaving the query largely unanswered.", + }, +) + +accuracy_score = Score( + name="Accuracy", + description="Evaluation of how factually correct the AI assistant's response is relative to the product information.", + options={ + "Accurate": "The information provided aligns perfectly with the product specifications without introducing any misleading or incorrect details.", + "PartiallyAccurate": "While some information is correctly stated, the response contains minor factual errors or potentially misleading statements about the product.", + "Inaccurate": "The response presents significantly wrong information about the product, with claims that contradict the actual product details.", + }, +) + +answer_quality_scores = [completeness_score, accuracy_score] + + +if __name__ == "__main__": + from argparse import ArgumentParser + + parser = ArgumentParser() + parser.add_argument("--model-alias", type=str, default="openai-text") + parser.add_argument("--num-records", type=int, default=5) + parser.add_argument("--artifact-path", type=str, default=None) + args = parser.parse_args() + + config_builder = build_config(model_alias=args.model_alias) + results = create_dataset(config_builder, num_records=args.num_records, artifact_path=args.artifact_path) + + print(f"Dataset saved to: {results.artifact_storage.final_dataset_path}") + + results.load_analysis().to_report() diff --git a/fern/components/Authors.tsx b/fern/components/Authors.tsx new file mode 100644 index 000000000..0772dbdde --- /dev/null +++ b/fern/components/Authors.tsx @@ -0,0 +1,51 @@ +/** + * Authors - Renders author byline with avatars for dev notes. + * + * Uses authors data from components/devnotes/authors-data.ts (synced with .authors.yml). + * NOTE: Fern's custom component pipeline uses the automatic JSX runtime. 
+ * + * Usage in MDX (authors from frontmatter): + * --- + * authors: + * - dcorneil + * - etramel + * --- + * + * import { Authors } from "@/components/Authors"; + * + */ + +import { authors } from "./devnotes/authors-data"; + +export interface AuthorsProps { + /** Author IDs from .authors.yml (e.g. dcorneil, etramel, kthadaka, nvidia). From frontmatter: ids={authors} */ + ids?: string[]; +} + +export const Authors = ({ ids }: AuthorsProps) => { + const validAuthors = (ids ?? []) + .map((id) => authors[id]) + .filter(Boolean); + + if (validAuthors.length === 0) return null; + + return ( +
+ {validAuthors.map((author, i) => ( +
+ +
+ {author.name} + {author.description} +
+
+ ))} +
+ ); +}; diff --git a/fern/components/CustomCard.tsx b/fern/components/CustomCard.tsx new file mode 100644 index 000000000..16fc0e2c2 --- /dev/null +++ b/fern/components/CustomCard.tsx @@ -0,0 +1,10 @@ +export const CustomCard = ({ title, text, link, sparkle = false }) => { + return ( + +

+ {title} {sparkle && "✨"} +

+

{text}

+
+ ); +}; diff --git a/fern/components/CustomFooter.tsx b/fern/components/CustomFooter.tsx new file mode 100644 index 000000000..fab392c40 --- /dev/null +++ b/fern/components/CustomFooter.tsx @@ -0,0 +1,91 @@ +/** + * Custom footer for NVIDIA docs (Fern native header/footer). + * Markup and class names match the original custom-app footer 1:1 so that + * fern/main.css (footer + Built with Fern styles) applies correctly: + * dark mode logo, responsive layout, and Built with Fern tooltip. + */ +export default function CustomFooter() { + const currentYear = new Date().getFullYear(); + const logoUrl = + "https://fern-image-hosting.s3.us-east-1.amazonaws.com/nvidia/NVIDIA_Logo_0.svg"; + + return ( + + ); +} diff --git a/fern/components/ExpandableCode.tsx b/fern/components/ExpandableCode.tsx new file mode 100644 index 000000000..889073aa7 --- /dev/null +++ b/fern/components/ExpandableCode.tsx @@ -0,0 +1,72 @@ +/** + * ExpandableCode - Collapsible code block with summary and copy button. + * + * Used for "Full source" code snippets in dev notes. + * NOTE: Fern's custom component pipeline uses the automatic JSX runtime. + * Do NOT import React -- the `react` module is not resolvable in Fern's build. + * + * Usage in MDX: + * import { ExpandableCode } from "@/components/ExpandableCode"; + * + * + */ + +export interface ExpandableCodeProps { + summary: string; + code: string; + language?: string; + defaultOpen?: boolean; +} + +function CopyButton({ text }: { text: string }) { + return ( + + ); +} + +export const ExpandableCode = ({ + summary, + code, + language = "python", + defaultOpen = false, +}: ExpandableCodeProps) => { + const trimmed = code.trim(); + return ( +
+ + {summary} + {language} + +
+
+ +
+
+          {trimmed}
+        
+
+
+ ); +}; diff --git a/fern/components/MetricsTable.tsx b/fern/components/MetricsTable.tsx new file mode 100644 index 000000000..37911dc1a --- /dev/null +++ b/fern/components/MetricsTable.tsx @@ -0,0 +1,101 @@ +/** + * MetricsTable - Styled comparison table for benchmark results. + * + * Optional: highlights best values per column (bold). + * NOTE: Fern's custom component pipeline uses the automatic JSX runtime. + * Do NOT import React -- the `react` module is not resolvable in Fern's build. + * + * Usage in MDX: + * import { MetricsTable } from "@/components/MetricsTable"; + * + * + */ + +export interface MetricsTableProps { + headers: string[]; + rows: (string | number)[][]; + /** Column indices where lower is better (for highlighting) */ + lowerIsBetter?: number[]; + /** Column indices where higher is better (default for non-lowerIsBetter) */ + higherIsBetter?: number[]; +} + +function findBestIndices( + rows: (string | number)[][], + colIndex: number, + lowerIsBetter: boolean +): Set { + const values = rows.map((r) => { + const v = r[colIndex]; + if (typeof v === "number") return v; + const parsed = parseFloat(String(v)); + return isNaN(parsed) ? (lowerIsBetter ? Infinity : -Infinity) : parsed; + }); + const best = lowerIsBetter ? 
Math.min(...values) : Math.max(...values); + const bestIndices = new Set(); + values.forEach((v, i) => { + if (v === best) bestIndices.add(i); + }); + return bestIndices; +} + +export const MetricsTable = ({ + headers, + rows, + lowerIsBetter = [], + higherIsBetter = [], +}: MetricsTableProps) => { + const lowerSet = new Set(lowerIsBetter); + const bestByCol: Record> = {}; + + for (let c = 0; c < headers.length; c++) { + if (lowerSet.has(c)) { + bestByCol[c] = findBestIndices(rows, c, true); + } else if (higherIsBetter.includes(c)) { + bestByCol[c] = findBestIndices(rows, c, false); + } else { + const numLike = rows.every((r) => { + const v = r[c]; + return typeof v === "number" || !isNaN(parseFloat(String(v))); + }); + if (numLike) { + bestByCol[c] = findBestIndices(rows, c, false); + } + } + } + + return ( +
+ + + + {headers.map((h, i) => ( + + ))} + + + + {rows.map((row, rowIdx) => ( + + {row.map((cell, colIdx) => { + const isBest = bestByCol[colIdx]?.has(rowIdx); + return ( + + ); + })} + + ))} + +
{h}
+ {cell} +
+
+ ); +}; diff --git a/fern/components/NotebookViewer.tsx b/fern/components/NotebookViewer.tsx new file mode 100644 index 000000000..b6c9c484a --- /dev/null +++ b/fern/components/NotebookViewer.tsx @@ -0,0 +1,444 @@ +import type { ReactNode } from "react"; + +/** + * NotebookViewer - Renders Jupyter notebook content in Fern docs. + * + * Uses Fern's code block structure (fern-code, fern-code-block, etc.) so input + * and output cells match the default Fern code block styling. + * + * Accepts notebook cells (markdown + code) and optionally a Colab URL. + * Designed to work with Jupytext-generated notebooks from docs/notebook_source/*.py. + * + * NOTE: Fern's custom component pipeline uses the automatic JSX runtime. + * Only type-only imports from "react" are used (erased at compile time). + * + * Usage in MDX: + * import { NotebookViewer } from "@/components/NotebookViewer"; + * import notebook from "@/components/notebooks/1-the-basics"; + * + * + */ + +export interface CellOutput { + type: "text" | "image"; + data: string; + format?: "plain" | "html"; +} + +export interface NotebookCell { + type: "markdown" | "code"; + source: string; + /** Pre-rendered syntax-highlighted HTML (from Pygments). When present, used instead of escaped source. */ + source_html?: string; + language?: string; + outputs?: CellOutput[]; +} + +export interface NotebookData { + cells: NotebookCell[]; +} + +export interface NotebookViewerProps { + /** Notebook data with cells array. If import fails, this may be undefined. */ + notebook?: NotebookData | null; + /** Optional Colab URL for "Run in Colab" badge */ + colabUrl?: string; + /** Show code cell outputs (default: true) */ + showOutputs?: boolean; +} + +function NotebookViewerError({ message, detail }: { message: string; detail?: string }) { + return ( +
+ NotebookViewer error: {message} + {detail && ( +
+          {detail}
+        
+ )} +
+  );
+}
+
+function escapeHtml(text: string): string {
+  if (typeof text !== "string") return "";
+  return text
+    .replace(/&/g, "&amp;")
+    .replace(/</g, "&lt;")
+    .replace(/>/g, "&gt;")
+    .replace(/"/g, "&quot;");
+}
+
+function isSafeUrl(url: string): boolean {
+  const trimmed = url.trim();
+  return (
+    trimmed.startsWith("http://") ||
+    trimmed.startsWith("https://") ||
+    trimmed.startsWith("mailto:") ||
+    trimmed.startsWith("#") ||
+    trimmed.startsWith("/")
+  );
+}
+
+const UL_CLASS =
+  "[&>li]:relative [&>li]:before:text-(color:--grayscale-a10) mb-3 list-none pl-3 [&>li]:pl-3 [&>li]:before:absolute [&>li]:before:ml-[-22px] [&>li]:before:mt-[-1px] [&>li]:before:content-['⦁'] [&>li]:before:self-center";
+const OL_CLASS = "mb-3 list-outside list-decimal [&_ol]:!list-[lower-roman]";
+
+function renderMarkdown(markdown: string): string {
+  if (typeof markdown !== "string") return "";
+  let html = markdown
+    .replace(/<br\s*\/?>/gi, "\u0000BR\u0000")
+    .replace(/&/g, "&amp;")
+    .replace(/</g, "&lt;")
+    .replace(/>/g, "&gt;")
+    .replace(/\u0000BR\u0000/g, "<br />")
+    .replace(/\[([^\]]+)\]\(([^)]+)\)/g, (_, text, url) => {
+      if (!isSafeUrl(url)) return escapeHtml(`[${text}](${url})`);
+      const isInternal = url.startsWith("/") || url.startsWith("#");
+      const attrs = isInternal
+        ? `href="${escapeHtml(url)}" class="fern-mdx-link"`
+        : `href="${escapeHtml(url)}" target="_blank" rel="noopener noreferrer" class="fern-mdx-link"`;
+      // NOTE(review): the external-link icon SVG string was lost in this copy — restore from the original component.
+      const icon =
+        isInternal
+          ? ""
+          : '';
+      return `<a ${attrs}>${text}${icon}</a>`;
+    })
+    .replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>')
+    .replace(/\*(.*?)\*/g, "<em>$1</em>")
+    .replace(/`([^`]+)`/g, "<code>$1</code>");
+  html = html
+    .split("\n")
+    .map((line) => {
+      if (/^#### (.*)$/.test(line)) return `

<h4>${line.slice(5)}</h4>`;
+      if (/^### (.*)$/.test(line)) return `<h3>${line.slice(4)}</h3>`;
+      if (/^## (.*)$/.test(line)) return `<h2>${line.slice(3)}</h2>`;
+      if (/^# (.*)$/.test(line)) return `<h1>${line.slice(2)}</h1>`;
+      if (/^- (.*)$/.test(line)) return `<li data-ul>${line.slice(2)}</li>`;
+      if (/^\d+\. (.*)$/.test(line)) return `<li data-ol>${line.replace(/^\d+\. /, "")}</li>`;
+      if (line.trim() === "") return "";
+      return `

<p>${line}</p>`;
+    })
+    .join("\n");
+  html = html.replace(
+    /(<li data-ol>.*?<\/li>\s*)+/gs,
+    (m) => `<ol class="${OL_CLASS}">${m.replace(/ data-ol/g, "").trim()}</ol>`
+  );
+  html = html.replace(
+    /(<li data-ul>.*?<\/li>\s*)+/gs,
+    (m) => `<ul class="${UL_CLASS}">${m.replace(/ data-ul/g, "").trim()}</ul>
    ` + ); + return html; +} + +function handleCopy(content: string, button: HTMLButtonElement) { + navigator.clipboard.writeText(content).catch(() => {}); + const originalHtml = button.innerHTML; + const originalLabel = button.getAttribute("aria-label") ?? "Copy code"; + button.innerHTML = "Copied!"; + button.setAttribute("aria-label", "Copied to clipboard"); + setTimeout(() => { + button.innerHTML = originalHtml; + button.setAttribute("aria-label", originalLabel); + }, 1500); +} + +const FLAG_ICON = ( + + + + +); + +const SCROLL_AREA_STYLE = `[data-radix-scroll-area-viewport]{scrollbar-width:none;-ms-overflow-style:none;-webkit-overflow-scrolling:touch;}[data-radix-scroll-area-viewport]::-webkit-scrollbar{display:none}`; + +const BUTTON_BASE_CLASS = + "focus-visible:ring-(color:--accent) rounded-2 inline-flex items-center justify-center gap-2 whitespace-nowrap text-sm font-medium transition-colors hover:transition-none focus-visible:outline-none focus-visible:ring-1 disabled:pointer-events-none disabled:opacity-50 [&_svg]:pointer-events-none [&_svg]:size-4 [&_svg]:shrink-0 text-(color:--grayscale-a11) hover:bg-(color:--accent-a3) hover:text-(color:--accent-11) pointer-coarse:size-9 size-7"; + +/** Fern code block structure – matches Fern docs (header with language + buttons, pre with scroll area). */ +function FernCodeBlock({ + title, + children, + className = "", + asPre = true, + copyContent, + showLineNumbers = false, + codeHtml, +}: { + title: string; + children: ReactNode; + className?: string; + /** Use div instead of pre for content (needed when children include block elements like img/div). */ + asPre?: boolean; + /** Raw text to copy when copy button is clicked. When provided, shows a copy button. */ + copyContent?: string; + /** Show line numbers in a table layout (matches Fern's code block structure). */ + showLineNumbers?: boolean; + /** Pre-rendered HTML for each line when showLineNumbers is true. Lines are split by newline. 
*/ + codeHtml?: string; +}) { + const headerLabel = title === "Output" ? "Output" : title.charAt(0).toUpperCase() + title.slice(1); + const wrapperClasses = + "fern-code fern-code-block bg-card-background border-card-border rounded-3 shadow-card-grayscale relative mb-6 mt-4 flex w-full min-w-0 max-w-full flex-col border first:mt-0"; + const preStyle = { + backgroundColor: "rgb(255, 255, 255)", + ["--shiki-dark-bg" as string]: "#212121", + color: "rgb(36, 41, 46)", + ["--shiki-dark" as string]: "#EEFFFF", + }; + + const scrollAreaContent = () => { + if (codeHtml == null) return null; + const lines = codeHtml.split("\n"); + return ( +
    +