From ed4c3de1c15439c628da3dfc2ad6b9cc6b8fc5ef Mon Sep 17 00:00:00 2001
From: "Jonathan D. Rhyne" <jonathan@pspdfkit.com>
Date: Sat, 14 Mar 2026 15:40:23 -0400
Subject: [PATCH 1/2] refactor(skill): restructure Nutrient DWS skill as a
 modular router

Align the skill package with Codex-style packaging and turn the top-level skill into a compact router. Split the old monolithic reference into focused DWS cookbooks and add explicit coverage for generation, PDF/A, PDF/UA, optimization, and linearization so the skill stays broad without bloating invocation cost.
---
 README.md                                     |   7 +-
 .../{LICENSE => LICENSE.txt}                  |   0
 nutrient-document-processing/SKILL.md         | 351 ++--------
 .../agents/openai.yaml                        |   6 +
 .../assets/nutrient.svg                       |   4 +
 .../references/REFERENCE.md                   | 608 +-----------------
 .../references/compliance-and-optimization.md | 128 ++++
 .../references/extraction-and-ocr.md          | 100 +++
 .../references/generation-and-conversion.md   | 119 ++++
 .../references/request-basics.md              | 118 ++++
 .../references/security-signing-and-forms.md  | 141 ++++
 .../references/workflow-recipes.md            |  84 +++
 12 files changed, 793 insertions(+), 873 deletions(-)
 rename nutrient-document-processing/{LICENSE => LICENSE.txt} (100%)
 create mode 100644 nutrient-document-processing/agents/openai.yaml
 create mode 100644 nutrient-document-processing/assets/nutrient.svg
 create mode 100644 nutrient-document-processing/references/compliance-and-optimization.md
 create mode 100644 nutrient-document-processing/references/extraction-and-ocr.md
 create mode 100644 nutrient-document-processing/references/generation-and-conversion.md
 create mode 100644 nutrient-document-processing/references/request-basics.md
 create mode 100644 nutrient-document-processing/references/security-signing-and-forms.md
 create mode 100644 nutrient-document-processing/references/workflow-recipes.md
diff --git a/README.md b/README.md
index 6fb3f73..fa715f5 100644
--- a/README.md
+++ b/README.md
@@ -3,13 +3,13 @@
 <p align="center">
   <a href="https://www.nutrient.io/api/"><img src="https://img.shields.io/badge/Nutrient-DWS%20API-blue" alt="Nutrient DWS API"></a>
   <a href="https://www.npmjs.com/package/@nutrient-sdk/dws-mcp-server"><img src="https://img.shields.io/npm/v/@nutrient-sdk/dws-mcp-server" alt="npm version"></a>
-  <a href="LICENSE"><img src="https://img.shields.io/badge/license-Apache--2.0-green" alt="License"></a>
+  <a href="nutrient-document-processing/LICENSE.txt"><img src="https://img.shields.io/badge/license-Apache--2.0-green" alt="License"></a>
   <a href="https://agentskills.io"><img src="https://img.shields.io/badge/Agent%20Skills-compatible-purple" alt="Agent Skills"></a>
 </p>
 
 <p align="center">
   <strong>Give your AI agent PDF superpowers — in one command.</strong><br>
-  Convert, extract, OCR, redact, sign, and fill documents from any coding agent.
+  Generate, convert, extract, OCR, redact, sign, archive, and optimize documents from any coding agent.
 </p>
 
 <p align="center">
@@ -120,6 +120,7 @@ patient-records.pdf (contains PII)
 
 | Capability | Description | Example prompt |
 |------------|-------------|----------------|
+| ✨ **Generate** | Create PDFs from HTML templates, uploaded assets, or remote URLs | *"Generate a PDF proposal from this HTML template"* |
 | 📄 **Convert** | PDF ↔ DOCX/XLSX/PPTX, HTML → PDF, images → PDF | *"Convert report.docx to PDF"* |
 | 📝 **Extract** | Text, tables, and key-value pairs from PDFs | *"Extract all tables from invoice.pdf as Excel"* |
 | 🔍 **OCR** | Multi-language OCR for scanned documents | *"OCR this German scan and extract the text"* |
@@ -127,6 +128,8 @@ patient-records.pdf (contains PII)
 | 💧 **Watermark** | Text or image watermarks with full styling | *"Add a DRAFT watermark to proposal.pdf"* |
 | ✍️ **Sign** | CMS and CAdES digital signatures | *"Digitally sign contract.pdf"* |
 | 📋 **Fill Forms** | Programmatic PDF form filling | *"Fill the tax form with these values…"* |
+| 🗂️ **Compliance** | Convert PDFs for archival or accessibility targets like PDF/A and PDF/UA | *"Convert this PDF to PDF/A-2a"* |
+| ⚡ **Optimize** | Optimize and linearize PDFs for web delivery and download performance | *"Linearize this PDF for fast web viewing"* |
 | 📊 **Credits** | Monitor API usage and balance | *"How many API credits do I have left?"* |
 
 ---
diff --git a/nutrient-document-processing/LICENSE b/nutrient-document-processing/LICENSE.txt
similarity index 100%
rename from nutrient-document-processing/LICENSE
rename to nutrient-document-processing/LICENSE.txt
diff --git a/nutrient-document-processing/SKILL.md b/nutrient-document-processing/SKILL.md
index 1e9a563..fce5dcd 100644
--- a/nutrient-document-processing/SKILL.md
+++ b/nutrient-document-processing/SKILL.md
@@ -1,290 +1,75 @@
 ---
 name: nutrient-document-processing
-description: >-
-  Process documents with the Nutrient DWS API. Use this skill when the user wants to convert documents
-  (PDF, DOCX, XLSX, PPTX, HTML, images), extract text or tables from PDFs, OCR scanned documents,
-  redact sensitive information (PII, SSN, emails, credit cards), add watermarks, digitally sign PDFs,
-  fill PDF forms, or check API credit usage. Activates on keywords: PDF, document, convert, extract,
-  OCR, redact, watermark, sign, merge, compress, form fill, document processing.
-license: Apache-2.0
+description: Use when tasks involve generating PDFs from HTML or URLs, converting Office/images/PDFs, OCRing and extracting content, redacting, watermarking, signing, filling forms, merging, or producing compliance or delivery outputs like PDF/A, PDF/UA, and linearized PDFs with Nutrient DWS; prefer the Nutrient MCP server when it is already configured, otherwise call the API directly.
 metadata:
-  author: nutrient-sdk
-  version: "1.0"
-  homepage: "https://www.nutrient.io/api/"
-  repository: "https://github.com/PSPDFKit-labs/nutrient-agent-skill"
-  compatibility: "Requires Node.js 18+ and internet. Works with Claude Code, Codex CLI, Gemini CLI, OpenCode, Cursor, Windsurf, GitHub Copilot, Amp, or any Agent Skills-compatible product."
+  short-description: Generate, convert, OCR, redact, sign, archive, and optimize documents
 ---
 
 # Nutrient Document Processing
 
-Process, convert, extract, redact, sign, and manipulate documents using the [Nutrient DWS Processor API](https://www.nutrient.io/api/).
-
-## Setup
-
-You need a Nutrient DWS API key. Get one free at <https://dashboard.nutrient.io/sign_up/?product=processor>.
-
-### Option 1: MCP Server (Recommended)
-
-If your agent supports MCP (Model Context Protocol), use the Nutrient DWS MCP Server. It provides all operations as native tools.
-
-**Configure your MCP client** (e.g., `claude_desktop_config.json` or `.mcp.json`):
-
-```json
-{
-  "mcpServers": {
-    "nutrient-dws": {
-      "command": "npx",
-      "args": ["-y", "@nutrient-sdk/dws-mcp-server"],
-      "env": {
-        "NUTRIENT_DWS_API_KEY": "YOUR_API_KEY",
-        "SANDBOX_PATH": "/path/to/working/directory"
-      }
-    }
-  }
-}
-```
-
-Then use the MCP tools directly (e.g., `convert_to_pdf`, `extract_text`, `redact`, etc.).
-
-### Option 2: Direct API (curl)
-
-For agents without MCP support, call the API directly:
-
-```bash
-export NUTRIENT_API_KEY="your_api_key_here"
-```
-
-All requests go to `https://api.nutrient.io/build` as multipart POST with an `instructions` JSON field.
-
-## Operations
-
-### 1. Convert Documents
-
-Convert between PDF, DOCX, XLSX, PPTX, HTML, and image formats.
-
-**HTML to PDF:**
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "index.html=@index.html" \
-  -F 'instructions={"parts":[{"html":"index.html"}]}' \
-  -o output.pdf
-```
-
-**DOCX to PDF:**
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "document.docx=@document.docx" \
-  -F 'instructions={"parts":[{"file":"document.docx"}]}' \
-  -o output.pdf
-```
-
-**PDF to DOCX/XLSX/PPTX:**
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "document.pdf=@document.pdf" \
-  -F 'instructions={"parts":[{"file":"document.pdf"}],"output":{"type":"docx"}}' \
-  -o output.docx
-```
-
-**Image to PDF:**
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "image.jpg=@image.jpg" \
-  -F 'instructions={"parts":[{"file":"image.jpg"}]}' \
-  -o output.pdf
-```
-
-### 2. Extract Text and Data
-
-**Extract plain text:**
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "document.pdf=@document.pdf" \
-  -F 'instructions={"parts":[{"file":"document.pdf"}],"output":{"type":"text"}}' \
-  -o output.txt
-```
-
-**Extract tables (as JSON, CSV, or Excel):**
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "document.pdf=@document.pdf" \
-  -F 'instructions={"parts":[{"file":"document.pdf"}],"output":{"type":"xlsx"}}' \
-  -o tables.xlsx
-```
-
-**Extract key-value pairs:**
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "document.pdf=@document.pdf" \
-  -F 'instructions={"parts":[{"file":"document.pdf"}],"actions":[{"type":"extraction","strategy":"key-values"}]}' \
-  -o result.json
-```
-
-### 3. OCR Scanned Documents
-
-Apply OCR to scanned PDFs or images, producing searchable PDFs with selectable text.
-
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "scanned.pdf=@scanned.pdf" \
-  -F 'instructions={"parts":[{"file":"scanned.pdf"}],"actions":[{"type":"ocr","language":"english"}]}' \
-  -o searchable.pdf
-```
-
-Supported languages: `english`, `german`, `french`, `spanish`, `italian`, `portuguese`, `dutch`, `swedish`, `danish`, `norwegian`, `finnish`, `polish`, `czech`, `turkish`, `japanese`, `korean`, `chinese-simplified`, `chinese-traditional`, `arabic`, `hebrew`, `thai`, `hindi`, `russian`, and more.
-
-### 4. Redact Sensitive Information
-
-**Pattern-based redaction** (preset patterns):
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "document.pdf=@document.pdf" \
-  -F 'instructions={"parts":[{"file":"document.pdf"}],"actions":[{"type":"redaction","strategy":"preset","preset":"social-security-number"}]}' \
-  -o redacted.pdf
-```
-
-Available presets: `social-security-number`, `credit-card-number`, `email-address`, `north-american-phone-number`, `international-phone-number`, `date`, `url`, `ipv4`, `ipv6`, `mac-address`, `us-zip-code`, `vin`, `time`.
-
-**Regex-based redaction:**
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "document.pdf=@document.pdf" \
-  -F 'instructions={"parts":[{"file":"document.pdf"}],"actions":[{"type":"redaction","strategy":"regex","regex":"\\b[A-Z]{2}\\d{6}\\b"}]}' \
-  -o redacted.pdf
-```
-
-**AI-powered PII redaction:**
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "document.pdf=@document.pdf" \
-  -F 'instructions={"parts":[{"file":"document.pdf"}],"actions":[{"type":"ai_redaction","criteria":"All personally identifiable information"}]}' \
-  -o redacted.pdf
-```
-
-The `criteria` field accepts natural language (e.g., "Names and phone numbers", "Protected health information", "Financial account numbers").
-
-### 5. Add Watermarks
-
-**Text watermark:**
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "document.pdf=@document.pdf" \
-  -F 'instructions={"parts":[{"file":"document.pdf"}],"actions":[{"type":"watermark","text":"CONFIDENTIAL","fontSize":48,"fontColor":"#FF0000","opacity":0.5,"rotation":45,"width":"50%","height":"50%"}]}' \
-  -o watermarked.pdf
-```
-
-**Image watermark:**
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "document.pdf=@document.pdf" \
-  -F "logo.png=@logo.png" \
-  -F 'instructions={"parts":[{"file":"document.pdf"}],"actions":[{"type":"watermark","imagePath":"logo.png","width":"30%","height":"30%","opacity":0.3}]}' \
-  -o watermarked.pdf
-```
-
-### 6. Digital Signatures
-
-**Sign a PDF with CMS signature:**
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "document.pdf=@document.pdf" \
-  -F 'instructions={"parts":[{"file":"document.pdf"}],"actions":[{"type":"sign","signatureType":"cms","signerName":"John Doe","reason":"Approval","location":"New York"}]}' \
-  -o signed.pdf
-```
-
-**Sign with CAdES-B-LT (long-term validation):**
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "document.pdf=@document.pdf" \
-  -F 'instructions={"parts":[{"file":"document.pdf"}],"actions":[{"type":"sign","signatureType":"cades","cadesLevel":"b-lt","signerName":"Jane Smith"}]}' \
-  -o signed.pdf
-```
-
-### 7. Form Filling (Instant JSON)
-
-Fill PDF form fields using Instant JSON format:
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "form.pdf=@form.pdf" \
-  -F 'instructions={"parts":[{"file":"form.pdf"}],"actions":[{"type":"fillForm","fields":[{"name":"firstName","value":"John"},{"name":"lastName","value":"Doe"},{"name":"email","value":"john@example.com"}]}]}' \
-  -o filled.pdf
-```
-
-### 8. Merge and Split PDFs
-
-**Merge multiple PDFs:**
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "doc1.pdf=@doc1.pdf" \
-  -F "doc2.pdf=@doc2.pdf" \
-  -F 'instructions={"parts":[{"file":"doc1.pdf"},{"file":"doc2.pdf"}]}' \
-  -o merged.pdf
-```
-
-**Extract specific pages:**
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "document.pdf=@document.pdf" \
-  -F 'instructions={"parts":[{"file":"document.pdf","pages":{"start":0,"end":4}}]}' \
-  -o pages1-5.pdf
-```
-
-### 9. Render PDF Pages as Images
-
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "document.pdf=@document.pdf" \
-  -F 'instructions={"parts":[{"file":"document.pdf","pages":{"start":0,"end":0}}],"output":{"type":"png","dpi":300}}' \
-  -o page1.png
-```
-
-### 10. Check Credits
-
-```bash
-curl -X GET https://api.nutrient.io/credits \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY"
-```
-
-## Best Practices
-
-1. **Use the MCP server** when your agent supports it — it handles file I/O, error handling, and sandboxing automatically.
-2. **Set `SANDBOX_PATH`** to restrict file access to a specific directory.
-3. **Check credit balance** before batch operations to avoid interruptions.
-4. **Use AI redaction** for complex PII detection; use preset/regex redaction for known patterns (faster, cheaper).
-5. **Chain operations** — the API supports multiple actions in a single call (e.g., OCR then redact).
-
-## Troubleshooting
-
-| Issue | Solution |
-|-------|----------|
-| 401 Unauthorized | Check your API key is valid and has credits |
-| 413 Payload Too Large | Files must be under 100 MB |
-| Slow AI redaction | AI analysis takes 60–120 seconds; this is normal |
-| OCR quality poor | Try a different language parameter or improve scan quality |
-| Missing text in extraction | Run OCR first on scanned documents |
-
-## More Information
-
-- [Full API reference](references/REFERENCE.md) — Detailed endpoints, parameters, and error codes
-- [API Playground](https://dashboard.nutrient.io/processor-api/playground/) — Interactive API testing
-- [API Documentation](https://www.nutrient.io/guides/dws-processor/) — Official guides
-- [MCP Server repo](https://github.com/PSPDFKit/nutrient-dws-mcp-server) — Source code and issues
+Use Nutrient DWS for managed document workflows where fidelity, compliance, or multi-step processing matters more than local-tool convenience.
+
+## When to use
+- Generate PDFs from HTML templates, uploaded assets, or remote URLs.
+- Convert Office, HTML, image, and PDF files between supported formats.
+- OCR scans and extract text, tables, or key-value pairs.
+- Redact PII, watermark, sign, fill forms, merge, split, rotate, flatten, or encrypt PDFs.
+- Produce delivery targets like PDF/A, PDF/UA, optimized PDFs, or linearized PDFs.
+- Check credits before large, batch, or AI-heavy runs.
+
+## Tool preference
+1. Prefer the Nutrient MCP server when it is already configured. It handles file I/O and reduces multipart-request boilerplate.
+2. Fall back to direct API calls when MCP is unavailable or the workflow is easier to express as an explicit payload.
+3. Use local PDF utilities only for lightweight inspection. Use Nutrient when output fidelity or compliance matters.
+
+## Request model
+- Most workflows use `POST https://api.nutrient.io/build`.
+- Use multipart requests when uploading local files. Use JSON requests when all inputs are remote URLs.
+- `parts` describes source files, HTML inputs, remote URLs, page ranges, and passwords.
+- `actions` applies ordered transformations such as OCR, redaction, watermarking, signing, flattening, or rotation.
+- `output` selects the final format and delivery options such as `pdf`, `text`, `docx`, `png`, `pdfa`, `pdfua`, or optimized PDF output.
+- Dedicated endpoints also exist for some tools such as PDF/UA auto-tagging, but `/build` is the default mental model.
+
+Minimal direct-call template:
+
+```bash
+curl -X POST https://api.nutrient.io/build \
+  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
+  -F document.pdf=@document.pdf \
+  -F 'instructions={"parts":[{"file":"document.pdf"}]}' \
+  -o result.pdf
+```
+
+## Workflow
+1. Identify the source type and the required final artifact.
+2. Decide whether the job is generation, conversion, extraction, security/compliance, or a chained workflow.
+3. Express the full pipeline in one payload when the ordering is clear and the artifact should stay in memory on the server.
+4. Save outputs with stable suffixes such as `-ocr`, `-redacted`, `-pdfa`, `-pdfua`, or `-linearized`.
+
+## Decision rules
+- If you control the source markup, prefer HTML generation over browser print workflows.
+- Use remote `file.url` inputs when the source already lives at a stable URL and you want to avoid local uploads.
+- Use `output.type` for conversion and finalization targets. Use `actions` for transformations.
+- Run OCR before text extraction, key-value extraction, or semantic redaction on scans.
+- Prefer preset or regex redaction when the target is explicit. Use AI redaction only for contextual or natural-language requests.
+- Treat PDF/A and PDF/UA as compliance targets, not cosmetic export formats. Choose the target up front and validate final artifacts when requirements are contractual.
+- For PDF/UA, clean, born-digital inputs and structured HTML usually tag better than rasterized or flattened source PDFs.
+- For delivery optimization, linearize or optimize unsigned output artifacts instead of mutating already signed files.
+- When the user asks for multiple steps, keep destructive or final steps late in the sequence. Use the workflow recipes when ordering is ambiguous.
+
+## Reference map
+Read only what you need:
+
+- `references/request-basics.md` -> endpoint model, auth, multipart vs JSON, credits, limits, and errors
+- `references/generation-and-conversion.md` -> HTML/URL generation and format conversion
+- `references/extraction-and-ocr.md` -> OCR, text extraction, tables, and key-value workflows
+- `references/security-signing-and-forms.md` -> redaction, watermarking, signatures, forms, and passwords
+- `references/compliance-and-optimization.md` -> PDF/A, PDF/UA, optimization, and linearization
+- `references/workflow-recipes.md` -> end-to-end sequencing patterns for common business document workflows
+
+## References
+- [Reference index](references/REFERENCE.md)
+- [API docs](https://www.nutrient.io/api/documentation/)
+- [Processor API overview](https://www.nutrient.io/api/processor-api/)
+- [API playground](https://dashboard.nutrient.io/processor-api/playground/)
+- [MCP server](https://github.com/PSPDFKit/nutrient-dws-mcp-server)
diff --git a/nutrient-document-processing/agents/openai.yaml b/nutrient-document-processing/agents/openai.yaml
new file mode 100644
index 0000000..8d03fef
--- /dev/null
+++ b/nutrient-document-processing/agents/openai.yaml
@@ -0,0 +1,6 @@
+interface:
+  display_name: "Nutrient Document Processing"
+  short_description: "Generate, convert, OCR, redact, sign, archive, and optimize documents"
+  icon_small: "./assets/nutrient.svg"
+  icon_large: "./assets/nutrient.svg"
+  default_prompt: "Use $nutrient-document-processing to generate, convert, OCR, extract, redact, sign, fill, archive, optimize, or linearize this document, then return the output files and a concise summary."
diff --git a/nutrient-document-processing/assets/nutrient.svg b/nutrient-document-processing/assets/nutrient.svg
new file mode 100644
index 0000000..3f0d8f5
--- /dev/null
+++ b/nutrient-document-processing/assets/nutrient.svg
@@ -0,0 +1,4 @@
+<svg width="48" height="48" viewBox="0 0 48 48" fill="none" xmlns="http://www.w3.org/2000/svg">
+<rect width="48" height="48" rx="8" fill="white"/>
+<path d="M9.83333 26.8471C8.26792 26.8471 7 25.5729 7 23.9998C7 22.4266 8.26792 21.1524 9.83333 21.1524C11.3987 21.1524 12.6667 22.4266 12.6667 23.9998C12.6667 25.5729 11.3987 26.8471 9.83333 26.8471ZM38.1667 21.1524C36.6012 21.1524 35.3333 22.4266 35.3333 23.9998C35.3333 25.5729 36.6012 26.8471 38.1667 26.8471C39.7321 26.8471 41 25.5729 41 23.9998C41 22.4266 39.7321 21.1524 38.1667 21.1524ZM11.3265 30.9701C10.128 31.9809 9.97075 33.7776 10.9766 34.982C11.9824 36.1865 13.7702 36.3445 14.9688 35.3337C16.1672 34.3229 16.3245 32.5262 15.3187 31.3217C14.3128 30.1173 12.525 29.9593 11.3265 30.9701ZM36.6735 17.0294C37.872 16.0186 38.0292 14.2219 37.0234 13.0175C36.0176 11.8131 34.2297 11.6551 33.0312 12.6659C31.8328 13.6767 31.6755 15.4734 32.6813 16.6778C33.6872 17.8822 35.475 18.0402 36.6735 17.0294ZM14.9688 12.6673C13.7702 11.6565 11.9824 11.8131 10.9766 13.0189C9.97075 14.2248 10.1266 16.02 11.3265 17.0309C12.5264 18.0417 14.3128 17.8851 15.3187 16.6792C16.3245 15.4734 16.1687 13.6781 14.9688 12.6673ZM36.6735 30.9701C35.475 29.9593 33.6872 30.1159 32.6813 31.3217C31.6755 32.5262 31.8313 34.3229 33.0312 35.3337C34.2297 36.3445 36.0176 36.1879 37.0234 34.982C38.0292 33.7776 37.8734 31.9809 36.6735 30.9701ZM29.4386 24.8682C28.2401 23.8574 26.4523 24.014 25.4464 25.2199C24.4406 26.4257 24.5964 28.221 25.7963 29.2318C26.9962 30.2426 28.7827 30.086 29.7885 28.8801C30.7943 27.6743 30.6385 25.879 29.4386 24.8682ZM22.2037 18.7677C21.0052 17.7569 19.2173 17.9135 18.2115 19.1194C17.2057 20.3253 17.3615 22.1205 18.5614 23.1313C19.7613 24.1421 21.5478 23.9855 22.5536 22.7797C23.5594 21.5738 23.4036 19.7786 22.2037 18.7677Z" fill="#1A1414"/>
+</svg>
diff --git a/nutrient-document-processing/references/REFERENCE.md b/nutrient-document-processing/references/REFERENCE.md
index 0ac9b2b..ce0cdb4 100644
--- a/nutrient-document-processing/references/REFERENCE.md
+++ b/nutrient-document-processing/references/REFERENCE.md
@@ -1,598 +1,30 @@
-# Nutrient DWS Processor API Reference
+# Nutrient DWS Reference Index
 
-Complete API reference for the Nutrient Document Web Services (DWS) Processor API.
+Use this folder as a modular cookbook. Keep `SKILL.md` loaded as the router and open only the reference file that matches the task at hand.
 
-## Base URL
+## Reference map
 
-```
-https://api.nutrient.io
-```
+- [request-basics.md](request-basics.md)
+  Endpoint model, authentication, multipart vs JSON requests, credits, limits, and common errors.
 
-## Authentication
+- [generation-and-conversion.md](generation-and-conversion.md)
+  PDF generation from HTML or URLs, Office/image conversion, and output-format selection.
 
-All requests require a Bearer token in the `Authorization` header:
+- [extraction-and-ocr.md](extraction-and-ocr.md)
+  OCR, text extraction, table extraction, and key-value workflows.
 
-```
-Authorization: Bearer YOUR_API_KEY
-```
+- [security-signing-and-forms.md](security-signing-and-forms.md)
+  Redaction, watermarking, signing, form fill, and password-protected PDFs.
 
-Get your API key at <https://dashboard.nutrient.io/sign_up/?product=processor>.
+- [compliance-and-optimization.md](compliance-and-optimization.md)
+  PDF/A archival output, PDF/UA auto-tagging, PDF optimization, and linearization.
 
-## Main Endpoint
+- [workflow-recipes.md](workflow-recipes.md)
+  Sequencing guidance for common multi-step business document workflows.
 
-### POST /build
+## Official docs
 
-The primary endpoint for all document processing operations. Accepts multipart form data with file uploads and a JSON `instructions` field.
-
-**Request format:**
-```
-POST https://api.nutrient.io/build
-Content-Type: multipart/form-data
-Authorization: Bearer YOUR_API_KEY
-
-Form fields:
-  - One or more file uploads (name must match what's referenced in instructions)
-  - instructions: JSON object defining the processing pipeline
-```
-
-**Instructions JSON structure:**
-```json
-{
-  "parts": [
-    {
-      "file": "filename.pdf",
-      "pages": { "start": 0, "end": -1 },
-      "password": "optional-password"
-    }
-  ],
-  "actions": [
-    { "type": "action_type", ...action_params }
-  ],
-  "output": {
-    "type": "pdf",
-    "owner_password": "optional",
-    "user_password": "optional"
-  }
-}
-```
-
-## Document Conversion
-
-### Office to PDF
-
-Convert DOCX, XLSX, PPTX to PDF.
-
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "document.docx=@document.docx" \
-  -F 'instructions={"parts":[{"file":"document.docx"}]}' \
-  -o output.pdf
-```
-
-Supported input formats: `.docx`, `.xlsx`, `.pptx`, `.doc`, `.xls`, `.ppt`, `.odt`, `.ods`, `.odp`, `.rtf`.
-
-### HTML to PDF
-
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "index.html=@index.html" \
-  -F 'instructions={"parts":[{"html":"index.html"}]}' \
-  -o output.pdf
-```
-
-**With layout options:**
-```json
-{
-  "parts": [{ "html": "index.html" }],
-  "output": {
-    "type": "pdf",
-    "layout": {
-      "orientation": "landscape",
-      "size": "A4",
-      "margin": { "top": 20, "bottom": 20, "left": 15, "right": 15 }
-    }
-  }
-}
-```
-
-Supported sizes: `A0`–`A8`, `Letter`, `Legal`, or custom `{ "width": N, "height": N }` in points.
-
-### Image to PDF
-
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "photo.jpg=@photo.jpg" \
-  -F 'instructions={"parts":[{"file":"photo.jpg"}]}' \
-  -o output.pdf
-```
-
-Supported image formats: `.jpg`, `.jpeg`, `.png`, `.gif`, `.webp`, `.tiff`, `.bmp`.
-
-### PDF to Office
-
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "document.pdf=@document.pdf" \
-  -F 'instructions={"parts":[{"file":"document.pdf"}],"output":{"type":"docx"}}' \
-  -o output.docx
-```
-
-Supported output types: `docx`, `xlsx`, `pptx`.
-
-### PDF to Image
-
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "document.pdf=@document.pdf" \
-  -F 'instructions={"parts":[{"file":"document.pdf","pages":{"start":0,"end":0}}],"output":{"type":"png","dpi":150}}' \
-  -o page.png
-```
-
-**Output parameters:**
-| Parameter | Type | Default | Description |
-|-----------|------|---------|-------------|
-| `type` | string | `pdf` | `png`, `jpeg`, `webp` |
-| `dpi` | number | 150 | Resolution (72–600) |
-| `width` | number | — | Output width in pixels |
-| `height` | number | — | Output height in pixels |
-
-## Text and Data Extraction
-
-### Extract Text
-
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "document.pdf=@document.pdf" \
-  -F 'instructions={"parts":[{"file":"document.pdf"}],"output":{"type":"text"}}' \
-  -o extracted.txt
-```
-
-### Extract Tables
-
-Export tables to Excel, XML, JSON, or CSV:
-
-```bash
-# To Excel
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "document.pdf=@document.pdf" \
-  -F 'instructions={"parts":[{"file":"document.pdf"}],"actions":[{"type":"extraction","strategy":"tables"}],"output":{"type":"xlsx"}}' \
-  -o tables.xlsx
-```
-
-### Extract Key-Value Pairs
-
-Detect structured data like names, dates, addresses, phone numbers:
-
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "document.pdf=@document.pdf" \
-  -F 'instructions={"parts":[{"file":"document.pdf"}],"actions":[{"type":"extraction","strategy":"key-values"}],"output":{"type":"json"}}' \
-  -o pairs.json
-```
-
-## OCR (Optical Character Recognition)
-
-### Basic OCR
-
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "scanned.pdf=@scanned.pdf" \
-  -F 'instructions={"parts":[{"file":"scanned.pdf"}],"actions":[{"type":"ocr","language":"english"}]}' \
-  -o searchable.pdf
-```
-
-### Multi-Language OCR
-
-```json
-{
-  "parts": [{ "file": "scanned.pdf" }],
-  "actions": [{ "type": "ocr", "language": ["english", "german", "french"] }]
-}
-```
-
-### Supported OCR Languages
-
-| Language | Code | Language | Code |
-|----------|------|----------|------|
-| English | `english` | Japanese | `japanese` |
-| German | `german` | Korean | `korean` |
-| French | `french` | Chinese (Simplified) | `chinese-simplified` |
-| Spanish | `spanish` | Chinese (Traditional) | `chinese-traditional` |
-| Italian | `italian` | Arabic | `arabic` |
-| Portuguese | `portuguese` | Hebrew | `hebrew` |
-| Dutch | `dutch` | Thai | `thai` |
-| Swedish | `swedish` | Hindi | `hindi` |
-| Danish | `danish` | Russian | `russian` |
-| Norwegian | `norwegian` | Polish | `polish` |
-| Finnish | `finnish` | Czech | `czech` |
-| Turkish | `turkish` | | |
-
-### OCR on Images
-
-Also works on standalone images (JPEG, PNG, TIFF):
-
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "scan.jpg=@scan.jpg" \
-  -F 'instructions={"parts":[{"file":"scan.jpg"}],"actions":[{"type":"ocr","language":"english"}]}' \
-  -o searchable.pdf
-```
-
-## Redaction
-
-### Preset Pattern Redaction
-
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "document.pdf=@document.pdf" \
-  -F 'instructions={"parts":[{"file":"document.pdf"}],"actions":[{"type":"redaction","strategy":"preset","preset":"email-address"}]}' \
-  -o redacted.pdf
-```
-
-**Available presets:**
-
-| Preset | Matches |
-|--------|---------|
-| `social-security-number` | US SSNs (XXX-XX-XXXX) |
-| `credit-card-number` | Visa, MasterCard, Amex, etc. |
-| `email-address` | Email addresses |
-| `north-american-phone-number` | US/Canada phone numbers |
-| `international-phone-number` | International format numbers |
-| `date` | Common date formats |
-| `url` | Web URLs |
-| `ipv4` | IPv4 addresses |
-| `ipv6` | IPv6 addresses |
-| `mac-address` | MAC addresses |
-| `us-zip-code` | US ZIP codes |
-| `vin` | Vehicle identification numbers |
-| `time` | Time values |
-
-### Regex Redaction
-
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "document.pdf=@document.pdf" \
-  -F 'instructions={"parts":[{"file":"document.pdf"}],"actions":[{"type":"redaction","strategy":"regex","regex":"\\b\\d{3}-\\d{2}-\\d{4}\\b","caseSensitive":false}]}' \
-  -o redacted.pdf
-```
-
-### Text-Match Redaction
-
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "document.pdf=@document.pdf" \
-  -F 'instructions={"parts":[{"file":"document.pdf"}],"actions":[{"type":"redaction","strategy":"text","text":"CLASSIFIED","caseSensitive":false}]}' \
-  -o redacted.pdf
-```
-
-### AI-Powered Redaction
-
-Uses AI to detect and redact PII based on natural-language criteria. Takes 60–120 seconds.
-
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "document.pdf=@document.pdf" \
-  -F 'instructions={"parts":[{"file":"document.pdf"}],"actions":[{"type":"ai_redaction","criteria":"All personally identifiable information"}]}' \
-  -o redacted.pdf
-```
-
-**Example criteria:**
-- `"All personally identifiable information"`
-- `"Names, email addresses, and phone numbers"`
-- `"Protected health information (PHI)"`
-- `"Social security numbers and credit card numbers"`
-- `"Financial account numbers and routing numbers"`
-
-**Redaction action parameters:**
-
-| Parameter | Type | Description |
-|-----------|------|-------------|
-| `strategy` | string | `preset`, `regex`, `text`, or (for AI) use `type: ai_redaction` |
-| `preset` | string | Preset pattern name (when strategy=preset) |
-| `regex` | string | Regular expression (when strategy=regex) |
-| `text` | string | Exact text to match (when strategy=text) |
-| `caseSensitive` | boolean | Default: true for regex, false for text |
-| `startPage` | integer | Start page index (0-based) |
-| `pageLimit` | integer | Number of pages to process |
-| `includeAnnotations` | boolean | Also redact matching annotations (default: true) |
-| `criteria` | string | Natural language criteria (for ai_redaction) |
-
-## Watermarking
-
-### Text Watermark
-
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "document.pdf=@document.pdf" \
-  -F 'instructions={"parts":[{"file":"document.pdf"}],"actions":[{"type":"watermark","watermarkType":"text","text":"DRAFT","fontSize":72,"fontColor":"#FF0000","opacity":0.3,"rotation":45,"width":"50%","height":"50%"}]}' \
-  -o watermarked.pdf
-```
-
-### Image Watermark
-
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "document.pdf=@document.pdf" \
-  -F "logo.png=@logo.png" \
-  -F 'instructions={"parts":[{"file":"document.pdf"}],"actions":[{"type":"watermark","watermarkType":"image","imagePath":"logo.png","width":"25%","height":"25%","opacity":0.5}]}' \
-  -o watermarked.pdf
-```
-
-**Watermark parameters:**
-
-| Parameter | Type | Description |
-|-----------|------|-------------|
-| `watermarkType` | string | `text` or `image` |
-| `text` | string | Watermark text (when type=text) |
-| `imagePath` | string | Image filename (when type=image) |
-| `fontSize` | number | Font size in points |
-| `fontColor` | string | Hex color (e.g., `#FF0000`) |
-| `opacity` | number | 0–1 (default: 0.7) |
-| `rotation` | number | Degrees counter-clockwise |
-| `width` | string/number | Width in points or percentage (e.g., `"50%"`) |
-| `height` | string/number | Height in points or percentage |
-
-## Digital Signatures
-
-### CMS Signature
-
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "document.pdf=@document.pdf" \
-  -F 'instructions={"parts":[{"file":"document.pdf"}],"actions":[{"type":"sign","signatureType":"cms","signerName":"John Doe","reason":"Document approval","location":"San Francisco"}]}' \
-  -o signed.pdf
-```
-
-### CAdES Signature
-
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "document.pdf=@document.pdf" \
-  -F 'instructions={"parts":[{"file":"document.pdf"}],"actions":[{"type":"sign","signatureType":"cades","cadesLevel":"b-lt","signerName":"Jane Smith"}]}' \
-  -o signed.pdf
-```
-
-### Visible Signature
-
-Add a visual signature appearance on a specific page:
-
-```json
-{
-  "parts": [{ "file": "document.pdf" }],
-  "actions": [{
-    "type": "sign",
-    "signatureType": "cms",
-    "signerName": "John Doe",
-    "pageIndex": 0,
-    "rect": [50, 700, 200, 50]
-  }]
-}
-```
-
-**Signature parameters:**
-
-| Parameter | Type | Default | Description |
-|-----------|------|---------|-------------|
-| `signatureType` | string | `cms` | `cms` or `cades` |
-| `cadesLevel` | string | `b-lt` | `b-b`, `b-t`, or `b-lt` (CAdES only) |
-| `signerName` | string | — | Signer's name |
-| `reason` | string | — | Reason for signing |
-| `location` | string | — | Signing location |
-| `pageIndex` | integer | — | Page for visible signature (0-based). Omit for invisible. |
-| `rect` | array | — | `[left, top, width, height]` in PDF points |
-| `flatten` | boolean | false | Flatten document before signing |
-| `graphicImagePath` | string | — | Path to signature graphic |
-| `watermarkImagePath` | string | — | Path to watermark image overlay |
-
-## PDF Manipulation
-
-### Merge PDFs
-
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "doc1.pdf=@doc1.pdf" \
-  -F "doc2.pdf=@doc2.pdf" \
-  -F "doc3.pdf=@doc3.pdf" \
-  -F 'instructions={"parts":[{"file":"doc1.pdf"},{"file":"doc2.pdf"},{"file":"doc3.pdf"}]}' \
-  -o merged.pdf
-```
-
-### Extract Page Ranges
-
-```bash
-curl -X POST https://api.nutrient.io/build \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
-  -F "document.pdf=@document.pdf" \
-  -F 'instructions={"parts":[{"file":"document.pdf","pages":{"start":0,"end":4}}]}' \
-  -o first-5-pages.pdf
-```
-
-Pages use 0-based indexing. `"end": -1` means the last page.
-
-### Flatten PDF
-
-Remove interactive elements (form fields, annotations):
-
-```json
-{
-  "parts": [{ "file": "document.pdf" }],
-  "actions": [{ "type": "flatten" }]
-}
-```
-
-### Rotate Pages
-
-```json
-{
-  "parts": [{ "file": "document.pdf" }],
-  "actions": [{ "type": "rotate", "rotation": 90, "pages": [0, 1, 2] }]
-}
-```
-
-## Form Filling
-
-### Fill Form Fields
-
-```json
-{
-  "parts": [{ "file": "form.pdf" }],
-  "actions": [{
-    "type": "fillForm",
-    "fields": [
-      { "name": "firstName", "value": "John" },
-      { "name": "lastName", "value": "Doe" },
-      { "name": "email", "value": "john@example.com" },
-      { "name": "agree", "value": true }
-    ]
-  }]
-}
-```
-
-## Password Protection
-
-### Encrypt a PDF
-
-```json
-{
-  "parts": [{ "file": "document.pdf" }],
-  "output": {
-    "type": "pdf",
-    "owner_password": "owner123",
-    "user_password": "user456"
-  }
-}
-```
-
-### Open a Password-Protected PDF
-
-```json
-{
-  "parts": [{ "file": "protected.pdf", "password": "user456" }]
-}
-```
-
-## Credit Management
-
-### Check Balance
-
-```bash
-curl -X GET https://api.nutrient.io/credits \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY"
-```
-
-**Response:**
-```json
-{
-  "remaining": 9500,
-  "total": 10000,
-  "usage": {
-    "period": "week",
-    "used": 500
-  }
-}
-```
-
-### Check Usage by Operation
-
-```bash
-curl -X GET "https://api.nutrient.io/credits/usage?period=month" \
-  -H "Authorization: Bearer $NUTRIENT_API_KEY"
-```
-
-## Chaining Actions
-
-Multiple actions can be chained in a single API call:
-
-```json
-{
-  "parts": [{ "file": "scanned.pdf" }],
-  "actions": [
-    { "type": "ocr", "language": "english" },
-    { "type": "redaction", "strategy": "preset", "preset": "social-security-number" },
-    { "type": "redaction", "strategy": "preset", "preset": "email-address" },
-    { "type": "watermark", "watermarkType": "text", "text": "REDACTED", "fontSize": 36, "opacity": 0.2, "rotation": 45, "width": "50%", "height": "50%" },
-    { "type": "sign", "signatureType": "cms", "signerName": "Compliance Dept" }
-  ]
-}
-```
-
-Actions execute in order. This example: OCR → redact SSNs → redact emails → add watermark → sign.
-
-## Error Handling
-
-### HTTP Status Codes
-
-| Code | Meaning |
-|------|---------|
-| 200 | Success — response body is the output file |
-| 400 | Bad request — invalid instructions or missing required fields |
-| 401 | Unauthorized — invalid or missing API key |
-| 402 | Payment required — insufficient credits |
-| 413 | Payload too large — file exceeds 100 MB limit |
-| 415 | Unsupported media type — unsupported input format |
-| 422 | Unprocessable entity — valid format but cannot process |
-| 429 | Rate limited — too many requests |
-| 500 | Server error — retry with exponential backoff |
-
-### Error Response Format
-
-```json
-{
-  "error": {
-    "code": "invalid_instructions",
-    "message": "The 'parts' field is required",
-    "details": {}
-  }
-}
-```
-
-### Common Errors and Solutions
-
-| Error | Cause | Solution |
-|-------|-------|----------|
-| `file_not_found` | Referenced file not in upload | Ensure filename in instructions matches upload field name |
-| `unsupported_format` | Input format not recognized | Check supported formats list |
-| `password_required` | PDF is encrypted | Add `"password"` to the part |
-| `insufficient_credits` | Account out of credits | Top up at dashboard.nutrient.io |
-| `ocr_language_invalid` | Unsupported OCR language | Check supported languages table |
-
-## Rate Limits
-
-- Free tier: 100 requests/minute
-- Paid tiers: Higher limits based on plan
-- Use `429` status code to detect rate limiting
-- Implement exponential backoff for retries
-
-## File Size Limits
-
-- Maximum input file: 100 MB
-- Maximum total upload: 500 MB per request
-- Recommended: Keep files under 50 MB for fastest processing
-
-## Additional Resources
-
-- [API Playground](https://dashboard.nutrient.io/processor-api/playground/) — Interactive testing
-- [API Documentation](https://www.nutrient.io/guides/dws-processor/) — Official guides
-- [MCP Server](https://github.com/PSPDFKit/nutrient-dws-mcp-server) — For AI agent integration
-- [npm Package](https://www.npmjs.com/package/@nutrient-sdk/dws-mcp-server) — MCP server on npm
-- [Dashboard](https://dashboard.nutrient.io/) — Manage API keys and credits
-- [Sign Up](https://dashboard.nutrient.io/sign_up/?product=processor) — Get a free API key
+- [Processor API overview](https://www.nutrient.io/api/processor-api/)
+- [General documentation](https://www.nutrient.io/api/documentation/)
+- [Tools and APIs](https://www.nutrient.io/api/documentation/tools-and-api/)
+- [API playground](https://dashboard.nutrient.io/processor-api/playground/)
diff --git a/nutrient-document-processing/references/compliance-and-optimization.md b/nutrient-document-processing/references/compliance-and-optimization.md
new file mode 100644
index 0000000..b11454d
--- /dev/null
+++ b/nutrient-document-processing/references/compliance-and-optimization.md
@@ -0,0 +1,128 @@
+# Compliance and Optimization
+
+Use these patterns when the required output is archival, accessible, or tuned for delivery performance.
+
+## PDF/A archival conversion
+
+```bash
+curl -X POST https://api.nutrient.io/build \
+  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
+  -F document=@document.pdf \
+  -F 'instructions={
+    "parts": [
+      {
+        "file": "document"
+      }
+    ],
+    "output": {
+      "type": "pdfa",
+      "conformance": "pdfa-2a",
+      "vectorization": true,
+      "rasterization": true
+    }
+  }' \
+  -o result.pdf
+```
+
+Supported PDF/A targets include:
+
+- `pdfa-1a`, `pdfa-1b`
+- `pdfa-2a`, `pdfa-2u`, `pdfa-2b`
+- `pdfa-3a`, `pdfa-3u`, `pdfa-3b`
+- `pdfa-4`, `pdfa-4e`, `pdfa-4f`
+
+### PDF/A caveat
+
+To achieve conformance, conversion may vectorize or rasterize content. That can remove live text and font information, so later OCR may be needed.
+
+## PDF/UA auto-tagging
+
+Dedicated endpoint:
+
+```bash
+curl -X POST https://api.nutrient.io/processor/pdfua \
+  -H "Content-Type: application/pdf" \
+  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
+  --data-binary @document.pdf \
+  -o result.pdf
+```
+
+Equivalent `/build` workflow:
+
+```bash
+curl -X POST https://api.nutrient.io/build \
+  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
+  -F document=@document.pdf \
+  -F 'instructions={
+    "parts": [
+      {
+        "file": "document"
+      }
+    ],
+    "output": {
+      "type": "pdfua"
+    }
+  }' \
+  -o result.pdf
+```
+
+### PDF/UA rules
+
+- PDF/UA is an accessibility target, not just a format conversion.
+- Clean born-digital PDFs generally tag better than rasterized or flattened inputs.
+- Structured HTML sources also tend to produce better accessibility outcomes than image-only content.
+- Validate final outputs with your required checker when accessibility compliance is contractual.
+
+## PDF optimization and linearization
+
+Linearize a PDF for fast web viewing:
+
+```bash
+curl -X POST https://api.nutrient.io/build \
+  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
+  -F document=@document.pdf \
+  -F 'instructions={
+    "parts": [
+      {
+        "file": "document"
+      }
+    ],
+    "output": {
+      "type": "pdf",
+      "optimize": {
+        "linearize": true
+      }
+    }
+  }' \
+  -o result.pdf
+```
+
+Optimization with compression controls:
+
+```json
+{
+  "parts": [{ "file": "document" }],
+  "output": {
+    "type": "pdf",
+    "optimize": {
+      "disableImages": false,
+      "mrcCompression": true,
+      "imageOptimizationQuality": 2,
+      "linearize": true
+    }
+  }
+}
+```
+
+### Optimization rules
+
+- Linearize only for delivery PDFs meant for network viewing.
+- Optimize before signatures when possible. Treat signed PDFs as immutable delivery artifacts.
+- Compression changes should be validated visually when image quality matters.
+
+## Official docs
+
+- [PDF to PDF/A API](https://www.nutrient.io/api/pdf-to-pdfa-api/)
+- [PDF/UA auto-tagging API](https://www.nutrient.io/api/pdfua-auto-tagging-api/)
+- [Optimization API](https://www.nutrient.io/api/document-optimization-api/)
+- [PDF linearization API](https://www.nutrient.io/api/pdf-linearization-api/)
diff --git a/nutrient-document-processing/references/extraction-and-ocr.md b/nutrient-document-processing/references/extraction-and-ocr.md
new file mode 100644
index 0000000..7dfed2f
--- /dev/null
+++ b/nutrient-document-processing/references/extraction-and-ocr.md
@@ -0,0 +1,100 @@
+# Extraction and OCR
+
+Use these patterns when the goal is to pull machine-readable data from PDFs or scans.
+
+## Extract plain text
+
+```bash
+curl -X POST https://api.nutrient.io/build \
+  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
+  -F document.pdf=@document.pdf \
+  -F 'instructions={"parts":[{"file":"document.pdf"}],"output":{"type":"text"}}' \
+  -o extracted.txt
+```
+
+## Extract tables
+
+```bash
+curl -X POST https://api.nutrient.io/build \
+  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
+  -F document.pdf=@document.pdf \
+  -F 'instructions={"parts":[{"file":"document.pdf"}],"actions":[{"type":"extraction","strategy":"tables"}],"output":{"type":"xlsx"}}' \
+  -o tables.xlsx
+```
+
+Common structured outputs: `xlsx`, `json`, `xml`, `csv`
+
+## Extract key-value pairs
+
+```bash
+curl -X POST https://api.nutrient.io/build \
+  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
+  -F document.pdf=@document.pdf \
+  -F 'instructions={"parts":[{"file":"document.pdf"}],"actions":[{"type":"extraction","strategy":"key-values"}],"output":{"type":"json"}}' \
+  -o pairs.json
+```
+
+## Basic OCR
+
+```bash
+curl -X POST https://api.nutrient.io/build \
+  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
+  -F scanned.pdf=@scanned.pdf \
+  -F 'instructions={"parts":[{"file":"scanned.pdf"}],"actions":[{"type":"ocr","language":"english"}]}' \
+  -o searchable.pdf
+```
+
+## Multi-language OCR
+
+```json
+{
+  "parts": [{ "file": "scanned.pdf" }],
+  "actions": [{ "type": "ocr", "language": ["english", "german", "french"] }]
+}
+```
+
+## OCR on images
+
+```bash
+curl -X POST https://api.nutrient.io/build \
+  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
+  -F scan.jpg=@scan.jpg \
+  -F 'instructions={"parts":[{"file":"scan.jpg"}],"actions":[{"type":"ocr","language":"english"}]}' \
+  -o searchable.pdf
+```
+
+## OCR languages
+
+Common OCR languages include:
+
+- `english`
+- `german`
+- `french`
+- `spanish`
+- `italian`
+- `portuguese`
+- `dutch`
+- `swedish`
+- `polish`
+- `czech`
+- `turkish`
+- `japanese`
+- `korean`
+- `chinese-simplified`
+- `chinese-traditional`
+- `arabic`
+- `hebrew`
+- `hindi`
+- `russian`
+
+## OCR and extraction rules
+
+- OCR before extraction when text is image-only, unselectable, or suspiciously sparse.
+- Tables and key-values benefit from cleaner scans and correct page orientation.
+- For multilingual inputs, pass an array of languages rather than guessing a single language.
+- If OCR quality is poor, fix source orientation and scan quality before retrying.
+
+## Official docs
+
+- [Data extraction overview](https://www.nutrient.io/api/data-extraction-api/)
+- [Processor API overview](https://www.nutrient.io/api/processor-api/)
diff --git a/nutrient-document-processing/references/generation-and-conversion.md b/nutrient-document-processing/references/generation-and-conversion.md
new file mode 100644
index 0000000..b2419ea
--- /dev/null
+++ b/nutrient-document-processing/references/generation-and-conversion.md
@@ -0,0 +1,119 @@
+# Generation and Conversion
+
+Use these patterns when the main task is to generate a new document or convert an existing one into another format.
+
+## HTML to PDF generation
+
+Upload the HTML file and reference it through `parts[].html`.
+
+```bash
+curl -X POST https://api.nutrient.io/build \
+  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
+  -F index.html=@index.html \
+  -F 'instructions={"parts":[{"html":"index.html"}]}' \
+  -o result.pdf
+```
+
+With layout options:
+
+```json
+{
+  "parts": [{ "html": "index.html" }],
+  "output": {
+    "type": "pdf",
+    "layout": {
+      "orientation": "landscape",
+      "size": "A4",
+      "margin": { "top": 20, "bottom": 20, "left": 15, "right": 15 }
+    }
+  }
+}
+```
+
+## Remote URL generation or conversion
+
+When the input already lives at a stable remote URL, send a JSON request and use `file.url`:
+
+```bash
+curl -X POST https://api.nutrient.io/build \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
+  -d '{
+    "parts": [
+      {
+        "file": {
+          "url": "https://www.nutrient.io/api/assets/downloads/samples/docx/document.docx"
+        }
+      }
+    ]
+  }' \
+  -o result.pdf
+```
+
+Use this pattern when you want server-side fetches and do not need to upload a local file first.
+
+## Office to PDF
+
+```bash
+curl -X POST https://api.nutrient.io/build \
+  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
+  -F document.docx=@document.docx \
+  -F 'instructions={"parts":[{"file":"document.docx"}]}' \
+  -o result.pdf
+```
+
+Supported common inputs: `docx`, `xlsx`, `pptx`, `doc`, `xls`, `ppt`, `odt`, `ods`, `odp`, `rtf`
+
+## Image to PDF
+
+```bash
+curl -X POST https://api.nutrient.io/build \
+  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
+  -F photo.jpg=@photo.jpg \
+  -F 'instructions={"parts":[{"file":"photo.jpg"}]}' \
+  -o result.pdf
+```
+
+Supported common images: `jpg`, `jpeg`, `png`, `gif`, `webp`, `tiff`, `bmp`
+
+## PDF to Office
+
+```bash
+curl -X POST https://api.nutrient.io/build \
+  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
+  -F document.pdf=@document.pdf \
+  -F 'instructions={"parts":[{"file":"document.pdf"}],"output":{"type":"docx"}}' \
+  -o result.docx
+```
+
+Supported common outputs: `docx`, `xlsx`, `pptx`
+
+## PDF to image
+
+```bash
+curl -X POST https://api.nutrient.io/build \
+  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
+  -F document.pdf=@document.pdf \
+  -F 'instructions={"parts":[{"file":"document.pdf","pages":{"start":0,"end":0}}],"output":{"type":"png","dpi":150}}' \
+  -o page.png
+```
+
+Useful output options:
+
+| Option | Meaning |
+|--------|---------|
+| `type` | `png`, `jpeg`, or `webp` |
+| `dpi` | Resolution target |
+| `width` / `height` | Explicit pixel size |
+
+## Generation and conversion rules
+
+- Use HTML generation when you control the markup and need stable, reproducible output.
+- Use remote URL requests when the source already exists online and you want to avoid local uploads.
+- Use `output.type` for direct conversions. Do not create unnecessary `actions` for simple format changes.
+- When rendering a multi-page PDF to images, use `pages` on the part to render only the pages you need.
+
+## Official docs
+
+- [PDF generator / converter overview](https://www.nutrient.io/api/pdf-converter-api/)
+- [URL to PDF API](https://www.nutrient.io/api/url-to-pdf-api/)
diff --git a/nutrient-document-processing/references/request-basics.md b/nutrient-document-processing/references/request-basics.md
new file mode 100644
index 0000000..989fc84
--- /dev/null
+++ b/nutrient-document-processing/references/request-basics.md
@@ -0,0 +1,118 @@
+# Request Basics
+
+Most Nutrient DWS workflows use:
+
+```text
+POST https://api.nutrient.io/build
+Authorization: Bearer YOUR_API_KEY
+```
+
+Use multipart when you are uploading local files. Use JSON when every input is a remote URL.
+
+## Multipart pattern
+
+```bash
+curl -X POST https://api.nutrient.io/build \
+  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
+  -F document=@document.pdf \
+  -F 'instructions={"parts":[{"file":"document"}]}' \
+  -o result.pdf
+```
+
+## JSON pattern for remote URLs
+
+```bash
+curl -X POST https://api.nutrient.io/build \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
+  -d '{
+    "parts": [
+      {
+        "file": {
+          "url": "https://www.nutrient.io/api/assets/downloads/samples/docx/document.docx"
+        }
+      }
+    ]
+  }' \
+  -o result.pdf
+```
+
+## Instructions model
+
+```json
+{
+  "parts": [
+    {
+      "file": "document.pdf",
+      "pages": { "start": 0, "end": -1 },
+      "password": "optional-password"
+    }
+  ],
+  "actions": [
+    { "type": "action_type" }
+  ],
+  "output": {
+    "type": "pdf"
+  }
+}
+```
+
+## Core rules
+
+- Multipart field names must match the filenames or symbolic names referenced in `parts`.
+- `parts` preserves order. Multiple parts become a merged output unless the selected output type says otherwise.
+- `actions` execute in order and mutate the in-flight document.
+- `output.type` selects the final artifact type such as `pdf`, `text`, `docx`, `xlsx`, `pptx`, `png`, `pdfa`, or `pdfua`.
+- Password-protected inputs need `password` on the relevant part.
+
+## Credits
+
+Check balance:
+
+```bash
+curl -X GET https://api.nutrient.io/credits \
+  -H "Authorization: Bearer $NUTRIENT_API_KEY"
+```
+
+Check usage:
+
+```bash
+curl -X GET "https://api.nutrient.io/credits/usage?period=month" \
+  -H "Authorization: Bearer $NUTRIENT_API_KEY"
+```
+
+## Limits and common errors
+
+### HTTP status codes
+
+| Code | Meaning |
+|------|---------|
+| `200` | Success |
+| `400` | Invalid instructions or missing required fields |
+| `401` | Invalid or missing API key |
+| `402` | Insufficient credits |
+| `413` | Payload too large |
+| `415` | Unsupported media type |
+| `422` | Valid request but unsupported or unprocessable content |
+| `429` | Rate limited |
+| `500` | Server error |
+
+### Common problems
+
+| Problem | Cause | Fix |
+|---------|-------|-----|
+| `file_not_found` | The symbolic file name in `parts` does not match an uploaded field | Align multipart names and `parts` references |
+| Empty extraction | The file is scanned or rasterized | OCR first |
+| `password_required` | The PDF is encrypted | Add the password on the part |
+| `insufficient_credits` | Batch or AI-heavy workflow exceeded credits | Check balance before the run |
+
+### File limits
+
+- Maximum input file: 100 MB
+- Maximum total upload: 500 MB per request
+- For faster runs, prefer files below 50 MB when possible
+
+## Official docs
+
+- [API overview](https://www.nutrient.io/api/documentation/developer-guides/api-overview/)
+- [Processor API overview](https://www.nutrient.io/api/processor-api/)
diff --git a/nutrient-document-processing/references/security-signing-and-forms.md b/nutrient-document-processing/references/security-signing-and-forms.md
new file mode 100644
index 0000000..0fc7026
--- /dev/null
+++ b/nutrient-document-processing/references/security-signing-and-forms.md
@@ -0,0 +1,141 @@
+# Security, Signing, and Forms
+
+Use these patterns when the task is about redaction, watermarking, signatures, form fill, or document protection.
+
+## Preset redaction
+
+```bash
+curl -X POST https://api.nutrient.io/build \
+  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
+  -F document.pdf=@document.pdf \
+  -F 'instructions={"parts":[{"file":"document.pdf"}],"actions":[{"type":"redaction","strategy":"preset","preset":"email-address"}]}' \
+  -o redacted.pdf
+```
+
+Common presets:
+
+- `social-security-number`
+- `credit-card-number`
+- `email-address`
+- `north-american-phone-number`
+- `international-phone-number`
+- `date`
+- `url`
+- `ipv4`
+- `ipv6`
+- `mac-address`
+- `us-zip-code`
+- `vin`
+- `time`
+
+## Regex redaction
+
+```bash
+curl -X POST https://api.nutrient.io/build \
+  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
+  -F document.pdf=@document.pdf \
+  -F 'instructions={"parts":[{"file":"document.pdf"}],"actions":[{"type":"redaction","strategy":"regex","regex":"\\b\\d{3}-\\d{2}-\\d{4}\\b","caseSensitive":false}]}' \
+  -o redacted.pdf
+```
+
+## AI redaction
+
+```bash
+curl -X POST https://api.nutrient.io/build \
+  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
+  -F document.pdf=@document.pdf \
+  -F 'instructions={"parts":[{"file":"document.pdf"}],"actions":[{"type":"ai_redaction","criteria":"All personally identifiable information"}]}' \
+  -o redacted.pdf
+```
+
+Use AI redaction for context-dependent requests; expect it to take roughly 60–120 seconds. Use preset or regex redaction for explicit patterns.
+
+## Text watermark
+
+```bash
+curl -X POST https://api.nutrient.io/build \
+  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
+  -F document.pdf=@document.pdf \
+  -F 'instructions={"parts":[{"file":"document.pdf"}],"actions":[{"type":"watermark","watermarkType":"text","text":"DRAFT","fontSize":72,"fontColor":"#FF0000","opacity":0.3,"rotation":45,"width":"50%","height":"50%"}]}' \
+  -o watermarked.pdf
+```
+
+## Image watermark
+
+```bash
+curl -X POST https://api.nutrient.io/build \
+  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
+  -F document.pdf=@document.pdf \
+  -F logo.png=@logo.png \
+  -F 'instructions={"parts":[{"file":"document.pdf"}],"actions":[{"type":"watermark","watermarkType":"image","imagePath":"logo.png","width":"25%","height":"25%","opacity":0.5}]}' \
+  -o watermarked.pdf
+```
+
+## CMS signature
+
+```bash
+curl -X POST https://api.nutrient.io/build \
+  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
+  -F document.pdf=@document.pdf \
+  -F 'instructions={"parts":[{"file":"document.pdf"}],"actions":[{"type":"sign","signatureType":"cms","signerName":"John Doe","reason":"Document approval","location":"San Francisco"}]}' \
+  -o signed.pdf
+```
+
+## CAdES signature
+
+```bash
+curl -X POST https://api.nutrient.io/build \
+  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
+  -F document.pdf=@document.pdf \
+  -F 'instructions={"parts":[{"file":"document.pdf"}],"actions":[{"type":"sign","signatureType":"cades","cadesLevel":"b-lt","signerName":"Jane Smith"}]}' \
+  -o signed.pdf
+```
+
+## Fill form fields
+
+```json
+{
+  "parts": [{ "file": "form.pdf" }],
+  "actions": [{
+    "type": "fillForm",
+    "fields": [
+      { "name": "firstName", "value": "John" },
+      { "name": "lastName", "value": "Doe" },
+      { "name": "email", "value": "john@example.com" },
+      { "name": "agree", "value": true }
+    ]
+  }]
+}
+```
+
+## Encrypt output
+
+```json
+{
+  "parts": [{ "file": "document.pdf" }],
+  "output": {
+    "type": "pdf",
+    "owner_password": "owner123",
+    "user_password": "user456"
+  }
+}
+```
+
+## Open a password-protected input
+
+```json
+{
+  "parts": [{ "file": "protected.pdf", "password": "user456" }]
+}
+```
+
+## Rules
+
+- Redact before signing. Signed documents should be treated as final artifacts.
+- Deterministic redaction is easier to verify than AI redaction.
+- Confirm real signing requirements before promising a legally sufficient workflow.
+- Use real field names for form fill. Do not guess from visible labels.
+
+## Official docs
+
+- [Tools and APIs](https://www.nutrient.io/api/documentation/tools-and-api/)
diff --git a/nutrient-document-processing/references/workflow-recipes.md b/nutrient-document-processing/references/workflow-recipes.md
new file mode 100644
index 0000000..55f21ec
--- /dev/null
+++ b/nutrient-document-processing/references/workflow-recipes.md
@@ -0,0 +1,84 @@
+# Workflow Recipes
+
+Use these patterns when the task spans more than one DWS feature and ordering matters.
+
+## 1. Scan to searchable text
+
+Goal: take an image-only PDF and produce both a searchable PDF and extracted text.
+
+Recommended sequence:
+
+1. OCR the source PDF into a searchable PDF.
+2. Extract text from the searchable result.
+
+Reasoning: OCR improves extraction quality and gives you a reusable intermediate artifact.
+
+## 2. Scan to redacted delivery PDF
+
+Goal: redact a scanned document and deliver a shareable PDF.
+
+Recommended sequence:
+
+1. OCR
+2. Preset or regex redaction
+3. Optional watermark
+4. Optional optimization or linearization
+5. Signature last, if required
+
+Reasoning: OCR makes scanned text matchable by preset and regex redaction, and signing should happen only after all content mutations are complete.
+
+## 3. HTML report to archival PDF
+
+Goal: generate a PDF from structured content and archive it.
+
+Recommended sequence:
+
+1. Generate PDF from HTML
+2. Convert the result to PDF/A
+3. Validate the archival output in your downstream compliance workflow if required
+
+Reasoning: HTML generation gives you better structure control than post-hoc browser printing, and PDF/A should be treated as a final archival artifact.
+
+## 4. Existing PDF to accessible PDF/UA
+
+Goal: turn a born-digital PDF into a screen-reader-ready artifact.
+
+Recommended sequence:
+
+1. Start from the cleanest available PDF
+2. Run PDF/UA auto-tagging
+3. Validate the result with your required accessibility checker
+
+Reasoning: flattened, rasterized, or noisy inputs reduce tagging quality.
+
+## 5. Form packet to signed output
+
+Goal: fill a form, remove interactivity if needed, and sign it.
+
+Recommended sequence:
+
+1. Fill form fields
+2. Optional flattening
+3. Signature last
+
+Reasoning: signing too early forces a second mutation pass and can invalidate the signed artifact.
+
+## 6. Web delivery PDF
+
+Goal: publish a PDF that streams quickly in viewers.
+
+Recommended sequence:
+
+1. Final content edits
+2. Optional compression
+3. Linearization
+4. Publish to a server that supports byte-range requests
+
+Reasoning: linearization is a delivery concern, not an authoring concern.
+
+## Recipe heuristics
+
+- Keep OCR early.
+- Keep compliance targets intentional.
+- Keep signatures last.
+- Treat optimization as a delivery-stage step unless the workflow explicitly needs it earlier.

From f6449b85035128fc14a579d1700b2f7722221cfb Mon Sep 17 00:00:00 2001
From: "Jonathan D. Rhyne" <jonathan@pspdfkit.com>
Date: Sat, 14 Mar 2026 16:02:29 -0400
Subject: [PATCH 2/2] docs(skill): cover PDF assembly workflows and tighten
 router guidance

Document the merge, split, rotate, flatten, and page-range workflows that the top-level skill already advertises. Add setup assumptions and anti-pattern guidance to the router, and keep the README plus Codex metadata aligned with the broader document-processing surface.
---
 README.md                                     |  1 +
 nutrient-document-processing/SKILL.md         | 17 +++-
 .../agents/openai.yaml                        |  4 +-
 .../references/REFERENCE.md                   |  3 +
 .../references/pdf-manipulation.md            | 96 +++++++++++++++++++
 .../references/workflow-recipes.md            | 14 +++
 6 files changed, 131 insertions(+), 4 deletions(-)
 create mode 100644 nutrient-document-processing/references/pdf-manipulation.md

diff --git a/README.md b/README.md
index fa715f5..7aa2e22 100644
--- a/README.md
+++ b/README.md
@@ -122,6 +122,7 @@ patient-records.pdf (contains PII)
 |------------|-------------|----------------|
 | ✨ **Generate** | Create PDFs from HTML templates, uploaded assets, or remote URLs | *"Generate a PDF proposal from this HTML template"* |
 | 📄 **Convert** | PDF ↔ DOCX/XLSX/PPTX, HTML → PDF, images → PDF | *"Convert report.docx to PDF"* |
+| 🧩 **Assemble** | Merge, split, reorder, rotate, and flatten PDF packets before delivery | *"Merge these PDFs, rotate the landscape pages, and keep only pages 1-5"* |
 | 📝 **Extract** | Text, tables, and key-value pairs from PDFs | *"Extract all tables from invoice.pdf as Excel"* |
 | 🔍 **OCR** | Multi-language OCR for scanned documents | *"OCR this German scan and extract the text"* |
 | 🔒 **Redact** | Pattern-based + AI-powered PII redaction | *"Redact all SSNs and emails from records.pdf"* |
diff --git a/nutrient-document-processing/SKILL.md b/nutrient-document-processing/SKILL.md
index fce5dcd..10a8ed2 100644
--- a/nutrient-document-processing/SKILL.md
+++ b/nutrient-document-processing/SKILL.md
@@ -1,14 +1,19 @@
 ---
 name: nutrient-document-processing
-description: Use when tasks involve generating PDFs from HTML or URLs, converting Office/images/PDFs, OCRing and extracting content, redacting, watermarking, signing, filling, merging, or producing compliance outputs like PDF/A, PDF/UA, and linearized PDFs with Nutrient DWS; prefer the Nutrient MCP server when it is already configured, otherwise call the API directly.
+description: Use when tasks involve generating PDFs from HTML or URLs, converting Office/images/PDFs, assembling or splitting PDFs, OCRing and extracting content, redacting, watermarking, signing, filling, or producing compliance outputs like PDF/A, PDF/UA, and linearized PDFs with Nutrient DWS. Triggers include convert to PDF, OCR this scan, extract tables, merge these PDFs, redact PII, sign this PDF, make this PDF/A, or linearize for web delivery. Prefer the Nutrient MCP server when it is already configured, otherwise call the API directly.
 metadata:
-  short-description: Generate, convert, OCR, redact, sign, archive, and optimize documents
+  short-description: Generate, convert, assemble, OCR, redact, sign, archive, and optimize documents
 ---
 
 # Nutrient Document Processing
 
 Use Nutrient DWS for managed document workflows where fidelity, compliance, or multi-step processing matters more than local-tool convenience.
 
+## Setup assumptions
+- Direct API calls use `Authorization: Bearer $NUTRIENT_API_KEY`.
+- MCP setups commonly use `@nutrient-sdk/dws-mcp-server`, which reads the key from `NUTRIENT_DWS_API_KEY` (note the different variable name from the direct-API examples).
+- Open `references/request-basics.md` first when authentication or payload shape is the blocker.
+
 ## When to use
 - Generate PDFs from HTML templates, uploaded assets, or remote URLs.
 - Convert Office, HTML, image, and PDF files between supported formats.
@@ -52,16 +57,24 @@ curl -X POST https://api.nutrient.io/build \
 - Use `output.type` for conversion and finalization targets. Use `actions` for transformations.
 - OCR before text extraction, key-value extraction, or semantic redaction on scans.
 - Prefer preset or regex redaction when the target is explicit. Use AI redaction only for contextual or natural-language requests.
+- Use the PDF manipulation reference for merge, split, rotate, flatten, and page-range workflows instead of inferring those payloads from conversion examples.
 - Treat PDF/A and PDF/UA as compliance targets, not cosmetic export formats. Choose the target up front and validate final artifacts when requirements are contractual.
 - For PDF/UA, clean born-digital inputs and structured HTML usually tag better than rasterized or flattened source PDFs.
 - For delivery optimization, linearize or optimize unsigned output artifacts instead of mutating already signed files.
 - When the user asks for multiple steps, keep destructive or final steps late in the sequence. Use the workflow recipes when ordering is ambiguous.
 
+## Anti-patterns
+- Do not OCR born-digital PDFs just because the task mentions extraction. Extract first and OCR only if the text layer is missing.
+- Do not flatten forms or annotations until the user confirms the artifact no longer needs to stay editable.
+- Do not sign, archive, or linearize intermediate working files. Keep those as final-delivery steps.
+- Do not promise PDF/A or PDF/UA compliance without a validation step when the requirement is contractual.
+
 ## Reference map
 Read only what you need:
 
 - `references/request-basics.md` -> endpoint model, auth, multipart vs JSON, credits, limits, and errors
 - `references/generation-and-conversion.md` -> HTML/URL generation and format conversion
+- `references/pdf-manipulation.md` -> merge, split, page-range, rotate, and flatten workflows
 - `references/extraction-and-ocr.md` -> OCR, text extraction, tables, and key-value workflows
 - `references/security-signing-and-forms.md` -> redaction, watermarking, signatures, forms, and passwords
 - `references/compliance-and-optimization.md` -> PDF/A, PDF/UA, optimization, and linearization
diff --git a/nutrient-document-processing/agents/openai.yaml b/nutrient-document-processing/agents/openai.yaml
index 8d03fef..c1cf13d 100644
--- a/nutrient-document-processing/agents/openai.yaml
+++ b/nutrient-document-processing/agents/openai.yaml
@@ -1,6 +1,6 @@
 interface:
   display_name: "Nutrient Document Processing"
-  short_description: "Generate, convert, OCR, redact, sign, archive, and optimize documents"
+  short_description: "Generate, convert, assemble, OCR, redact, sign, archive, and optimize documents"
   icon_small: "./assets/nutrient.svg"
   icon_large: "./assets/nutrient.svg"
-  default_prompt: "Use $nutrient-document-processing to generate, convert, OCR, extract, redact, sign, fill, archive, optimize, or linearize this document, then return the output files and a concise summary."
+  default_prompt: "Use $nutrient-document-processing to generate, convert, assemble, OCR, extract, redact, sign, fill, archive, optimize, or linearize this document, then return the output files and a concise summary."
diff --git a/nutrient-document-processing/references/REFERENCE.md b/nutrient-document-processing/references/REFERENCE.md
index ce0cdb4..323f7b9 100644
--- a/nutrient-document-processing/references/REFERENCE.md
+++ b/nutrient-document-processing/references/REFERENCE.md
@@ -10,6 +10,9 @@ Use this folder as a modular cookbook. Keep `SKILL.md` loaded as the router and
 - [generation-and-conversion.md](generation-and-conversion.md)
   PDF generation from HTML or URLs, Office/image conversion, and output-format selection.
 
+- [pdf-manipulation.md](pdf-manipulation.md)
+  Merge, split, reorder, rotate, and flatten PDF workflows.
+
 - [extraction-and-ocr.md](extraction-and-ocr.md)
   OCR, text extraction, table extraction, and key-value workflows.
 
diff --git a/nutrient-document-processing/references/pdf-manipulation.md b/nutrient-document-processing/references/pdf-manipulation.md
new file mode 100644
index 0000000..2c6486a
--- /dev/null
+++ b/nutrient-document-processing/references/pdf-manipulation.md
@@ -0,0 +1,96 @@
+# PDF Manipulation
+
+Use these patterns when the task is about assembling, slicing, or normalizing PDFs before final delivery operations.
+
+## Merge PDFs
+
+Part order controls merge order:
+
+```bash
+curl -X POST https://api.nutrient.io/build \
+  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
+  -F cover=@cover.pdf \
+  -F body=@body.pdf \
+  -F appendix=@appendix.pdf \
+  -F 'instructions={
+    "parts": [
+      { "file": "cover" },
+      { "file": "body" },
+      { "file": "appendix" }
+    ]
+  }' \
+  -o packet.pdf
+```
+
+## Split or extract page ranges
+
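+Use `pages` on a part to carve out a subset. Each request returns one output document, so splitting a file into several pieces takes one request per range: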
+
+```bash
+curl -X POST https://api.nutrient.io/build \
+  -H "Authorization: Bearer $NUTRIENT_API_KEY" \
+  -F document=@document.pdf \
+  -F 'instructions={
+    "parts": [
+      {
+        "file": "document",
+        "pages": { "start": 0, "end": 4 }
+      }
+    ]
+  }' \
+  -o first-five-pages.pdf
+```
+
+## Reorder or assemble a packet from ranges
+
+Reuse the same source part with different page ranges to build a new packet:
+
+```json
+{
+  "parts": [
+    { "file": "document", "pages": { "start": 5, "end": 9 } },
+    { "file": "document", "pages": { "start": 0, "end": 4 } }
+  ]
+}
+```
+
+## Rotate pages
+
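+Use a `rotate` action when page orientation is wrong. To rotate only some pages, attach the action to a part that selects that page range:
+
+```json
+{
+  "parts": [
+    {
+      "file": "document",
+      "pages": { "start": 0, "end": 2 },
+      "actions": [{ "type": "rotate", "rotateBy": 90 }]
+    },
+    { "file": "document", "pages": { "start": 3, "end": -1 } }
+  ]
+}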
+```
+
+## Flatten forms or annotations
+
+Flatten only when the output should stop being interactive:
+
+```json
+{
+  "parts": [{ "file": "document" }],
+  "actions": [{ "type": "flatten" }]
+}
+```
+
+## Rules
+
+- Page indexes are zero-based and `end` is inclusive (`start: 0, end: 4` selects the first five pages). `end: -1` means the last page.
+- Assemble the full packet before watermarking, signing, optimizing, or linearizing.
+- Rotate before rasterizing or signing if the source orientation is incorrect.
+- Flatten late. It removes editability from forms and many annotation workflows.
+- Keep passwords on the affected `part` when slicing or merging encrypted inputs.
+
+## Official docs
+
+- [Processor API overview](https://www.nutrient.io/api/processor-api/)
+- [Tools and APIs](https://www.nutrient.io/api/documentation/tools-and-api/)
diff --git a/nutrient-document-processing/references/workflow-recipes.md b/nutrient-document-processing/references/workflow-recipes.md
index 55f21ec..aab7780 100644
--- a/nutrient-document-processing/references/workflow-recipes.md
+++ b/nutrient-document-processing/references/workflow-recipes.md
@@ -76,6 +76,20 @@ Recommended sequence:
 
 Reasoning: linearization is a delivery concern, not an authoring concern.
 
+## 7. Packet assembly before signing
+
+Goal: merge multiple PDFs, fix page orientation, and produce a final packet for signing or distribution.
+
+Recommended sequence:
+
+1. Merge or reorder the required parts
+2. Extract or omit page ranges as needed
+3. Optional page rotation
+4. Optional flattening
+5. Watermark, optimize, or linearize as needed, then sign last
+
+Reasoning: assembly and page normalization are still content mutations. Final-artifact operations should happen only after the packet shape is stable.
+
 ## Recipe heuristics
 
 - Keep OCR early.