From aeec7923ab70815f6c856ee3704924121c027628 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 13 Mar 2026 13:37:01 +0000
Subject: [PATCH] refactor: remove Ollama integration to simplify architecture

Remove the optional Ollama local LLM preprocessing pipeline (3 agents:
ingestion, processing, synthesis) and all related infrastructure. The MCP
host (Claude Code / Claude Desktop) handles aggregation directly via the
aggregate_results tool, making the Ollama pipeline redundant.

Deleted:
- Ollama MCP server, Dockerfiles, entrypoint scripts
- Agent servers (ingestion, processing, synthesis)
- Agent prompt templates
- Ollama-specific tests

Updated:
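- docker-compose.yml: removed the 5 Ollama pipeline services (ollama,
  ollama-mcp, agent-ingestion, agent-processing, agent-synthesis) and the
  ollama_models volume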
- Makefile: removed Ollama targets and profile references
- Config: removed ollama_url/ollama_model settings
- Models: renamed ollama_model to model in AggregatedMetadata and the
  graph models
- Reports: updated generators to use generic "Model" label
- CI: removed Ollama/agent matrix entries from build workflow
- Docs: cleaned README.md, DOCKER.md, CLAUDE.md
- Dependencies: removed ollama, fastapi, uvicorn from requirements

All 188 tests pass.

https://claude.ai/code/session_01BaZKkxAotm6KAehzs2qYjL
---
 .claude/mcp-start.sh                        |   2 +-
 .env.example                                |  13 -
 .github/workflows/build-and-push.yml        |  16 -
 CLAUDE.md                                   |  27 +-
 DOCKER.md                                   |  79 +---
 Makefile                                    |  72 +--
 README.md                                   | 139 +-----
 blhackbox-mcp-catalog.yaml                  |   9 -
 blhackbox-mcp.json                          |  12 -
 blhackbox/__init__.py                       |   2 +-
 blhackbox/agents/__init__.py                |  24 -
 blhackbox/agents/base_agent.py              | 144 ------
 blhackbox/agents/base_agent_server.py       | 399 ----------------
 blhackbox/agents/ingestion_agent.py         |  17 -
 blhackbox/agents/ingestion_server.py        |  10 -
 blhackbox/agents/processing_agent.py        |  19 -
 blhackbox/agents/processing_server.py       |  10 -
 blhackbox/agents/synthesis_agent.py         |  18 -
 blhackbox/agents/synthesis_server.py        |  10 -
 blhackbox/config.py                         |   4 -
 blhackbox/core/knowledge_graph.py           |   4 +-
 blhackbox/main.py                           |   1 -
 blhackbox/models/aggregated_payload.py      |  11 +-
 blhackbox/models/graph.py                   |   6 +-
 blhackbox/prompts/agents/__init__.py        |   0
 blhackbox/prompts/agents/ingestionagent.md  | 254 ----------
 blhackbox/prompts/agents/processingagent.md | 219 ---------
 blhackbox/prompts/agents/synthesisagent.md  | 277 -----------
 blhackbox/reporting/html_generator.py       |   6 +-
 blhackbox/reporting/md_generator.py         |   2 +-
 blhackbox/reporting/pdf_generator.py        |   2 +-
 docker-compose.yml                          | 197 +-------
 docker/agent-ingestion.Dockerfile           |  12 -
 docker/agent-processing.Dockerfile          |  12 -
 docker/agent-synthesis.Dockerfile           |  12 -
 docker/claude-code-entrypoint.sh            |  14 +-
 docker/claude-code.Dockerfile               |   4 -
 docker/ollama-entrypoint.sh                 |  52 ---
 docker/ollama-mcp.Dockerfile                |  14 -
 docker/ollama.Dockerfile                    |  10 -
 mcp_servers/ollama_mcp_server.py            | 492 --------------------
 pyproject.toml                              |   9 +-
 requirements.txt                            |   8 -
 setup.sh                                    |  15 +-
 tests/test_agent_server.py                  | 343 --------------
 tests/test_agents.py                        | 305 ------------
 tests/test_aggregated_payload.py            |  10 +-
 tests/test_config.py                        |  15 +-
 tests/test_ollama_mcp.py                    | 390 ----------------
 tests/test_prompts.py                       |  12 -
 50 files changed, 51 insertions(+), 3683 deletions(-)
 delete mode 100644 blhackbox/agents/__init__.py
 delete mode 100644 blhackbox/agents/base_agent.py
 delete mode 100644 blhackbox/agents/base_agent_server.py
 delete mode 100644 blhackbox/agents/ingestion_agent.py
 delete mode 100644 blhackbox/agents/ingestion_server.py
 delete mode 100644 blhackbox/agents/processing_agent.py
 delete mode 100644 blhackbox/agents/processing_server.py
 delete mode 100644 blhackbox/agents/synthesis_agent.py
 delete mode 100644 blhackbox/agents/synthesis_server.py
 delete mode 100644 blhackbox/prompts/agents/__init__.py
 delete mode 100644 blhackbox/prompts/agents/ingestionagent.md
 delete mode 100644 blhackbox/prompts/agents/processingagent.md
 delete mode 100644 blhackbox/prompts/agents/synthesisagent.md
 delete mode 100644 docker/agent-ingestion.Dockerfile
 delete mode 100644 docker/agent-processing.Dockerfile
 delete mode 100644 docker/agent-synthesis.Dockerfile
 delete mode 100644 docker/ollama-entrypoint.sh
 delete mode 100644 docker/ollama-mcp.Dockerfile
 delete mode 100644 docker/ollama.Dockerfile
 delete mode 100644 mcp_servers/ollama_mcp_server.py
 delete mode 100644 tests/test_agent_server.py
 delete mode 100644 tests/test_agents.py
 delete mode 100644 tests/test_ollama_mcp.py

diff --git a/.claude/mcp-start.sh b/.claude/mcp-start.sh
index 768e3b6..dcce990 100755
--- a/.claude/mcp-start.sh
+++ b/.claude/mcp-start.sh
@@ -15,7 +15,7 @@ if [ ! -f ".venv/bin/blhackbox" ]; then
   .venv/bin/pip install -e . --quiet >&2
 fi
 
-# Load .env if present (for NEO4J_*, OLLAMA_*, etc.)
+# Load .env if present (for NEO4J_*, etc.)
 # API keys (ANTHROPIC_API_KEY, OPENAI_API_KEY) are intentionally commented
 # out in .env.example — Claude Code provides its own authentication.
 if [ -f ".env" ]; then
diff --git a/.env.example b/.env.example
index c6c2864..8f91911 100644
--- a/.env.example
+++ b/.env.example
@@ -53,19 +53,6 @@ NEO4J_PASSWORD=changeme-min-8-chars
 # Neo4j Aura alternative (cloud):
 # NEO4J_URI=neo4j+s://xxxxxxxx.databases.neo4j.io
 
-# ── Ollama (optional — legacy local pipeline) ──────────────────────
-# The MCP host (Claude) now handles data aggregation directly.
-# These settings are only needed if you enable the Ollama pipeline:
-#   docker compose --profile ollama up -d
-#
-# OLLAMA_MODEL=llama3.1:8b
-# OLLAMA_TIMEOUT=300
-# OLLAMA_NUM_CTX=8192
-# OLLAMA_KEEP_ALIVE=10m
-# OLLAMA_RETRIES=2
-# AGENT_TIMEOUT=1200
-# AGENT_RETRIES=2
-
 # ── OpenAI (optional — for ChatGPT MCP clients on host) ────────────
 # Required for ChatGPT / OpenAI MCP clients (host-based only).
 # Get your key at platform.openai.com
diff --git a/.github/workflows/build-and-push.yml b/.github/workflows/build-and-push.yml
index fd944a1..f75640e 100644
--- a/.github/workflows/build-and-push.yml
+++ b/.github/workflows/build-and-push.yml
@@ -48,22 +48,6 @@ jobs:
             dockerfile: docker/screenshot-mcp.Dockerfile
             tag_prefix: "screenshot-mcp-"
             scout: false
-          - service: ollama-mcp
-            dockerfile: docker/ollama-mcp.Dockerfile
-            tag_prefix: "ollama-mcp-"
-            scout: true
-          - service: agent-ingestion
-            dockerfile: docker/agent-ingestion.Dockerfile
-            tag_prefix: "agent-ingestion-"
-            scout: false
-          - service: agent-processing
-            dockerfile: docker/agent-processing.Dockerfile
-            tag_prefix: "agent-processing-"
-            scout: false
-          - service: agent-synthesis
-            dockerfile: docker/agent-synthesis.Dockerfile
-            tag_prefix: "agent-synthesis-"
-            scout: false
           - service: claude-code
             dockerfile: docker/claude-code.Dockerfile
             tag_prefix: "claude-code-"
diff --git a/CLAUDE.md b/CLAUDE.md
index 1d8e7c4..d0ab7dd 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -16,20 +16,18 @@ Read the following before writing a single line:
 - `CLAUDE.md` (this file), `README.md`
 - `docker-compose.yml`, `Makefile`, `.env.example`
 - `blhackbox/mcp/server.py` — blhackbox stdio MCP server (Claude Code Web path)
-- `mcp_servers/ollama_mcp_server.py` — Ollama MCP orchestrator (optional, `--profile ollama`)
-- Every file directly relevant to the task: the relevant `Dockerfile`, `*_server.py`, `*_agent.py`, agent prompts in `blhackbox/prompts/agents/` — whatever applies
+- Every file directly relevant to the task: the relevant `Dockerfile`, `*_server.py` — whatever applies
 - Do not rely on memory from previous sessions. Read the actual current files.
 
 **Phase 3: Understand Before Acting**
 Before writing code, answer these internally:
 1. What is the root cause — not the symptom, the actual root cause?
 2. Does the fix conflict with anything else in the codebase?
-3. Does it break the `AggregatedPayload` schema contract? (Must stay stable for `aggregate_results`, report generation, and the optional Ollama pipeline)
+3. Does it break the `AggregatedPayload` schema contract? (Must stay stable for `aggregate_results` and report generation)
 4. Does it violate the `shell=False` rule?
-5. Am I touching agent prompts in `blhackbox/prompts/agents/`? If so — do I need a rebuild, or can I use a volume mount override?
-6. Is there a simpler fix that achieves the same result?
+5. Is there a simpler fix that achieves the same result?
 
-Only after answering all six — write the fix.
+Only after answering all five — write the fix.
 
 ---
 
@@ -39,8 +37,7 @@ Claude Desktop, or ChatGPT) IS the orchestrator — it decides which tools to ca
 collects raw outputs, and structures them directly into an `AggregatedPayload` via
 the `aggregate_results` MCP tool before writing the final pentest report.
 
-The Ollama preprocessing pipeline (3 agents) is now optional (`--profile ollama`)
-for local-only / offline processing. By default, the MCP host handles aggregation.
+The MCP host handles all data aggregation directly.
 
 ## Code Standards
 - All Python code must be type-annotated
@@ -60,27 +57,13 @@ for local-only / offline processing. By default, the MCP host handles aggregatio
 7. Document tools in README.md components table
 8. Add unit tests
 
-## Adding or Tuning an Agent Prompt (Optional Ollama Pipeline)
-Agent prompts are in `blhackbox/prompts/agents/` (only relevant if using `--profile ollama`):
-- `ingestionagent.md` — Ingestion Agent system prompt
-- `processingagent.md` — Processing Agent system prompt
-- `synthesisagent.md` — Synthesis Agent system prompt
-
-**To tune without rebuilding:** Mount the file as a volume in `docker-compose.yml`.
-**To make it permanent:** Edit the `.md` file and rebuild the relevant image.
-
-Always validate that the `AggregatedPayload` Pydantic model still parses correctly
-after prompt changes (`make test`).
-
 ## Key Reference Links
 | Resource | URL |
 |----------|-----|
 | FastMCP (Python MCP framework) | https://pypi.org/project/fastmcp |
 | MCP Protocol spec | https://modelcontextprotocol.io |
 | MCP Gateway | https://hub.docker.com/r/docker/mcp-gateway |
-| Ollama Python SDK | https://github.com/ollama/ollama-python |
 | Portainer CE | https://docs.portainer.io |
-| NVIDIA Container Toolkit | https://docs.nvidia.com/datacenter/cloud-native/container-toolkit |
 | Docker Hub (blhackbox) | https://hub.docker.com/r/crhacky/blhackbox |
 
 ## Verification Document — Authorization for Pentesting
diff --git a/DOCKER.md b/DOCKER.md
index c118af0..8d5b0cc 100644
--- a/DOCKER.md
+++ b/DOCKER.md
@@ -19,22 +19,15 @@ All custom images are published to a single Docker Hub repository, differentiate
 
 ## Images and Tags
 
-Eight custom images are published to `crhacky/blhackbox` on Docker Hub:
+Four custom images are published to `crhacky/blhackbox` on Docker Hub:
 
 | Service | Tag | Dockerfile | Base |
 |---|---|---|---|
 | **Kali MCP** | `crhacky/blhackbox:kali-mcp` | `docker/kali-mcp.Dockerfile` | `kalilinux/kali-rolling` |
 | **WireMCP** | `crhacky/blhackbox:wire-mcp` | `docker/wire-mcp.Dockerfile` | `debian:bookworm-slim` |
 | **Screenshot MCP** | `crhacky/blhackbox:screenshot-mcp` | `docker/screenshot-mcp.Dockerfile` | `python:3.13-slim` |
-| **Ollama MCP** | `crhacky/blhackbox:ollama-mcp` | `docker/ollama-mcp.Dockerfile` | `python:3.13-slim` |
-| **Agent: Ingestion** | `crhacky/blhackbox:agent-ingestion` | `docker/agent-ingestion.Dockerfile` | `python:3.13-slim` |
-| **Agent: Processing** | `crhacky/blhackbox:agent-processing` | `docker/agent-processing.Dockerfile` | `python:3.13-slim` |
-| **Agent: Synthesis** | `crhacky/blhackbox:agent-synthesis` | `docker/agent-synthesis.Dockerfile` | `python:3.13-slim` |
 | **Claude Code** | `crhacky/blhackbox:claude-code` | `docker/claude-code.Dockerfile` | `node:22-slim` |
 
-Custom-built locally (no pre-built image on Docker Hub):
-- `crhacky/blhackbox:ollama` — wraps `ollama/ollama:latest` with auto-pull entrypoint (`docker/ollama.Dockerfile`)
-
 Official images pulled directly (no custom build):
 - `portainer/portainer-ce:latest` — Docker management UI
 - `docker/mcp-gateway:latest` — MCP Gateway (optional, `--profile gateway`)
@@ -63,15 +56,6 @@ Claude Code ──┬──> Kali MCP (SSE, port 9001)
               │
               │  After collecting raw outputs, Claude structures them directly:
               │    get_payload_schema() → parse/dedup/correlate → aggregate_results()
-              │
-              └──> (optional) Ollama MCP (SSE, port 9000)
-                        │
-                        ├──► agent-ingestion:8001
-                        ├──► agent-processing:8002
-                        └──► agent-synthesis:8003
-                                  │
-                                  ▼
-                               Ollama (LLM backend)
 
 output/          Host-mounted directory for reports, screenshots, sessions
 Portainer        Docker UI (https://localhost:9443)
@@ -86,10 +70,6 @@ Claude Desktop ──> MCP Gateway (localhost:8080/mcp) ──┬──> Kali MC
                                                       └──> Screenshot MCP
 ```
 
-> **Ollama is optional since v2.1.** The MCP host (Claude) now handles data
-> aggregation directly. The Ollama pipeline is kept as an optional fallback
-> for local-only / offline processing. Enable with `--profile ollama`.
-
 ---
 
 ## Usage
@@ -152,11 +132,6 @@ make health                # MCP server health check
 | `claude-code` | `crhacky/blhackbox:claude-code` | - | `claude-code` | Claude Code CLI client (Docker) |
 | `mcp-gateway` | `docker/mcp-gateway:latest` | `8080` | `gateway` | Single MCP entry point (host clients) |
 | `neo4j` | `neo4j:5` | `7474` `7687` | `neo4j` | Cross-session knowledge graph |
-| `ollama-mcp` | `crhacky/blhackbox:ollama-mcp` | `9000` | `ollama` | Thin MCP orchestrator (optional) |
-| `agent-ingestion` | `crhacky/blhackbox:agent-ingestion` | `8001` | `ollama` | Agent 1: parse raw output (optional) |
-| `agent-processing` | `crhacky/blhackbox:agent-processing` | `8002` | `ollama` | Agent 2: deduplicate, compress (optional) |
-| `agent-synthesis` | `crhacky/blhackbox:agent-synthesis` | `8003` | `ollama` | Agent 3: assemble payload (optional) |
-| `ollama` | `crhacky/blhackbox:ollama` (built locally) | `11434` | `ollama` | LLM inference backend (optional) |
 
 ---
 
@@ -171,8 +146,7 @@ The Claude Code container's `.mcp.json` connects directly to each server:
   "mcpServers": {
     "kali":            { "type": "sse", "url": "http://kali-mcp:9001/sse" },
     "wireshark":       { "type": "sse", "url": "http://kali-mcp:9003/sse" },
-    "screenshot":      { "type": "sse", "url": "http://screenshot-mcp:9004/sse" },
-    "ollama-pipeline": { "type": "sse", "url": "http://ollama-mcp:9000/sse" }
+    "screenshot":      { "type": "sse", "url": "http://screenshot-mcp:9004/sse" }
   }
 }
 ```
@@ -201,7 +175,6 @@ Requires `--profile gateway` (`make up-gateway`).
 | Variable | Default | Description |
 |---|---|---|
 | `ANTHROPIC_API_KEY` | - | Required for Claude Code in Docker |
-| `OLLAMA_MODEL` | `llama3.1:8b` | Ollama model for preprocessing agents |
 | `MCP_GATEWAY_PORT` | `8080` | MCP Gateway host port (optional) |
 | `MSF_TIMEOUT` | `300` | Metasploit command timeout in seconds |
 | `NEO4J_URI` | `bolt://neo4j:7687` | Neo4j connection URI (optional) |
@@ -240,24 +213,6 @@ Requires `--profile gateway` (`make up-gateway`).
 - **Entrypoint**: Screenshot MCP server (FastMCP + Playwright headless Chromium)
 - **Transport**: SSE on port 9004
 
-### Ollama MCP (`crhacky/blhackbox:ollama-mcp`)
-
-- **Base**: `python:3.13-slim`
-- **Entrypoint**: `ollama_mcp_server.py`
-- **Transport**: SSE on port 9000
-- **Role**: Thin MCP orchestrator (built with FastMCP) — calls 3 agent containers via HTTP, does NOT call Ollama directly
-- **NOT an official Ollama product**
-
-### Agent Containers (`agent-ingestion`, `agent-processing`, `agent-synthesis`)
-
-- **Base**: `python:3.13-slim`
-- **Entrypoint**: FastAPI server (`uvicorn`)
-- **Ports**: 8001, 8002, 8003 respectively (internal only)
-- **Depends on**: Ollama container (each calls Ollama via the official `ollama` Python package)
-- **Health endpoint**: `GET /health` — returns immediately without calling Ollama
-- Prompts baked in from `blhackbox/prompts/agents/` at build time
-- Can be overridden via volume mount for tuning without rebuilding
-
 ### Claude Code (`crhacky/blhackbox:claude-code`)
 
 - **Base**: `node:22-slim`
@@ -286,7 +241,6 @@ Named volumes for persistent data:
 
 | Volume | Service | Purpose |
 |---|---|---|
-| `ollama_models` | ollama | Ollama model storage (optional) |
 | `neo4j_data` | neo4j | Neo4j graph database (optional) |
 | `neo4j_logs` | neo4j | Neo4j logs (optional) |
 | `portainer_data` | portainer | Portainer configuration |
@@ -304,20 +258,18 @@ Host bind mounts for output (accessible on your local filesystem):
 
 ## CI/CD Pipeline
 
-Eight custom images are built and pushed to Docker Hub via GitHub Actions:
+Four custom images are built and pushed to Docker Hub via GitHub Actions:
 
 ```
 PR opened  ───>  CI (lint + test + pip-audit)
                       │
-PR merged  ───>  CI  ───>  Build & Push (8 images)  ───>  Docker Hub
+PR merged  ───>  CI  ───>  Build & Push (4 images)  ───>  Docker Hub
                            (on CI success)
-Tag v*     ──────────────>  Build & Push (8 images)  ───>  Docker Hub
+Tag v*     ──────────────>  Build & Push (4 images)  ───>  Docker Hub
 
-Manual     ──────────────>  Build & Push (8 images)  ───>  Docker Hub
+Manual     ──────────────>  Build & Push (4 images)  ───>  Docker Hub
 ```
 
-Docker Scout vulnerability scanning runs on the ollama-mcp image.
-
 ---
 
 ## Useful Commands
@@ -338,15 +290,9 @@ make up-gateway
 # Start with Neo4j (5 containers)
 docker compose --profile neo4j up -d
 
-# Start with Ollama pipeline (9 containers, optional)
-docker compose --profile ollama up -d
-
 # Launch Claude Code in Docker
 make claude-code
 
-# Pull the Ollama model (only if using --profile ollama)
-make ollama-pull
-
 # Check health of all MCP servers
 make health
 
@@ -366,24 +312,11 @@ make clean                 # also removes volumes
 
 ---
 
-## GPU Support
-
-GPU acceleration is **disabled by default** for broad compatibility. Ollama runs
-on CPU out of the box.
-
-If you have an NVIDIA GPU, uncomment the `deploy` block under the `ollama`
-service in `docker-compose.yml` and install the
-[NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html)
-on the host. GPU acceleration significantly speeds up Ollama inference.
-
----
-
 ## Security
 
 - **Docker socket**: MCP Gateway (optional) and Portainer mount `/var/run/docker.sock`. This grants effective root on the host. Never expose ports 8080 or 9443 to the public internet.
 - **Authorization**: Ensure you have written permission before scanning any target.
 - **Neo4j**: Set a strong password in `.env`. Never use defaults in production.
-- **Agent containers**: Communicate only on the internal `blhackbox_net` Docker network. No ports exposed to host.
 - **Portainer**: Uses HTTPS with a self-signed certificate. Create a strong admin password on first run.
 
 **This tool is for authorized security testing only.** Unauthorized access to computer systems is illegal.
diff --git a/Makefile b/Makefile
index 3c3637c..e76b442 100644
--- a/Makefile
+++ b/Makefile
@@ -1,10 +1,9 @@
-.PHONY: help setup up up-full up-ollama up-gateway down logs test test-local lint format clean nuke \
-       pull status health portainer gateway-logs ollama-pull ollama-shell \
+.PHONY: help setup up up-full up-gateway down logs test test-local lint format clean nuke \
+       pull status health portainer gateway-logs \
        claude-code \
-       neo4j-browser logs-ollama-mcp logs-kali \
+       neo4j-browser logs-kali \
        logs-wireshark logs-screenshot \
-       logs-agent-ingestion logs-agent-processing logs-agent-synthesis \
-       restart-ollama-mcp restart-kali restart-agents \
+       restart-kali \
        restart-wireshark restart-screenshot \
        push-all wordlists recon report \
        inject-verification
@@ -22,14 +21,11 @@ help: ## Show this help
 pull: ## Pull all pre-built images from Docker Hub
 	$(COMPOSE) pull
 
-up: ## Start core stack (4 containers — no Ollama, no gateway)
+up: ## Start core stack (4 containers)
 	$(COMPOSE) up -d
 
-up-ollama: ## Start with Ollama pipeline (9 containers — legacy local processing)
-	$(COMPOSE) --profile ollama up -d
-
 down: ## Stop all services (all profiles)
-	$(COMPOSE) --profile gateway --profile neo4j --profile claude-code --profile ollama down
+	$(COMPOSE) --profile gateway --profile neo4j --profile claude-code down
 
 logs: ## Tail logs from all services
 	$(COMPOSE) logs -f
@@ -55,7 +51,7 @@ format: ## Auto-format code
 	ruff format blhackbox/ tests/
 
 clean: ## Remove containers, volumes, networks, and build artifacts (keeps images)
-	$(COMPOSE) --profile gateway --profile neo4j --profile claude-code --profile ollama down -v --remove-orphans
+	$(COMPOSE) --profile gateway --profile neo4j --profile claude-code down -v --remove-orphans
 	find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
 	rm -rf dist/ build/ *.egg-info
 
@@ -63,14 +59,12 @@ nuke: ## Full cleanup: containers + volumes + ALL images (frees max disk space)
 	@echo "\033[1;33m  WARNING: This will remove ALL blhackbox containers, volumes, AND images.\033[0m"
 	@echo "\033[2m  You will need to 'docker compose pull' or 'docker compose build' again.\033[0m"
 	@echo ""
-	$(COMPOSE) --profile gateway --profile neo4j --profile claude-code --profile ollama down -v --remove-orphans --rmi all
+	$(COMPOSE) --profile gateway --profile neo4j --profile claude-code down -v --remove-orphans --rmi all
 	@echo ""
 	@echo "\033[2m  Pruning dangling images and build cache...\033[0m"
 	docker image prune -f
 	docker builder prune -f
 	@echo ""
-	@echo "\033[2m  Removing downloaded Ollama models (if volume still exists)...\033[0m"
-	docker volume rm blhackbox_ollama_models 2>/dev/null || true
 	docker volume rm blhackbox_portainer_data 2>/dev/null || true
 	docker volume rm blhackbox_neo4j_data 2>/dev/null || true
 	docker volume rm blhackbox_neo4j_logs 2>/dev/null || true
@@ -97,7 +91,7 @@ status: ## Health status of all containers
 	@echo ""
 	@echo "\033[1m  blhackbox Container Status\033[0m"
 	@echo "\033[2m  ──────────────────────────────────────\033[0m"
-	@$(COMPOSE) --profile gateway --profile neo4j --profile claude-code --profile ollama ps --format "table {{.Name}}\t{{.Status}}\t{{.Ports}}" 2>/dev/null || $(COMPOSE) ps
+	@$(COMPOSE) --profile gateway --profile neo4j --profile claude-code ps --format "table {{.Name}}\t{{.Status}}\t{{.Ports}}" 2>/dev/null || $(COMPOSE) ps
 	@echo ""
 
 health: ## Quick health check of all MCP servers
@@ -113,21 +107,6 @@ health: ## Quick health check of all MCP servers
 	@printf "  %-22s " "Screenshot MCP (9004)"; \
 		docker exec blhackbox-screenshot-mcp python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:9004/health')" > /dev/null 2>&1 \
 		&& echo "\033[32m[OK]\033[0m" || echo "\033[31m[FAIL]\033[0m"
-	@printf "  %-22s " "Ollama MCP (9000)"; \
-		docker inspect --format='{{.State.Running}}' blhackbox-ollama-mcp 2>/dev/null | grep -q "true" \
-		&& echo "\033[32m[OK]\033[0m" || echo "\033[33m[OFF]\033[0m  (optional — enable with: make up-ollama)"
-	@printf "  %-22s " "Ollama (11434)"; \
-		docker inspect --format='{{.State.Running}}' blhackbox-ollama 2>/dev/null | grep -q "true" \
-		&& echo "\033[32m[OK]\033[0m" || echo "\033[33m[OFF]\033[0m  (optional — enable with: make up-ollama)"
-	@printf "  %-22s " "Agent Ingestion"; \
-		docker inspect --format='{{.State.Running}}' blhackbox-agent-ingestion 2>/dev/null | grep -q "true" \
-		&& echo "\033[32m[OK]\033[0m" || echo "\033[33m[OFF]\033[0m  (optional)"
-	@printf "  %-22s " "Agent Processing"; \
-		docker inspect --format='{{.State.Running}}' blhackbox-agent-processing 2>/dev/null | grep -q "true" \
-		&& echo "\033[32m[OK]\033[0m" || echo "\033[33m[OFF]\033[0m  (optional)"
-	@printf "  %-22s " "Agent Synthesis"; \
-		docker inspect --format='{{.State.Running}}' blhackbox-agent-synthesis 2>/dev/null | grep -q "true" \
-		&& echo "\033[32m[OK]\033[0m" || echo "\033[33m[OFF]\033[0m  (optional)"
 	@printf "  %-22s " "MCP Gateway (8080)"; \
 		docker inspect --format='{{.State.Running}}' blhackbox-mcp-gateway 2>/dev/null | grep -q "true" \
 		&& echo "\033[32m[OK]\033[0m" || echo "\033[33m[OFF]\033[0m  (optional — enable with: make up-gateway)"
@@ -136,13 +115,6 @@ health: ## Quick health check of all MCP servers
 		&& echo "\033[32m[OK]\033[0m  https://localhost:9443" || echo "\033[31m[FAIL]\033[0m"
 	@echo ""
 
-# ── Ollama ──────────────────────────────────────────────────────
-ollama-pull: ## Pull default Ollama model into container
-	docker exec blhackbox-ollama ollama pull $$(grep OLLAMA_MODEL .env | cut -d= -f2)
-
-ollama-shell: ## Shell into Ollama container
-	docker exec -it blhackbox-ollama /bin/bash
-
 # ── Monitoring ──────────────────────────────────────────────────
 portainer: ## Open Portainer dashboard (first run: create admin account)
 	@echo ""
@@ -158,9 +130,6 @@ portainer: ## Open Portainer dashboard (first run: create admin account)
 gateway-logs: ## Live MCP tool call log (requires --profile gateway)
 	$(COMPOSE) logs -f mcp-gateway
 
-logs-ollama-mcp: ## Tail Ollama MCP server logs
-	$(COMPOSE) logs -f ollama-mcp
-
 logs-kali: ## Tail Kali MCP server logs
 	$(COMPOSE) logs -f kali-mcp
 
@@ -170,22 +139,10 @@ logs-wireshark: ## Tail WireMCP server logs
 logs-screenshot: ## Tail Screenshot MCP server logs
 	$(COMPOSE) logs -f screenshot-mcp
 
-logs-agent-ingestion: ## Tail Ingestion Agent logs
-	$(COMPOSE) logs -f agent-ingestion
-
-logs-agent-processing: ## Tail Processing Agent logs
-	$(COMPOSE) logs -f agent-processing
-
-logs-agent-synthesis: ## Tail Synthesis Agent logs
-	$(COMPOSE) logs -f agent-synthesis
-
 neo4j-browser: ## Open Neo4j Browser
 	@open http://localhost:7474 2>/dev/null || xdg-open http://localhost:7474
 
 # ── Per-service restart ──────────────────────────────────────────
-restart-ollama-mcp: ## Restart Ollama MCP server
-	$(COMPOSE) restart ollama-mcp
-
 restart-kali: ## Restart Kali MCP server
 	$(COMPOSE) restart kali-mcp
 
@@ -195,9 +152,6 @@ restart-wireshark: ## Restart WireMCP server
 restart-screenshot: ## Restart Screenshot MCP server
 	$(COMPOSE) restart screenshot-mcp
 
-restart-agents: ## Restart all 3 agent containers
-	$(COMPOSE) restart agent-ingestion agent-processing agent-synthesis
-
 # ── Recon & Reporting ──────────────────────────────────────────
 wordlists: ## Download common wordlists
 	mkdir -p wordlists
@@ -225,16 +179,8 @@ push-all: ## Build and push all custom images to Docker Hub
 	docker build -f docker/kali-mcp.Dockerfile -t crhacky/blhackbox:kali-mcp .
 	docker build -f docker/wire-mcp.Dockerfile -t crhacky/blhackbox:wire-mcp .
 	docker build -f docker/screenshot-mcp.Dockerfile -t crhacky/blhackbox:screenshot-mcp .
-	docker build -f docker/ollama-mcp.Dockerfile -t crhacky/blhackbox:ollama-mcp .
-	docker build -f docker/agent-ingestion.Dockerfile -t crhacky/blhackbox:agent-ingestion .
-	docker build -f docker/agent-processing.Dockerfile -t crhacky/blhackbox:agent-processing .
-	docker build -f docker/agent-synthesis.Dockerfile -t crhacky/blhackbox:agent-synthesis .
 	docker build -f docker/claude-code.Dockerfile -t crhacky/blhackbox:claude-code .
 	docker push crhacky/blhackbox:kali-mcp
 	docker push crhacky/blhackbox:wire-mcp
 	docker push crhacky/blhackbox:screenshot-mcp
-	docker push crhacky/blhackbox:ollama-mcp
-	docker push crhacky/blhackbox:agent-ingestion
-	docker push crhacky/blhackbox:agent-processing
-	docker push crhacky/blhackbox:agent-synthesis
 	docker push crhacky/blhackbox:claude-code
diff --git a/README.md b/README.md
index 1d1badd..6f82c05 100644
--- a/README.md
+++ b/README.md
@@ -29,13 +29,11 @@
 - [How Prompts Flow Through the System](#how-prompts-flow-through-the-system)
 - [Do I Need the MCP Gateway?](#do-i-need-the-mcp-gateway)
 - [Portainer Setup](#portainer-setup)
-- [Ollama Preprocessing Pipeline (Optional)](#ollama-preprocessing-pipeline-optional)
 - [Troubleshooting](#troubleshooting)
 - [CLI Reference](#cli-reference)
 - [Makefile Shortcuts](#makefile-shortcuts)
 - [Docker Hub Images](#docker-hub-images)
 - [Neo4j (Optional)](#neo4j-optional)
-- [GPU Support for Ollama (Optional)](#gpu-support-for-ollama-optional)
 - [Authorization & Verification](#authorization--verification)
 - [Security Notes](#security-notes)
 - [Project Structure](#project-structure)
@@ -143,20 +141,15 @@ manually, create it with: `mkdir -p output/reports output/screenshots output/ses
 | **Claude Code** | Anthropic CLI MCP client in Docker | — | `claude-code` |
 | **MCP Gateway** | Single entry point for host-based MCP clients | 8080 | `gateway` |
 | **Neo4j** | Cross-session knowledge graph | 7474/7687 | `neo4j` |
-| **Ollama MCP** | Legacy thin orchestrator — calls 3 agent containers | 9000 | `ollama` |
-| **Agent: Ingestion** | Parses raw tool output into structured typed data | 8001 | `ollama` |
-| **Agent: Processing** | Deduplicates, compresses, annotates errors | 8002 | `ollama` |
-| **Agent: Synthesis** | Merges into final `AggregatedPayload` | 8003 | `ollama` |
-| **Ollama** | Local LLM inference backend (llama3.1:8b by default) | 11434 | `ollama` |
 
 ---
 
 ## Prerequisites
 
 - **Docker** and **Docker Compose** (Docker Engine on Linux, or Docker Desktop)
-- At least **8 GB RAM** recommended (4 containers in the core stack). If using the optional Ollama pipeline (`--profile ollama`), 16 GB+ is recommended.
+- At least **8 GB RAM** recommended (4 containers in the core stack).
 - An **Anthropic API key** from [console.anthropic.com](https://console.anthropic.com) (**required** for Claude Code)
-- **NVIDIA Container Toolkit** (optional — only needed if using `--profile ollama` with GPU. See [GPU Support](#gpu-support-for-ollama))
+
 
 ---
 
@@ -172,7 +165,7 @@ cd blhackbox
 
 The setup wizard will:
 1. Check prerequisites (Docker, Docker Compose, disk space)
-2. Let you choose optional components (Neo4j, MCP Gateway, Ollama)
+2. Let you choose optional components (Neo4j, MCP Gateway)
 3. Prompt for your `ANTHROPIC_API_KEY` (required for Claude Code in Docker)
 4. Generate `.env` and create the `output/` directory
 5. Pull Docker images and start all services
@@ -242,9 +235,6 @@ You should see 4 containers, all "Up" or "healthy":
 - `blhackbox-screenshot-mcp`
 - `blhackbox-portainer`
 
-> **Want local-only processing?** Use `make up-ollama` to also start the
-> Ollama pipeline (adds 5 more containers, requires 16 GB+ RAM).
-
 > **First time?** Open Portainer at `https://localhost:9443` and create an admin
 > account within 5 minutes. See [Portainer Setup](#portainer-setup).
 
@@ -287,7 +277,6 @@ Checking service connectivity...
   Kali MCP               [ OK ]
   WireMCP                [ OK ]
   Screenshot MCP         [ OK ]
-  Ollama Pipeline        [ WARN ]  (optional — not running)
 
 ──────────────────────────────────────────────────
   All 3 services connected.
@@ -534,30 +523,6 @@ Then open `https://localhost:9443` again and create your account.
 
 ---
 
-## Ollama Preprocessing Pipeline (Optional)
-
-> **Since v2.1, the MCP host (Claude) handles data aggregation directly.**
-> The Ollama pipeline is kept as an optional fallback for local-only / offline
-> processing where you don't want to use the MCP host's intelligence.
-
-Enable with: `docker compose --profile ollama up -d` (or `make up-ollama`).
-
-The Ollama MCP Server is a thin orchestrator built with
-[FastMCP](https://github.com/modelcontextprotocol/python-sdk) that calls 3
-agent containers in sequence via HTTP. Each agent container is a FastAPI server
-that calls Ollama via the official
-[`ollama` Python package](https://github.com/ollama/ollama-python) with a
-task-specific system prompt.
-
-1. **Ingestion Agent** (`agent-ingestion:8001`) — Parses raw tool output into structured typed data
-2. **Processing Agent** (`agent-processing:8002`) — Deduplicates, compresses, annotates error_log with security_relevance
-3. **Synthesis Agent** (`agent-synthesis:8003`) — Merges into final `AggregatedPayload`
-
-Agent prompts are baked into each container from `blhackbox/prompts/agents/*.md`
-at build time. Override via volume mount for tuning without rebuilding.
-
----
-
 ## Troubleshooting
 
 ### Claude Code shows "Status: failed" for MCP servers
@@ -597,28 +562,6 @@ missed it, restart:
 docker compose restart portainer
 ```
 
-### Ollama model not pulled (only if using --profile ollama)
-
-The agents need a model loaded in Ollama. Without it, the preprocessing pipeline
-returns empty results:
-
-```bash
-make ollama-pull     # pulls the model specified in .env (default: llama3.1:8b)
-```
-
-If the model fails to load with an "out of memory" error, your system doesn't
-have enough RAM for the configured model. Try a smaller model:
-
-```bash
-# Edit .env and change OLLAMA_MODEL to a smaller model:
-OLLAMA_MODEL=llama3.2:3b
-# Then re-pull:
-make ollama-pull
-```
-
-> **Note:** If you're not using `--profile ollama`, you don't need to pull any
-> model. The MCP host (Claude) handles aggregation directly.
-
 ### MCP Gateway doesn't start
 
 The gateway is **optional** — Claude Code in Docker does not use it. If you
@@ -628,25 +571,17 @@ need it for Claude Desktop / ChatGPT:
 2. Start with the gateway profile: `make up-gateway`
 3. Check logs: `make gateway-logs`
 
-### NVIDIA GPU errors on startup
-
-GPU acceleration is disabled by default. If you enabled it by uncommenting the
-`deploy` block and see errors, ensure the
-[NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html)
-is installed. See [GPU Support](#gpu-support-for-ollama).
-
 ### Container keeps restarting
 
 Check its logs for the specific error:
 
 ```bash
-docker compose logs <service-name>     # e.g., kali-mcp, ollama-mcp
+docker compose logs <service-name>     # e.g., kali-mcp, wire-mcp
 ```
 
 Common causes:
 - Port conflict on the host
 - Insufficient memory
-- Missing Ollama model (only if using `--profile ollama`)
 
 ---
 
@@ -683,7 +618,6 @@ make setup                 # Interactive setup wizard (prereqs, .env, pull, star
 make help                  # Show all available targets
 make pull                  # Pull all pre-built images from Docker Hub
 make up                    # Start core stack (4 containers)
-make up-ollama             # Start with Ollama pipeline (9 containers, legacy)
 make up-full               # Start with Neo4j (5 containers)
 make up-gateway            # Start with MCP Gateway for Claude Desktop (5 containers)
 make down                  # Stop all services
@@ -692,17 +626,11 @@ make status                # Container status table
 make health                # Quick health check of all services
 make test                  # Run tests
 make lint                  # Run linter
-make ollama-pull           # Pull Ollama model (only if using --profile ollama)
 make portainer             # Open Portainer dashboard (shows setup instructions)
 make gateway-logs          # Live MCP Gateway logs (requires --profile gateway)
-make restart-agents        # Restart all 3 agent containers (requires --profile ollama)
 make logs-kali             # Tail Kali MCP logs (includes Metasploit)
 make logs-wireshark        # Tail WireMCP logs
 make logs-screenshot       # Tail Screenshot MCP logs
-make logs-ollama-mcp       # Tail Ollama MCP logs (requires --profile ollama)
-make logs-agent-ingestion  # Tail Ingestion Agent logs (requires --profile ollama)
-make logs-agent-processing # Tail Processing Agent logs (requires --profile ollama)
-make logs-agent-synthesis  # Tail Synthesis Agent logs (requires --profile ollama)
 make inject-verification   # Render verification.env → active authorization document
 make push-all              # Build and push all images to Docker Hub
 ```
@@ -731,13 +659,6 @@ All custom images are published to `crhacky/blhackbox`:
 | `crhacky/blhackbox:wire-mcp` | WireMCP Server (tshark, 7 tools) |
 | `crhacky/blhackbox:screenshot-mcp` | Screenshot MCP Server (headless Chromium, 4 tools) |
 | `crhacky/blhackbox:claude-code` | Claude Code CLI client (direct SSE to MCP servers) |
-| `crhacky/blhackbox:ollama-mcp` | Ollama MCP Server — optional, `--profile ollama` |
-| `crhacky/blhackbox:agent-ingestion` | Agent 1: Ingestion — optional, `--profile ollama` |
-| `crhacky/blhackbox:agent-processing` | Agent 2: Processing — optional, `--profile ollama` |
-| `crhacky/blhackbox:agent-synthesis` | Agent 3: Synthesis — optional, `--profile ollama` |
-
-Custom-built locally (no pre-built image on Docker Hub):
-- `crhacky/blhackbox:ollama` (wraps `ollama/ollama:latest` with auto-pull entrypoint — optional, `--profile ollama`)
 
 Official images pulled directly:
 - `portainer/portainer-ce:latest`
@@ -759,34 +680,6 @@ Useful for recurring engagements against the same targets.
 
 ---
 
-## GPU Support for Ollama (Optional)
-
-> **Only relevant if using `--profile ollama`.** The default stack does not
-> use Ollama — the MCP host handles aggregation directly.
-
-GPU acceleration is **disabled by default** in `docker-compose.yml` for broad
-compatibility. Ollama runs on CPU out of the box.
-
-**If you have an NVIDIA GPU**, uncomment the `deploy` block under the `ollama`
-service in `docker-compose.yml` to enable GPU acceleration:
-
-```yaml
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: all
-              capabilities: [gpu]
-```
-
-This requires the
-[NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html)
-to be installed on the host. GPU acceleration significantly speeds up Ollama
-inference for the preprocessing pipeline.
-
----
-
 ## Authorization & Verification
 
 Before running any pentest template, blhackbox requires an **active verification
@@ -925,8 +818,6 @@ Then run `make inject-verification` and start your Claude Code session.
   an active authorization. The rendered document (`.claude/verification-active.md`)
   is git-ignored and never committed.
 - **Neo4j**: Set a strong password in `.env`. Never use defaults in production.
-- **Agent containers** (optional Ollama pipeline): Communicate only on the
-  internal `blhackbox_net` Docker network. No ports are exposed to the host.
 - **Portainer**: Uses HTTPS with a self-signed certificate. Create a strong
   admin password on first run.
 
@@ -952,31 +843,15 @@ blhackbox/
 │   ├── kali-mcp.Dockerfile          # Kali Linux + Metasploit Framework
 │   ├── wire-mcp.Dockerfile
 │   ├── screenshot-mcp.Dockerfile
-│   ├── ollama.Dockerfile             # optional (--profile ollama)
-│   ├── ollama-mcp.Dockerfile         # optional (--profile ollama)
-│   ├── agent-ingestion.Dockerfile    # optional (--profile ollama)
-│   ├── agent-processing.Dockerfile   # optional (--profile ollama)
-│   ├── agent-synthesis.Dockerfile    # optional (--profile ollama)
 │   ├── claude-code.Dockerfile       # MCP client container
 │   └── claude-code-entrypoint.sh    # Startup script with health checks
 ├── kali-mcp/                        # Kali MCP server (70+ tools + Metasploit)
 ├── wire-mcp/                        # WireMCP server (tshark, 7 tools)
 ├── screenshot-mcp/                  # Screenshot MCP server (Playwright, 4 tools)
 ├── metasploit-mcp/                  # [DEPRECATED] Standalone MSF RPC server (kept for reference)
-├── mcp_servers/
-│   └── ollama_mcp_server.py         # thin MCP orchestrator (optional)
 ├── blhackbox/
 │   ├── mcp/
 │   │   └── server.py               # blhackbox MCP server (stdio)
-│   ├── agents/                      # agent server + library code
-│   │   ├── base_agent.py            # base class (library/testing)
-│   │   ├── base_agent_server.py     # FastAPI server base
-│   │   ├── ingestion_agent.py       # library class
-│   │   ├── ingestion_server.py      # container entry point
-│   │   ├── processing_agent.py
-│   │   ├── processing_server.py
-│   │   ├── synthesis_agent.py
-│   │   └── synthesis_server.py
 │   ├── models/
 │   │   ├── aggregated_payload.py    # AggregatedPayload Pydantic model
 │   │   ├── base.py
@@ -984,11 +859,7 @@ blhackbox/
 │   ├── prompts/
 │   │   ├── claude_playbook.md       # pentest playbook for MCP host
 │   │   ├── verification.md          # authorization template ({{PLACEHOLDER}} tokens)
-│   │   ├── inject_verification.py   # renders template → active document
-│   │   └── agents/
-│   │       ├── ingestionagent.md
-│   │       ├── processingagent.md
-│   │       └── synthesisagent.md
+│   │   └── inject_verification.py   # renders template → active document
 │   ├── core/
 │   │   ├── knowledge_graph.py
 │   │   ├── graph_exporter.py
diff --git a/blhackbox-mcp-catalog.yaml b/blhackbox-mcp-catalog.yaml
index a16d6d9..7e1e8be 100644
--- a/blhackbox-mcp-catalog.yaml
+++ b/blhackbox-mcp-catalog.yaml
@@ -38,12 +38,3 @@ registry:
     remote:
       url: "http://screenshot-mcp:9004/sse"
       transport_type: sse
-
-  # ollama-mcp is optional (--profile ollama). Uncomment if using the legacy pipeline.
-  # ollama-mcp:
-  #   description: "blhackbox Ollama preprocessing pipeline — 3-agent data pipeline for scan result aggregation"
-  #   title: "Ollama MCP Server"
-  #   type: "server"
-  #   remote:
-  #     url: "http://ollama-mcp:9000/sse"
-  #     transport_type: sse
diff --git a/blhackbox-mcp.json b/blhackbox-mcp.json
index ccdda69..a149e7d 100644
--- a/blhackbox-mcp.json
+++ b/blhackbox-mcp.json
@@ -15,18 +15,6 @@
       "url": "http://localhost:9004/sse",
       "description": "Screenshot MCP Server — headless Chromium screenshots for bug bounty PoC evidence capture (4 tools)"
     },
-    "blhackbox-aggregator": {
-      "command": "python3",
-      "args": ["mcp_servers/ollama_mcp_server.py"],
-      "env": {
-        "OLLAMA_URL": "http://localhost:11434",
-        "OLLAMA_MODEL": "llama3.1:8b",
-        "NEO4J_URI": "${NEO4J_URI}",
-        "NEO4J_USER": "${NEO4J_USER}",
-        "NEO4J_PASSWORD": "${NEO4J_PASSWORD}"
-      },
-      "description": "blhackbox custom aggregator MCP server — NOT an official Ollama product. Orchestrates local Ollama preprocessing agents to clean raw pentest data before Claude analysis."
-    },
     "blhackbox": {
       "command": "blhackbox",
       "args": ["mcp"],
diff --git a/blhackbox/__init__.py b/blhackbox/__init__.py
index aa8605a..c19d5cd 100644
--- a/blhackbox/__init__.py
+++ b/blhackbox/__init__.py
@@ -1,3 +1,3 @@
-"""Blhackbox – MCP-based autonomous pentesting with knowledge graph and Ollama preprocessing."""
+"""Blhackbox – MCP-based autonomous pentesting with knowledge graph."""
 
 __version__ = "2.0.0"
diff --git a/blhackbox/agents/__init__.py b/blhackbox/agents/__init__.py
deleted file mode 100644
index 6b965b9..0000000
--- a/blhackbox/agents/__init__.py
+++ /dev/null
@@ -1,24 +0,0 @@
-"""Ollama preprocessing agents for the blhackbox pipeline.
-
-Three agents run sequentially as separate containers:
-  1. IngestionAgent  — parse raw tool output into structured data
-  2. ProcessingAgent — deduplicate, compress, annotate error_log
-  3. SynthesisAgent  — merge into final AggregatedPayload
-
-Each agent runs as a FastAPI HTTP server (see base_agent_server.py).
-The BaseAgent class is kept for library/testing use.
-"""
-
-from blhackbox.agents.base_agent import BaseAgent
-from blhackbox.agents.base_agent_server import BaseAgentServer
-from blhackbox.agents.ingestion_agent import IngestionAgent
-from blhackbox.agents.processing_agent import ProcessingAgent
-from blhackbox.agents.synthesis_agent import SynthesisAgent
-
-__all__ = [
-    "BaseAgent",
-    "BaseAgentServer",
-    "IngestionAgent",
-    "ProcessingAgent",
-    "SynthesisAgent",
-]
diff --git a/blhackbox/agents/base_agent.py b/blhackbox/agents/base_agent.py
deleted file mode 100644
index ebf74c8..0000000
--- a/blhackbox/agents/base_agent.py
+++ /dev/null
@@ -1,144 +0,0 @@
-"""Base class for all Ollama preprocessing agents.
-
-Each agent is a plain Python class that:
-1. Loads a task-specific system prompt from a .md file at runtime
-2. Sends the prompt + raw pentest data to Ollama via the official ``ollama``
-   Python package
-3. Parses the JSON response into a Python dict
-
-There is no agent framework involved — just ``ollama.AsyncClient`` calls to a
-standard Ollama instance running unchanged as a Docker container.
-"""
-
-from __future__ import annotations
-
-import asyncio
-import json
-import logging
-import os
-from pathlib import Path
-from typing import Any
-
-from ollama import AsyncClient, ResponseError
-
-logger = logging.getLogger("blhackbox.agents.base")
-
-# Resolve prompts directory relative to this file
-_PROMPTS_DIR = Path(__file__).resolve().parent.parent / "prompts" / "agents"
-
-# Configurable via environment — mirrors the server defaults.
-_OLLAMA_TIMEOUT = float(os.getenv("OLLAMA_TIMEOUT", "300"))
-_OLLAMA_NUM_CTX = int(os.getenv("OLLAMA_NUM_CTX", "8192"))
-_OLLAMA_KEEP_ALIVE = os.getenv("OLLAMA_KEEP_ALIVE", "30m")
-_OLLAMA_RETRIES = int(os.getenv("OLLAMA_RETRIES", "2"))
-
-
-def _serialize_data(data: dict | str) -> str:
-    """Convert data to a proper JSON string for Ollama.
-
-    Dicts are serialised with ``json.dumps`` so that Ollama receives valid
-    JSON instead of the Python repr that ``str()`` would produce.
-    """
-    if isinstance(data, str):
-        return data
-    return json.dumps(data, default=str)
-
-
-class BaseAgent:
-    """Abstract base for Ollama preprocessing agents.
-
-    Subclasses are named IngestionAgent, ProcessingAgent, SynthesisAgent.
-    The prompt file is determined by lowercasing the class name
-    (e.g. ``IngestionAgent`` loads ``prompts/agents/ingestionagent.md``).
-    """
-
-    def __init__(
-        self,
-        ollama_host: str = "http://localhost:11434",
-        model: str = "llama3.1:8b",
-    ) -> None:
-        self.ollama_host = ollama_host.rstrip("/")
-        self.model = model
-        # Load system prompt from prompts/agents/<classname>.md at runtime
-        prompt_file = _PROMPTS_DIR / f"{self.__class__.__name__.lower()}.md"
-        if prompt_file.exists():
-            self.system_prompt = prompt_file.read_text(encoding="utf-8")
-        else:
-            logger.warning("Prompt file not found: %s", prompt_file)
-            self.system_prompt = (
-                f"You are a {self.__class__.__name__} data processing agent. "
-                "Respond only in valid JSON."
-            )
-
-    async def process(self, data: dict | str) -> dict[str, Any]:
-        """Send data to Ollama for processing and return parsed JSON.
-
-        Retries transient failures with exponential backoff.  If Ollama is
-        unreachable or returns invalid JSON after all attempts, returns an
-        empty dict — the caller is responsible for degraded handling.
-        """
-        user_content = _serialize_data(data)
-
-        for attempt in range(1 + _OLLAMA_RETRIES):
-            try:
-                client = AsyncClient(
-                    host=self.ollama_host, timeout=_OLLAMA_TIMEOUT,
-                )
-                response = await client.chat(
-                    model=self.model,
-                    messages=[
-                        {"role": "system", "content": self.system_prompt},
-                        {"role": "user", "content": user_content},
-                    ],
-                    format="json",
-                    options={"num_ctx": _OLLAMA_NUM_CTX},
-                    keep_alive=_OLLAMA_KEEP_ALIVE,
-                )
-                return self._parse(response)
-            except ResponseError as exc:
-                logger.warning(
-                    "%s: Ollama error (attempt %d/%d): %s",
-                    self.__class__.__name__, attempt + 1, 1 + _OLLAMA_RETRIES, exc,
-                )
-            except Exception as exc:
-                logger.warning(
-                    "%s: Ollama request failed (attempt %d/%d): %s",
-                    self.__class__.__name__, attempt + 1, 1 + _OLLAMA_RETRIES, exc,
-                )
-
-            if attempt < _OLLAMA_RETRIES:
-                await asyncio.sleep(2 ** attempt)
-
-        logger.error(
-            "%s: all %d attempts failed", self.__class__.__name__, 1 + _OLLAMA_RETRIES,
-        )
-        return {}
-
-    def _parse(self, response: Any) -> dict[str, Any]:
-        """Extract and parse the JSON content from Ollama's response."""
-        content = response.message.content or ""
-
-        if not content:
-            logger.warning(
-                "%s: Empty response from Ollama", self.__class__.__name__
-            )
-            return {}
-
-        try:
-            return json.loads(content)
-        except json.JSONDecodeError:
-            # Try to extract JSON from the response text
-            text = content.strip()
-            start = text.find("{")
-            end = text.rfind("}") + 1
-            if start >= 0 and end > start:
-                try:
-                    return json.loads(text[start:end])
-                except json.JSONDecodeError:
-                    pass
-            logger.warning(
-                "%s: Could not parse Ollama response as JSON: %s",
-                self.__class__.__name__,
-                text[:200],
-            )
-            return {}
diff --git a/blhackbox/agents/base_agent_server.py b/blhackbox/agents/base_agent_server.py
deleted file mode 100644
index 4a71fb2..0000000
--- a/blhackbox/agents/base_agent_server.py
+++ /dev/null
@@ -1,399 +0,0 @@
-"""Base FastAPI agent server for blhackbox Ollama preprocessing agents.
-
-Each agent subclass exposes POST /process that accepts raw data,
-calls Ollama via the official ``ollama`` Python package, and
-returns structured JSON.
-
-These run as separate Docker containers, NOT inside the ollama-mcp server.
-"""
-
-from __future__ import annotations
-
-import asyncio
-import json
-import logging
-import os
-from contextlib import asynccontextmanager
-from pathlib import Path
-from typing import Any
-
-import uvicorn
-from fastapi import FastAPI, HTTPException
-from ollama import AsyncClient, ResponseError
-from pydantic import BaseModel
-
-logger = logging.getLogger("blhackbox.agent_server")
-
-OLLAMA_HOST = os.getenv("OLLAMA_HOST", "http://ollama:11434")
-OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3.1:8b")
-
-# Model fallback chain — if the primary model fails (e.g. OOM), try these
-# in order. Set via OLLAMA_FALLBACK_MODELS (comma-separated).
-_DEFAULT_FALLBACKS = "llama3.1:8b,mistral:7b,phi3:mini,tinyllama"
-OLLAMA_FALLBACK_MODELS = [
-    m.strip()
-    for m in os.getenv("OLLAMA_FALLBACK_MODELS", _DEFAULT_FALLBACKS).split(",")
-    if m.strip()
-]
-
-# Timeout (seconds) for Ollama requests — generous to cover cold-start model
-# loading, which can take minutes on first invocation.
-OLLAMA_TIMEOUT = float(os.getenv("OLLAMA_TIMEOUT", "300"))
-
-# Context window size — large pentest outputs need more than the default 2048.
-OLLAMA_NUM_CTX = int(os.getenv("OLLAMA_NUM_CTX", "8192"))
-
-# Keep the model in memory between sequential agent calls to avoid repeated
-# cold-start loading.  Default: 10 minutes.
-OLLAMA_KEEP_ALIVE = os.getenv("OLLAMA_KEEP_ALIVE", "30m")
-
-# Number of retries for transient Ollama failures.
-OLLAMA_RETRIES = int(os.getenv("OLLAMA_RETRIES", "2"))
-
-# Prompt directory — resolved at container build time
-_PROMPTS_DIR = Path(__file__).resolve().parent.parent / "prompts" / "agents"
-
-
-def _get_available_ram_gb() -> float:
-    """Return available system RAM in GiB, or -1 if unknown."""
-    try:
-        import psutil
-        return psutil.virtual_memory().available / (1024 ** 3)
-    except ImportError:
-        pass
-    # Fallback: read /proc/meminfo on Linux
-    try:
-        with open("/proc/meminfo") as f:
-            for line in f:
-                if line.startswith("MemAvailable:"):
-                    kb = int(line.split()[1])
-                    return kb / (1024 ** 2)
-    except (OSError, ValueError):
-        pass
-    return -1.0
-
-
-# Rough RAM requirements per model (GiB).  Used for pre-flight check.
-_MODEL_RAM_REQUIREMENTS = {
-    "llama3.3": 41.5,
-    "llama3.1:70b": 41.5,
-    "llama3.1:8b": 5.5,
-    "llama3.2": 5.5,
-    "mistral:7b": 5.5,
-    "phi3:mini": 3.0,
-    "phi3:medium": 8.5,
-    "tinyllama": 1.5,
-    "qwen2:7b": 5.5,
-}
-
-
-def _select_model(requested: str) -> str:
-    """Select the best model that fits in available RAM.
-
-    If the requested model fits, use it. Otherwise, walk the fallback chain
-    and pick the first model that fits. If nothing fits, return the
-    smallest fallback (best-effort).
-    """
-    avail_ram = _get_available_ram_gb()
-    if avail_ram < 0:
-        logger.info("Cannot determine available RAM — using requested model %s", requested)
-        return requested
-
-    logger.info("Available RAM: %.1f GiB", avail_ram)
-
-    # Check if requested model fits
-    req_ram = _MODEL_RAM_REQUIREMENTS.get(requested, 0)
-    if req_ram == 0 or req_ram <= avail_ram:
-        logger.info("Model %s (%.1f GiB) fits in available RAM", requested, req_ram)
-        return requested
-
-    logger.warning(
-        "Model %s requires %.1f GiB but only %.1f GiB available — checking fallbacks",
-        requested, req_ram, avail_ram,
-    )
-
-    # Try fallback chain
-    for fallback in OLLAMA_FALLBACK_MODELS:
-        fb_ram = _MODEL_RAM_REQUIREMENTS.get(fallback, 0)
-        if fb_ram == 0 or fb_ram <= avail_ram:
-            logger.info(
-                "Selected fallback model %s (%.1f GiB) — fits in %.1f GiB RAM",
-                fallback, fb_ram, avail_ram,
-            )
-            return fallback
-
-    # Nothing fits — use smallest fallback as best-effort
-    smallest = OLLAMA_FALLBACK_MODELS[-1] if OLLAMA_FALLBACK_MODELS else requested
-    logger.warning(
-        "No model fits in %.1f GiB RAM — using %s as best-effort fallback",
-        avail_ram, smallest,
-    )
-    return smallest
-
-
-def _serialize_data(data: dict | str) -> str:
-    """Convert request data to a proper JSON string for Ollama.
-
-    If *data* is already a string it is returned as-is.  If it is a dict
-    (the typical case for Processing / Synthesis agents), it is serialised
-    with ``json.dumps`` so that Ollama receives valid JSON — **not** the
-    Python repr that ``str()`` would produce.
-    """
-    if isinstance(data, str):
-        return data
-    return json.dumps(data, default=str)
-
-
-class ProcessRequest(BaseModel):
-    """Request body for the /process endpoint."""
-
-    data: dict | str
-    session_id: str = ""
-    target: str = ""
-
-
-class BaseAgentServer:
-    """Create a FastAPI app for a named agent.
-
-    The agent loads its system prompt from
-    ``blhackbox/prompts/agents/<agent_name>.md`` and exposes:
-      - GET  /health   — liveness check (also verifies Ollama reachability)
-      - POST /process  — send data to Ollama and return structured JSON
-    """
-
-    def __init__(self, agent_name: str) -> None:
-        self.agent_name = agent_name
-
-        prompt_file = _PROMPTS_DIR / f"{agent_name.lower()}.md"
-        if prompt_file.exists():
-            self.system_prompt = prompt_file.read_text(encoding="utf-8")
-        else:
-            logger.warning("Prompt file not found: %s — using fallback", prompt_file)
-            self.system_prompt = (
-                f"You are a {agent_name} data processing agent. "
-                "Respond only in valid JSON."
-            )
-
-        # Select model based on available RAM
-        self.model = _select_model(OLLAMA_MODEL)
-        if self.model != OLLAMA_MODEL:
-            logger.warning(
-                "Model override: %s -> %s (RAM constraint)",
-                OLLAMA_MODEL, self.model,
-            )
-
-        # Create FastAPI app with lifespan for model warmup
-        self.app = FastAPI(
-            title=f"blhackbox {agent_name} Agent",
-            lifespan=self._lifespan,
-        )
-
-        # Register routes
-        self._register_routes()
-
-    @asynccontextmanager
-    async def _lifespan(self, app: FastAPI):
-        """Warm up Ollama model on startup to avoid cold-start 502s."""
-        await self._warmup_model()
-        yield
-
-    async def _warmup_model(self) -> None:
-        """Send a tiny request to Ollama to trigger model loading.
-
-        This runs during FastAPI startup so the model is already in memory
-        by the time the first real /process request arrives.
-        """
-        logger.info("Warming up Ollama model %s at %s …", self.model, OLLAMA_HOST)
-        try:
-            client = AsyncClient(host=OLLAMA_HOST, timeout=OLLAMA_TIMEOUT)
-            await client.chat(
-                model=self.model,
-                messages=[{"role": "user", "content": "hello"}],
-                keep_alive=OLLAMA_KEEP_ALIVE,
-            )
-            logger.info("Model %s is warm and ready", self.model)
-        except ResponseError as exc:
-            # Check if it's an OOM error — try a smaller model
-            err_msg = str(exc).lower()
-            if "memory" in err_msg or "oom" in err_msg:
-                logger.warning(
-                    "Model %s OOM during warmup: %s — trying fallbacks",
-                    self.model, exc,
-                )
-                for fallback in OLLAMA_FALLBACK_MODELS:
-                    if fallback == self.model:
-                        continue
-                    try:
-                        await client.chat(
-                            model=fallback,
-                            messages=[{"role": "user", "content": "hello"}],
-                            keep_alive=OLLAMA_KEEP_ALIVE,
-                        )
-                        logger.info("Fallback model %s loaded successfully", fallback)
-                        self.model = fallback
-                        return
-                    except Exception:
-                        continue
-                logger.error("All model fallbacks failed during warmup")
-            else:
-                logger.warning("Model warmup failed (will retry on first request): %s", exc)
-        except Exception as exc:
-            logger.warning("Model warmup failed (will retry on first request): %s", exc)
-
-    def _register_routes(self) -> None:
-        app = self.app
-        agent_name = self.agent_name
-        system_prompt = self.system_prompt
-
-        # Store reference to self for model access in closures
-        agent_server = self
-
-        @app.get("/health")
-        async def health() -> dict:
-            """Liveness check — also verifies Ollama is reachable."""
-            result: dict[str, Any] = {
-                "status": "ok",
-                "agent": agent_name,
-                "model": agent_server.model,
-                "available_ram_gb": round(_get_available_ram_gb(), 1),
-            }
-            try:
-                client = AsyncClient(host=OLLAMA_HOST, timeout=10.0)
-                models = await client.list()
-                result["ollama"] = "reachable"
-                result["models_loaded"] = len(models.get("models", []))
-            except Exception:
-                result["ollama"] = "unreachable"
-            return result
-
-        @app.post("/process")
-        async def process(req: ProcessRequest) -> dict:
-            user_content = _serialize_data(req.data)
-
-            for attempt in range(1 + OLLAMA_RETRIES):
-                try:
-                    client = AsyncClient(
-                        host=OLLAMA_HOST, timeout=OLLAMA_TIMEOUT,
-                    )
-                    response = await client.chat(
-                        model=agent_server.model,
-                        messages=[
-                            {"role": "system", "content": system_prompt},
-                            {"role": "user", "content": user_content},
-                        ],
-                        format="json",
-                        options={"num_ctx": OLLAMA_NUM_CTX},
-                        keep_alive=OLLAMA_KEEP_ALIVE,
-                    )
-                    # Success — break out of retry loop
-                    break
-                except ResponseError as exc:
-                    err_msg = str(exc).lower()
-                    # Handle OOM by trying fallback models
-                    if "memory" in err_msg or "oom" in err_msg:
-                        logger.warning(
-                            "%s: Model %s OOM — trying fallback models",
-                            agent_name, agent_server.model,
-                        )
-                        fallback_success = False
-                        for fallback in OLLAMA_FALLBACK_MODELS:
-                            if fallback == agent_server.model:
-                                continue
-                            try:
-                                response = await client.chat(
-                                    model=fallback,
-                                    messages=[
-                                        {"role": "system", "content": system_prompt},
-                                        {"role": "user", "content": user_content},
-                                    ],
-                                    format="json",
-                                    options={"num_ctx": OLLAMA_NUM_CTX},
-                                    keep_alive=OLLAMA_KEEP_ALIVE,
-                                )
-                                agent_server.model = fallback
-                                logger.info(
-                                    "%s: Switched to fallback model %s",
-                                    agent_name, fallback,
-                                )
-                                fallback_success = True
-                                break
-                            except Exception:
-                                continue
-                        if fallback_success:
-                            break
-                        raise HTTPException(
-                            status_code=502,
-                            detail=(
-                                f"Ollama OOM: model {agent_server.model} requires more RAM "
-                                f"than available ({_get_available_ram_gb():.1f} GiB). "
-                                f"All fallback models also failed. "
-                                f"Set OLLAMA_MODEL to a smaller model or add more RAM."
-                            ),
-                        ) from exc
-
-                    logger.warning(
-                        "%s: Ollama ResponseError (attempt %d/%d): %s",
-                        agent_name, attempt + 1, 1 + OLLAMA_RETRIES, exc,
-                    )
-                    if attempt < OLLAMA_RETRIES:
-                        await asyncio.sleep(2 ** attempt)
-                        continue
-                    raise HTTPException(
-                        status_code=502,
-                        detail=f"Ollama error after {1 + OLLAMA_RETRIES} attempts: {exc}",
-                    ) from exc
-                except Exception as exc:
-                    logger.warning(
-                        "%s: Ollama request failed (attempt %d/%d): %s",
-                        agent_name, attempt + 1, 1 + OLLAMA_RETRIES, exc,
-                    )
-                    if attempt < OLLAMA_RETRIES:
-                        await asyncio.sleep(2 ** attempt)
-                        continue
-                    raise HTTPException(
-                        status_code=503,
-                        detail=(
-                            f"Ollama unreachable at {OLLAMA_HOST} after "
-                            f"{1 + OLLAMA_RETRIES} attempts: {exc}"
-                        ),
-                    ) from exc
-
-            content = response.message.content or ""
-            if not content:
-                logger.warning(
-                    "%s: Ollama returned empty content for model %s",
-                    agent_name, agent_server.model,
-                )
-                raise HTTPException(
-                    status_code=502,
-                    detail=(
-                        f"{agent_name} received empty response from Ollama "
-                        f"(model: {agent_server.model}). The model may have "
-                        f"failed to generate output for the given input size."
-                    ),
-                )
-
-            try:
-                return json.loads(content)
-            except json.JSONDecodeError:
-                # Try to extract JSON from preamble text
-                text = content.strip()
-                start = text.find("{")
-                end = text.rfind("}") + 1
-                if start >= 0 and end > start:
-                    try:
-                        return json.loads(text[start:end])
-                    except json.JSONDecodeError:
-                        pass
-                raise HTTPException(
-                    status_code=500,
-                    detail=f"Agent returned invalid JSON: {text[:200]}",
-                ) from None
-
-
-def run_agent(agent_name: str, port: int) -> None:
-    """Entry point to start an agent server."""
-    server = BaseAgentServer(agent_name)
-    logging.basicConfig(level=logging.INFO)
-    logger.info("Starting %s agent on port %d (model: %s)", agent_name, port, server.model)
-    uvicorn.run(server.app, host="0.0.0.0", port=port)
diff --git a/blhackbox/agents/ingestion_agent.py b/blhackbox/agents/ingestion_agent.py
deleted file mode 100644
index 36ba5f6..0000000
--- a/blhackbox/agents/ingestion_agent.py
+++ /dev/null
@@ -1,17 +0,0 @@
-"""Ingestion Agent — parses raw tool output into structured typed data.
-
-No filtering, no deduplication — just parse and structure everything.
-"""
-
-from __future__ import annotations
-
-from blhackbox.agents.base_agent import BaseAgent
-
-
-class IngestionAgent(BaseAgent):
-    """Parse all raw tool output into structured typed data objects.
-
-    Input: raw strings (nmap XML, nikto output, gobuster lists, etc.)
-    Output: structured dict — hosts, ports, services, endpoints,
-            CVEs, subdomains, etc.
-    """
diff --git a/blhackbox/agents/ingestion_server.py b/blhackbox/agents/ingestion_server.py
deleted file mode 100644
index 0b561e4..0000000
--- a/blhackbox/agents/ingestion_server.py
+++ /dev/null
@@ -1,10 +0,0 @@
-"""Ingestion Agent — FastAPI container server.
-
-Parses raw tool output into structured typed data.
-Runs as a standalone container on port 8001.
-"""
-
-from blhackbox.agents.base_agent_server import run_agent
-
-if __name__ == "__main__":
-    run_agent("ingestionagent", port=8001)
diff --git a/blhackbox/agents/processing_agent.py b/blhackbox/agents/processing_agent.py
deleted file mode 100644
index d61a095..0000000
--- a/blhackbox/agents/processing_agent.py
+++ /dev/null
@@ -1,19 +0,0 @@
-"""Processing Agent — deduplicates, compresses, and annotates ingested data.
-
-Takes structured data from the Ingestion Agent, removes duplicates,
-extracts errors/anomalies into an annotated error_log, and compresses
-redundant data for optimal context window usage.
-"""
-
-from __future__ import annotations
-
-from blhackbox.agents.base_agent import BaseAgent
-
-
-class ProcessingAgent(BaseAgent):
-    """Clean and compress the Ingestion Agent's structured output.
-
-    Input: Ingestion Agent's structured output dict.
-    Output: deduplicated + compressed data + annotated error_log
-            with security_relevance and security_note fields.
-    """
diff --git a/blhackbox/agents/processing_server.py b/blhackbox/agents/processing_server.py
deleted file mode 100644
index cc0ebf0..0000000
--- a/blhackbox/agents/processing_server.py
+++ /dev/null
@@ -1,10 +0,0 @@
-"""Processing Agent — FastAPI container server.
-
-Deduplicates, compresses, and annotates ingested data.
-Runs as a standalone container on port 8002.
-"""
-
-from blhackbox.agents.base_agent_server import run_agent
-
-if __name__ == "__main__":
-    run_agent("processingagent", port=8002)
diff --git a/blhackbox/agents/synthesis_agent.py b/blhackbox/agents/synthesis_agent.py
deleted file mode 100644
index dd9a111..0000000
--- a/blhackbox/agents/synthesis_agent.py
+++ /dev/null
@@ -1,18 +0,0 @@
-"""Synthesis Agent — merges all agent outputs into a single AggregatedPayload.
-
-Final stage of the preprocessing pipeline. Combines Ingestion and Processing
-agent outputs, resolves conflicts, and adds metadata.
-"""
-
-from __future__ import annotations
-
-from blhackbox.agents.base_agent import BaseAgent
-
-
-class SynthesisAgent(BaseAgent):
-    """Merge Ingestion + Processing outputs into one AggregatedPayload.
-
-    Input: dict containing ingestion_output and processing_output.
-    Output: AggregatedPayload-compatible dict with findings, error_log,
-            and metadata.
-    """
diff --git a/blhackbox/agents/synthesis_server.py b/blhackbox/agents/synthesis_server.py
deleted file mode 100644
index 6ea2d2d..0000000
--- a/blhackbox/agents/synthesis_server.py
+++ /dev/null
@@ -1,10 +0,0 @@
-"""Synthesis Agent — FastAPI container server.
-
-Merges Ingestion + Processing outputs into final AggregatedPayload.
-Runs as a standalone container on port 8003.
-"""
-
-from blhackbox.agents.base_agent_server import run_agent
-
-if __name__ == "__main__":
-    run_agent("synthesisagent", port=8003)
diff --git a/blhackbox/config.py b/blhackbox/config.py
index 6edd0c0..aa183b0 100644
--- a/blhackbox/config.py
+++ b/blhackbox/config.py
@@ -29,10 +29,6 @@ class Settings(BaseSettings):
     )
     neo4j_database: str = Field(default="neo4j", description="Neo4j database name")
 
-    # --- Ollama ---
-    ollama_url: str = Field(default="http://ollama:11434", description="Ollama API URL")
-    ollama_model: str = Field(default="llama3.1:8b", description="Ollama model name")
-
     # --- MCP Gateway ---
     mcp_gateway_port: int = Field(default=8080, description="MCP Gateway port")
 
diff --git a/blhackbox/core/knowledge_graph.py b/blhackbox/core/knowledge_graph.py
index 198a3ee..374d19a 100644
--- a/blhackbox/core/knowledge_graph.py
+++ b/blhackbox/core/knowledge_graph.py
@@ -246,7 +246,7 @@ async def merge_aggregated_session(
         tools_run: list[str] | str = "",
         agents_run: list[str] | str = "",
         compression_ratio: float = 0.0,
-        ollama_model: str = "",
+        model: str = "",
         duration_seconds: float = 0.0,
         warning: str = "",
     ) -> AggregatedSessionNode:
@@ -258,7 +258,7 @@ async def merge_aggregated_session(
             tools_run=tools_run,
             agents_run=agents_run,
             compression_ratio=compression_ratio,
-            ollama_model=ollama_model,
+            model=model,
             duration_seconds=duration_seconds,
             warning=warning,
         )
diff --git a/blhackbox/main.py b/blhackbox/main.py
index 5a297ec..7366f01 100644
--- a/blhackbox/main.py
+++ b/blhackbox/main.py
@@ -54,7 +54,6 @@ def version() -> None:
     """Show the Blhackbox version."""
     print_banner()
     rich_console.print(f"[info]Version:[/info] {blhackbox.__version__}")
-    rich_console.print(f"[info]Ollama URL:[/info] {settings.ollama_url}")
     rich_console.print(f"[info]Neo4j URI:[/info] {settings.neo4j_uri}")
 
 
diff --git a/blhackbox/models/aggregated_payload.py b/blhackbox/models/aggregated_payload.py
index b01589b..a4e609e 100644
--- a/blhackbox/models/aggregated_payload.py
+++ b/blhackbox/models/aggregated_payload.py
@@ -5,9 +5,6 @@
 parsing, deduplicating, and synthesizing them.  The MCP host calls
 ``aggregate_results`` to validate and persist this payload, then
 ``generate_report`` to produce the final pentest report.
-
-Legacy: previously assembled by a 3-agent Ollama pipeline (Ingestion →
-Processing → Synthesis).  That pipeline is now optional (``--profile ollama``).
 """
 
 from __future__ import annotations
@@ -317,14 +314,8 @@ class AggregatedMetadata(BaseModel):
             "output is larger than the raw input."
         ),
     )
-    # Which model performed the aggregation.  When the MCP host (Claude)
-    # does it directly, set to the host model name (e.g. "claude-opus-4-6").
-    # When the legacy Ollama pipeline is used, set to the Ollama model name.
+    # Which model performed the aggregation (e.g. "claude-opus-4-6").
     model: str = ""
-    ollama_model: str = Field(
-        default="",
-        description="Deprecated — use 'model' instead.  Kept for backward compatibility.",
-    )
     duration_seconds: float = 0.0
     stage_timing: PipelineStageTiming = Field(
         default_factory=PipelineStageTiming,
diff --git a/blhackbox/models/graph.py b/blhackbox/models/graph.py
index e3f43ff..169270f 100644
--- a/blhackbox/models/graph.py
+++ b/blhackbox/models/graph.py
@@ -146,7 +146,7 @@ def __init__(self, name: str, category: str = "", **kwargs: Any) -> None:
 
 
 class AggregatedSessionNode(GraphNode):
-    """Represents an aggregated pentest session processed by the Ollama pipeline."""
+    """Represents an aggregated pentest session."""
 
     label: str = "AggregatedSession"
     merge_key: str = "session_id"
@@ -159,7 +159,7 @@ def __init__(
         tools_run: list[str] | str = "",
         agents_run: list[str] | str = "",
         compression_ratio: float = 0.0,
-        ollama_model: str = "",
+        model: str = "",
         duration_seconds: float = 0.0,
         warning: str = "",
         **kwargs: Any,
@@ -186,7 +186,7 @@ def __init__(
                 "tools_run": tools_run_val,
                 "agents_run": agents_run_val,
                 "compression_ratio": compression_ratio,
-                "ollama_model": ollama_model,
+                "model": model,
                 "duration_seconds": duration_seconds,
                 "warning": warning,
             }
diff --git a/blhackbox/prompts/agents/__init__.py b/blhackbox/prompts/agents/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/blhackbox/prompts/agents/ingestionagent.md b/blhackbox/prompts/agents/ingestionagent.md
deleted file mode 100644
index 607c2eb..0000000
--- a/blhackbox/prompts/agents/ingestionagent.md
+++ /dev/null
@@ -1,254 +0,0 @@
-# Ingestion Agent — System Prompt
-
-You are a data ingestion agent for the blhackbox penetration testing framework.
-Your job is to receive raw output from security scanning and **exploitation** tools
-and parse it into structured typed data. You do NOT filter, deduplicate, or discard
-anything — you only parse and structure.
-
-**Exploitation data is critical.** When tool output contains extracted data (database
-rows, file contents, credentials, tokens, command output), you MUST preserve it
-in full in the `evidence` fields. This data IS the proof of impact.
-
-## Input
-
-You will receive raw text output from one or more security tools. The input may
-include any combination of:
-
-- nmap XML, greppable, or normal output (including NSE script results)
-- nikto scan results (including OSVDB references)
-- gobuster/dirb/feroxbuster directory enumeration output
-- masscan output
-- whatweb technology detection output
-- wafw00f WAF detection output
-- sqlmap injection test output (including injection points, dbms info)
-- wpscan WordPress scan results (plugins, themes, users, vulnerabilities)
-- subfinder/amass/fierce/dnsenum subdomain enumeration output
-- hydra/medusa brute force results
-- nuclei template scan results
-- Metasploit MCP JSON responses
-- WHOIS records
-- DNS records (dig, host, nslookup output)
-- Certificate transparency logs
-- SSL/TLS scan output (sslscan, sslyze, testssl.sh)
-- Any other security tool output
-
-## Output
-
-Respond with ONLY a valid JSON object. No preamble, no markdown fences, no
-explanation text. The JSON must match this schema exactly:
-
-```json
-{
-  "hosts": [
-    {
-      "ip": "192.168.1.1",
-      "hostname": "target.com",
-      "os": "Linux 4.15",
-      "ports": [
-        {
-          "port": 80,
-          "protocol": "tcp",
-          "state": "open",
-          "service": "http",
-          "version": "Apache/2.4.41",
-          "banner": "Apache/2.4.41 (Ubuntu)",
-          "nse_scripts": {"http-title": "Default Page", "http-server-header": "Apache/2.4.41"}
-        }
-      ]
-    }
-  ],
-  "ports": [
-    {"port": 443, "protocol": "tcp", "state": "open", "service": "https"}
-  ],
-  "services": [
-    {"name": "http", "version": "Apache/2.4.41", "host": "192.168.1.1", "port": 80, "cpe": "cpe:/a:apache:http_server:2.4.41"}
-  ],
-  "vulnerabilities": [
-    {
-      "id": "CVE-2021-12345",
-      "title": "Apache Path Traversal",
-      "severity": "high",
-      "cvss": 7.5,
-      "host": "192.168.1.1",
-      "port": 80,
-      "description": "Path traversal allowing file read outside webroot",
-      "references": ["https://nvd.nist.gov/vuln/detail/CVE-2021-12345"],
-      "evidence": "GET /..%2f..%2fetc/passwd returned 200 with body: root:x:0:0:root:/root:/bin/bash ...",
-      "poc_steps": ["1. Send GET request to /..%2f..%2fetc/passwd", "2. Observe HTTP 200 response with /etc/passwd contents"],
-      "poc_payload": "curl -k 'https://192.168.1.1/..%2f..%2fetc/passwd'",
-      "tool_source": "nikto"
-    }
-  ],
-  "endpoints": [
-    {"url": "/admin", "method": "GET", "status_code": 200, "content_length": 1234, "redirect": ""}
-  ],
-  "subdomains": ["mail.example.com", "dev.example.com"],
-  "technologies": [
-    {"name": "Apache", "version": "2.4.41", "category": "web-server"}
-  ],
-  "ssl_certs": [
-    {
-      "host": "example.com",
-      "port": 443,
-      "issuer": "Let's Encrypt",
-      "subject": "example.com",
-      "san": ["example.com", "www.example.com"],
-      "not_before": "2024-01-01",
-      "not_after": "2025-01-01",
-      "protocol": "TLSv1.3",
-      "cipher": "TLS_AES_256_GCM_SHA384",
-      "issues": ["weak-cipher", "expired", "self-signed"]
-    }
-  ],
-  "credentials": [
-    {
-      "host": "192.168.1.1",
-      "port": 22,
-      "service": "ssh",
-      "username": "admin",
-      "password": "admin",
-      "tool_source": "hydra"
-    }
-  ],
-  "http_headers": [
-    {
-      "host": "example.com",
-      "port": 443,
-      "missing_security_headers": ["Content-Security-Policy", "X-Frame-Options", "Strict-Transport-Security"],
-      "server": "Apache/2.4.41",
-      "x_powered_by": "PHP/7.4"
-    }
-  ],
-  "whois": {
-    "domain": "example.com",
-    "registrar": "GoDaddy",
-    "creation_date": "2020-01-01",
-    "expiration_date": "2025-01-01",
-    "nameservers": ["ns1.example.com"],
-    "registrant_org": ""
-  },
-  "dns_records": [
-    {"type": "A", "name": "example.com", "value": "93.184.216.34"},
-    {"type": "MX", "name": "example.com", "value": "mail.example.com", "priority": 10},
-    {"type": "TXT", "name": "example.com", "value": "v=spf1 include:_spf.google.com ~all"}
-  ]
-}
-```
-
-## Tool-Specific Parsing Guidance
-
-### nmap
-- Extract OS detection results into `hosts[].os`
-- Parse NSE script output into `hosts[].ports[].nse_scripts` as key-value pairs
-- Extract CPE strings from service detection into `services[].cpe`
-- "filtered" ports are significant — include them with `state: "filtered"`
-- Extract traceroute hops if present
-
-### nikto
-- Each OSVDB reference is a vulnerability — map OSVDB-XXXX to the id field
-- Extract the HTTP method and URL from each finding
-- Note outdated server versions as vulnerabilities (severity: "info" or "low")
-- Extract missing security headers and map to `http_headers[].missing_security_headers`
-- **PoC**: Use the nikto finding URL + method as `poc_payload`, the full nikto output
-  line as `evidence`
-
-### sqlmap
-- Extract confirmed injection points as critical vulnerabilities
-- Include the injection type (blind, error-based, time-based, UNION)
-- Include the DBMS type and version if detected
-- Each confirmed injection point = severity "critical"
-- **PoC**: Extract the sqlmap command as `poc_payload`, the injection point URL + parameter
-  as step 1 of `poc_steps`, the DBMS confirmation as `evidence`
-
-### wpscan
-- Map plugin/theme vulnerabilities to `vulnerabilities[]` with CVE IDs
-- Include outdated plugins/themes as low-severity vulnerabilities
-- Map enumerated users to `credentials[]` with empty password
-
-### hydra/medusa
-- Each successful login goes in `credentials[]`
-- Include the service type (ssh, ftp, http-form, etc.)
-- **PoC**: The hydra/medusa command as `poc_payload`, "Successful login: user:pass" as `evidence`
-
-### SSL/TLS scans
-- Map to `ssl_certs[]`
-- Flag: expired certs, self-signed certs, weak ciphers (RC4, DES, 3DES),
-  weak protocols (SSLv2, SSLv3, TLSv1.0, TLSv1.1), short key lengths (<2048)
-
-### Exploitation Tool Output (sqlmap dumps, metasploit sessions, LFI reads, etc.)
-- **Database dumps**: Include extracted table names, column names, and sample rows
-  (max 5 rows) in the `evidence` field. Include the full sqlmap command as `poc_payload`.
-- **Command execution output** (RCE/command injection): Include the full command
-  output (`id`, `whoami`, `uname -a`, file reads) in `evidence`.
-- **LFI/traversal file reads**: Include the file contents obtained in `evidence`.
-- **SSRF responses**: Include the internal service response body in `evidence`.
-- **Metasploit session output**: Include session commands and their output in `evidence`,
-  the exploit module and options as `poc_payload`.
-- **Authentication bypass**: Include the response body of the protected resource in `evidence`.
-- **IDOR results**: Include both users' response data in `evidence`.
-- **Never truncate extracted data** in evidence fields — this is the proof of impact.
-
-## Rules
-
-1. Parse ALL data from the input — nothing is discarded at this stage.
-2. If a field is unknown, use an empty string "" or 0 as appropriate.
-3. Preserve raw evidence where possible (e.g. banner strings, version strings, HTTP responses).
-4. Map CVE/OSVDB/CWE identifiers whenever they appear in any tool output.
-5. If the input contains multiple tools' output, merge them into the same structure.
-6. Record which tool produced each finding in `tool_source` where applicable.
-7. Treat informational findings as severity "info" — do not skip them.
-8. Arrays that have no data should be `[]`, objects with no data should be `{}`.
-9. Output ONLY valid JSON — no markdown fences, no commentary.
-10. **Extract PoC data for every vulnerability:**
-    - `evidence`: Raw tool output or HTTP response proving the finding (never empty for confirmed vulns).
-    - `poc_steps`: Ordered list of steps to reproduce. Extract from tool output where possible
-      (e.g., sqlmap shows injection steps, nikto shows the request path).
-    - `poc_payload`: The exact command, payload, or HTTP request used. Extract from tool
-      invocation or output (e.g., the sqlmap command line, the nikto finding URL).
-    - If PoC data is not available from the tool output, set `poc_steps: []` and `poc_payload: ""`
-      but ALWAYS populate `evidence` with the raw tool output that detected the finding.
-
-## Example
-
-**Input:**
-```
-=== nmap ===
-Nmap scan report for target.com (10.0.0.1)
-OS: Linux 5.4
-PORT   STATE    SERVICE VERSION
-22/tcp open     ssh     OpenSSH 8.4
-80/tcp open     http    nginx/1.18.0
-| http-title: Login Page
-| http-security-headers:
-|   Missing: X-Frame-Options, Content-Security-Policy
-443/tcp open    ssl/http nginx/1.18.0
-| ssl-cert: Subject: commonName=target.com
-|   Not valid after: 2024-06-01
-8080/tcp filtered http-proxy
-
-=== subfinder ===
-mail.target.com
-dev.target.com
-staging.target.com
-
-=== wafw00f ===
-The site https://target.com is behind Cloudflare (Cloudflare Inc.)
-```
-
-**Output:**
-```json
-{
-  "hosts": [{"ip": "10.0.0.1", "hostname": "target.com", "os": "Linux 5.4", "ports": [{"port": 22, "protocol": "tcp", "state": "open", "service": "ssh", "version": "OpenSSH 8.4", "banner": "", "nse_scripts": {}}, {"port": 80, "protocol": "tcp", "state": "open", "service": "http", "version": "nginx/1.18.0", "banner": "", "nse_scripts": {"http-title": "Login Page"}}, {"port": 443, "protocol": "tcp", "state": "open", "service": "ssl/http", "version": "nginx/1.18.0", "banner": "", "nse_scripts": {}}, {"port": 8080, "protocol": "tcp", "state": "filtered", "service": "http-proxy", "version": "", "banner": "", "nse_scripts": {}}]}],
-  "ports": [{"port": 22, "protocol": "tcp", "state": "open", "service": "ssh"}, {"port": 80, "protocol": "tcp", "state": "open", "service": "http"}, {"port": 443, "protocol": "tcp", "state": "open", "service": "ssl/http"}, {"port": 8080, "protocol": "tcp", "state": "filtered", "service": "http-proxy"}],
-  "services": [{"name": "ssh", "version": "OpenSSH 8.4", "host": "10.0.0.1", "port": 22, "cpe": ""}, {"name": "http", "version": "nginx/1.18.0", "host": "10.0.0.1", "port": 80, "cpe": ""}, {"name": "ssl/http", "version": "nginx/1.18.0", "host": "10.0.0.1", "port": 443, "cpe": ""}],
-  "vulnerabilities": [],
-  "endpoints": [],
-  "subdomains": ["mail.target.com", "dev.target.com", "staging.target.com"],
-  "technologies": [{"name": "OpenSSH", "version": "8.4", "category": "remote-access"}, {"name": "nginx", "version": "1.18.0", "category": "web-server"}, {"name": "Cloudflare", "version": "", "category": "cdn/waf"}],
-  "ssl_certs": [{"host": "target.com", "port": 443, "issuer": "", "subject": "target.com", "san": [], "not_before": "", "not_after": "2024-06-01", "protocol": "", "cipher": "", "issues": []}],
-  "credentials": [],
-  "http_headers": [{"host": "target.com", "port": 80, "missing_security_headers": ["X-Frame-Options", "Content-Security-Policy"], "server": "nginx/1.18.0", "x_powered_by": ""}],
-  "whois": {},
-  "dns_records": []
-}
-```
diff --git a/blhackbox/prompts/agents/processingagent.md b/blhackbox/prompts/agents/processingagent.md
deleted file mode 100644
index 118f2e5..0000000
--- a/blhackbox/prompts/agents/processingagent.md
+++ /dev/null
@@ -1,219 +0,0 @@
-# Processing Agent — System Prompt
-
-You are a data processing agent for the blhackbox penetration testing framework.
-Your job is to take structured data from the Ingestion Agent and clean it:
-deduplicate repeated findings, extract errors/timeouts/anomalies into a separate
-annotated error_log, correlate findings across tools, assess exploitability, and
-compress redundant data so the final payload is as small and dense as possible
-for the MCP host's context window.
-
-**Critical: NEVER discard or compress exploitation evidence.** Extracted data
-(database rows, file contents, credentials, command output, tokens) in `evidence`
-fields is the proof of real-world impact. It must pass through processing intact.
-
-## Input
-
-You will receive a JSON object containing structured data from the Ingestion Agent
-with fields: hosts, ports, services, vulnerabilities, endpoints, subdomains,
-technologies, ssl_certs, credentials, http_headers, whois, dns_records.
-
-## Output
-
-Respond with ONLY a valid JSON object. No preamble, no markdown fences, no
-explanation text. The JSON must match this schema:
-
-```json
-{
-  "findings": {
-    "hosts": [],
-    "ports": [],
-    "services": [],
-    "vulnerabilities": [],
-    "endpoints": [],
-    "subdomains": [],
-    "technologies": [],
-    "ssl_certs": [],
-    "credentials": [],
-    "http_headers": [],
-    "whois": {},
-    "dns_records": []
-  },
-  "error_log": [
-    {
-      "type": "timeout|auth_failure|dns_failure|rate_limit|scan_error|connection_refused|waf_block|other",
-      "count": 1,
-      "locations": ["nmap:port-443"],
-      "likely_cause": "WAF blocking SYN probes",
-      "security_relevance": "none|low|medium|high",
-      "security_note": "Systematic timeouts may indicate active WAF"
-    }
-  ],
-  "attack_surface": {
-    "external_services": 0,
-    "web_applications": 0,
-    "login_panels": 0,
-    "api_endpoints": 0,
-    "outdated_software": 0,
-    "default_credentials": 0,
-    "missing_security_headers": 0,
-    "ssl_issues": 0,
-    "high_value_targets": ["admin panel at /admin", "phpMyAdmin at /phpmyadmin"]
-  }
-}
-```
-
-## Rules
-
-### 1. Deduplication
-- Remove exact duplicate entries across all finding categories.
-- When two entries refer to the same entity (same host+port, same CVE, same endpoint),
-  merge them — keep the version with more detail and more evidence.
-- Merge port lists when the same host appears multiple times.
-
-### 2. Compression
-- Collapse redundant data. If 50 endpoints all return 404, summarize as one entry
-  with a note rather than listing all 50.
-- Merge similar low-severity findings into grouped entries.
-- Keep ALL critical and high severity findings individually — never compress those.
-
-### 3. Cross-Tool Correlation
-- If multiple tools report the same vulnerability (e.g., nikto + nuclei both find
-  CVE-2021-3449), merge into one entry and note both tools in evidence.
-- If nmap shows a service version and nikto reports a vulnerability for that version,
-  increase confidence in the vulnerability.
-- Correlate technology detection (whatweb) with vulnerability reports — if a CVE
-  applies to a detected technology version, flag it.
-- **When merging duplicate findings, preserve the best PoC data:** keep the entry
-  with the most complete `poc_steps`, `poc_payload`, and `evidence`. Merge evidence
-  from both tools (e.g., "Detected by: nikto, nuclei. nikto output: ... nuclei output: ...").
-
-### 4. Severity Assessment
-Reassess severity using these pentesting-specific rules:
-- **critical**: Remote code execution, SQL injection (confirmed), authentication bypass,
-  default/weak credentials on admin interfaces, exposed sensitive data (API keys, passwords)
-- **high**: File inclusion (LFI/RFI), SSRF, XXE, stored XSS, privilege escalation paths,
-  exposed admin panels with login bypass potential, SSL certs expired or self-signed on production
-- **medium**: Reflected XSS, CSRF, directory listing, verbose error messages exposing
-  stack traces, missing security headers on authenticated pages, outdated software with
-  known but unexploitable CVEs, information disclosure
-- **low**: Missing non-critical security headers, server version disclosure, DNS zone
-  transfer (if no sensitive records), clickjacking on non-sensitive pages
-- **info**: Technology fingerprint, open ports without vulnerabilities, subdomain discovery,
-  DNS records, WHOIS data
-
-### 5. False Positive Detection
-Flag potential false positives:
-- Vulnerabilities reported by only one tool without evidence of successful exploitation
-- Generic "outdated software" findings without specific CVE applicability
-- WAF-blocked scan results that may have triggered false detections
-- Findings contradicted by other tool results (e.g., service reported as vulnerable
-  but version doesn't match CVE affected range)
-Add `"likely_false_positive": true` to suspicious vulnerability entries.
-
-### 6. Error Log
-Extract errors, timeouts, connection failures, DNS failures, rate-limit indicators,
-WAF blocks, and anomalies into the `error_log` array. NEVER delete them.
-
-**Security relevance classification for errors:**
-- **high**: Systematic blocking on all ports (suggests active IDS/IPS), authentication
-  failures suggesting account lockout, DNS poisoning indicators
-- **medium**: WAF detection, rate limiting on specific endpoints, filtered ports
-  suggesting firewall rules, certificate validation failures
-- **low**: Sporadic timeouts, individual connection resets, DNS lookup delays
-- **none**: Transient network errors, tool configuration warnings
-
-### 7. Attack Surface Summary
-Populate `attack_surface` by counting:
-- `external_services`: Open ports accessible from the network
-- `web_applications`: Distinct web apps found (by unique base URLs)
-- `login_panels`: Endpoints with login/authentication forms
-- `api_endpoints`: Endpoints that appear to be API routes (/api/, /v1/, /graphql, etc.)
-- `outdated_software`: Services with versions behind current stable release
-- `default_credentials`: Credentials found by brute force tools
-- `missing_security_headers`: Hosts missing critical security headers
-- `ssl_issues`: SSL/TLS problems (expired, weak cipher, old protocol)
-- `high_value_targets`: List of the most interesting targets for further exploitation
-
-### 8. PoC & Exploitation Data Preservation
-**Never discard PoC data or extracted exploitation evidence.** Every vulnerability
-entry must retain its `evidence`, `poc_steps`, and `poc_payload` fields through
-processing. A finding without PoC evidence is not a valid finding.
-
-- When deduplicating, keep the PoC with the most detail and the most extracted data.
-- **Never truncate or compress `evidence` fields that contain extracted data** —
-  database rows, file contents, credentials, command output, token values. This data
-  is the proof of real-world impact and must reach the report intact.
-- When compressing low-severity findings, still preserve at least the `evidence` field.
-- If a finding has empty `poc_steps` and `poc_payload`, it must be flagged with
-  `"likely_false_positive": true` unless the `evidence` field alone is sufficient
-  to confirm the vulnerability.
-- **Credential entries in `credentials[]` must never be compressed or removed** —
-  every discovered credential is critical for demonstrating lateral movement potential.
-
-### 9. Data Preservation
-Never discard data with security value. If an error or anomaly could indicate a
-security control (WAF, IDS, rate limiter, geo-block), keep it in error_log.
-
-### 10. Output
-Output ONLY valid JSON — no markdown fences, no commentary.
-
-## Example
-
-**Input:**
-```json
-{
-  "hosts": [
-    {"ip": "10.0.0.1", "hostname": "target.com", "os": "Linux 5.4", "ports": [{"port": 80, "protocol": "tcp", "state": "open", "service": "http", "version": "nginx/1.18", "banner": "", "nse_scripts": {"http-title": "Login Page"}}, {"port": 80, "protocol": "tcp", "state": "open", "service": "http", "version": "nginx/1.18.0", "banner": "", "nse_scripts": {}}]},
-    {"ip": "10.0.0.1", "hostname": "target.com", "os": "", "ports": [{"port": 443, "protocol": "tcp", "state": "filtered", "service": "", "version": "", "banner": "", "nse_scripts": {}}]}
-  ],
-  "vulnerabilities": [
-    {"id": "CVE-2021-3449", "title": "OpenSSL DoS", "severity": "high", "cvss": 7.5, "host": "10.0.0.1", "port": 443, "description": "NULL pointer dereference in signature_algorithms processing", "references": [], "evidence": "", "tool_source": "nikto"},
-    {"id": "CVE-2021-3449", "title": "OpenSSL Denial of Service", "severity": "high", "cvss": 7.5, "host": "10.0.0.1", "port": 443, "description": "DoS via crafted renegotiation", "references": ["https://nvd.nist.gov/vuln/detail/CVE-2021-3449"], "evidence": "", "tool_source": "nuclei"}
-  ],
-  "endpoints": [{"url": "/admin", "method": "GET", "status_code": 200, "content_length": 5432, "redirect": ""}, {"url": "/api/v1/users", "method": "GET", "status_code": 401, "content_length": 45, "redirect": ""}],
-  "subdomains": ["mail.target.com", "mail.target.com", "dev.target.com", "staging.target.com"],
-  "http_headers": [{"host": "target.com", "port": 80, "missing_security_headers": ["X-Frame-Options", "Content-Security-Policy", "Strict-Transport-Security"], "server": "nginx/1.18.0", "x_powered_by": ""}],
-  "services": [], "ports": [], "technologies": [], "ssl_certs": [], "credentials": [], "whois": {}, "dns_records": []
-}
-```
-
-**Output:**
-```json
-{
-  "findings": {
-    "hosts": [{"ip": "10.0.0.1", "hostname": "target.com", "os": "Linux 5.4", "ports": [{"port": 80, "protocol": "tcp", "state": "open", "service": "http", "version": "nginx/1.18.0", "banner": "", "nse_scripts": {"http-title": "Login Page"}}, {"port": 443, "protocol": "tcp", "state": "filtered", "service": "", "version": "", "banner": "", "nse_scripts": {}}]}],
-    "ports": [],
-    "services": [],
-    "vulnerabilities": [{"id": "CVE-2021-3449", "title": "OpenSSL DoS", "severity": "high", "cvss": 7.5, "host": "10.0.0.1", "port": 443, "description": "NULL pointer dereference in signature_algorithms processing. Confirmed by multiple tools.", "references": ["https://nvd.nist.gov/vuln/detail/CVE-2021-3449"], "evidence": "Detected by: nikto, nuclei. nikto: + OpenSSL/1.1.1j appears vulnerable to CVE-2021-3449. nuclei: [CVE-2021-3449] [high] https://10.0.0.1:443", "poc_steps": ["1. Run nikto against target on port 443", "2. Run nuclei with CVE-2021-3449 template against target", "3. Both tools confirm the vulnerability"], "poc_payload": "nuclei -u https://10.0.0.1 -t CVE-2021-3449.yaml", "tool_source": "nikto,nuclei", "likely_false_positive": false}],
-    "endpoints": [{"url": "/admin", "method": "GET", "status_code": 200, "content_length": 5432, "redirect": ""}, {"url": "/api/v1/users", "method": "GET", "status_code": 401, "content_length": 45, "redirect": ""}],
-    "subdomains": ["mail.target.com", "dev.target.com", "staging.target.com"],
-    "technologies": [],
-    "ssl_certs": [],
-    "credentials": [],
-    "http_headers": [{"host": "target.com", "port": 80, "missing_security_headers": ["X-Frame-Options", "Content-Security-Policy", "Strict-Transport-Security"], "server": "nginx/1.18.0", "x_powered_by": ""}],
-    "whois": {},
-    "dns_records": []
-  },
-  "error_log": [
-    {
-      "type": "waf_block",
-      "count": 1,
-      "locations": ["nmap:10.0.0.1:443"],
-      "likely_cause": "Port 443 filtered — firewall or WAF dropping packets",
-      "security_relevance": "medium",
-      "security_note": "Filtered port suggests active packet filtering. HTTPS service may be behind WAF or host-based firewall. Consider testing from different source IPs."
-    }
-  ],
-  "attack_surface": {
-    "external_services": 2,
-    "web_applications": 1,
-    "login_panels": 1,
-    "api_endpoints": 1,
-    "outdated_software": 0,
-    "default_credentials": 0,
-    "missing_security_headers": 3,
-    "ssl_issues": 0,
-    "high_value_targets": ["Admin panel at /admin (HTTP 200, no auth)", "API endpoint at /api/v1/users (returns 401, potential IDOR target)"]
-  }
-}
-```
diff --git a/blhackbox/prompts/agents/synthesisagent.md b/blhackbox/prompts/agents/synthesisagent.md
deleted file mode 100644
index a3b078c..0000000
--- a/blhackbox/prompts/agents/synthesisagent.md
+++ /dev/null
@@ -1,277 +0,0 @@
-# Synthesis Agent — System Prompt
-
-You are a data synthesis agent for the blhackbox penetration testing framework.
-Your job is to merge the outputs from the Ingestion Agent and the Processing Agent
-into one final AggregatedPayload JSON object. You resolve conflicts, add metadata,
-generate an executive summary, identify attack chains, and provide remediation
-recommendations.
-
-**Critical: Preserve all exploitation evidence and extracted data.** The final
-payload must contain the full proof of impact — database rows, file contents,
-credentials, command output, tokens. This data drives the report's credibility.
-
-## Input
-
-You will receive a JSON object with two keys:
-
-```json
-{
-  "ingestion_output": { ... },
-  "processing_output": { ... }
-}
-```
-
-- `ingestion_output`: Raw structured data from the Ingestion Agent (hosts, ports,
-  services, vulnerabilities, endpoints, subdomains, technologies, ssl_certs,
-  credentials, http_headers, whois, dns_records).
-- `processing_output`: Cleaned, deduplicated data with findings, error_log, and
-  attack_surface from the Processing Agent.
-
-## Output
-
-Respond with ONLY a valid JSON object matching the AggregatedPayload schema.
-No preamble, no markdown fences, no explanation text.
-
-```json
-{
-  "findings": {
-    "hosts": [],
-    "ports": [],
-    "services": [],
-    "vulnerabilities": [],
-    "endpoints": [],
-    "subdomains": [],
-    "technologies": [],
-    "ssl_certs": [],
-    "credentials": [],
-    "http_headers": [],
-    "whois": {},
-    "dns_records": []
-  },
-  "error_log": [
-    {
-      "type": "timeout|auth_failure|dns_failure|rate_limit|scan_error|connection_refused|waf_block|other",
-      "count": 0,
-      "locations": [],
-      "likely_cause": "",
-      "security_relevance": "none|low|medium|high",
-      "security_note": ""
-    }
-  ],
-  "attack_surface": {
-    "external_services": 0,
-    "web_applications": 0,
-    "login_panels": 0,
-    "api_endpoints": 0,
-    "outdated_software": 0,
-    "default_credentials": 0,
-    "missing_security_headers": 0,
-    "ssl_issues": 0,
-    "high_value_targets": []
-  },
-  "executive_summary": {
-    "risk_level": "critical|high|medium|low|info",
-    "headline": "One-line summary of the most significant finding",
-    "summary": "2-3 paragraph executive summary of all findings",
-    "total_vulnerabilities": {"critical": 0, "high": 0, "medium": 0, "low": 0, "info": 0},
-    "top_findings": [
-      {
-        "title": "SQL Injection in /api/login",
-        "severity": "critical",
-        "impact": "Full database access, potential RCE via INTO OUTFILE",
-        "exploitability": "easy|moderate|difficult",
-        "remediation": "Use parameterized queries"
-      }
-    ],
-    "attack_chains": [
-      {
-        "name": "Unauthenticated RCE via chained vulnerabilities",
-        "steps": ["1. Subdomain dev.target.com found via subfinder", "2. Admin panel exposed without auth at /admin", "3. File upload in admin allows .php upload", "4. Webshell uploaded → RCE"],
-        "overall_severity": "critical"
-      }
-    ]
-  },
-  "remediation": [
-    {
-      "priority": 1,
-      "finding_id": "CVE-2021-12345",
-      "title": "Upgrade Apache to 2.4.51+",
-      "description": "The current Apache version (2.4.41) is vulnerable to path traversal. Upgrade to 2.4.51 or later.",
-      "effort": "low|medium|high",
-      "category": "patch|config|architecture|process"
-    }
-  ],
-  "metadata": {
-    "tools_run": [],
-    "total_raw_size_bytes": 0,
-    "compressed_size_bytes": 0,
-    "compression_ratio": 0.0,
-    "ollama_model": "",
-    "duration_seconds": 0.0,
-    "warning": ""
-  }
-}
-```
-
-## Rules
-
-### 1. Data Merging
-- **Prefer Processing Agent data** for findings — it is deduplicated and cleaned.
-- Use Ingestion Agent data only to fill gaps the Processing Agent missed.
-- If a finding appears in Ingestion but not Processing, include it (it may have
-  been accidentally dropped during processing).
-
-### 2. Conflict Resolution
-- If the same vulnerability appears with different severity levels, use the higher severity.
-- If the same host appears with different port lists, merge the port lists (union).
-- If tool_source differs, combine them ("nikto,nuclei").
-- For version strings, prefer the more specific version (e.g., "1.18.0" over "1.18").
-- **When merging vulnerabilities, keep the most complete PoC data** — prefer the entry
-  with non-empty `poc_steps`, `poc_payload`, and `evidence`. If both have PoC data,
-  merge the evidence from both tools.
-
-### 3. Error Log Merging
-- Take error_log from Processing Agent output.
-- If Ingestion Agent data contains errors that weren't captured by Processing, add them.
-- Do not duplicate error_log entries.
-
-### 4. Attack Surface
-- Take attack_surface from Processing Agent if available.
-- If not available, compute it from the merged findings.
-
-### 5. Executive Summary Generation
-- `risk_level`: Set to the highest severity found across all vulnerabilities.
-  If credentials were found, set to at least "high". If RCE is possible, set "critical".
-- `headline`: One sentence describing the most impactful finding **with demonstrated impact**
-  (e.g., "SQL injection exploited — 500 user records extracted from production database"
-  not just "SQL injection found").
-- `summary`: 2-3 paragraphs covering:
-  - What was tested (target, scope, tools used)
-  - Key findings by severity
-  - **Real-world impact achieved** — what data was extracted, what systems were
-    compromised, what credentials were obtained, what lateral movement was possible
-  - Overall security posture assessment
-- `total_vulnerabilities`: Count findings by severity level.
-- `top_findings`: List the 5 most impactful findings, sorted by severity then exploitability.
-  Each must include: title, severity, impact statement, exploitability rating, remediation.
-- `attack_chains`: Identify chains of findings that could be combined for greater impact.
-  Examples:
-  - Information disclosure + default credentials = unauthorized access
-  - Subdomain discovery + exposed admin panel + weak auth = admin takeover
-  - Open port + outdated service + known CVE with public exploit = RCE
-  - SSRF + internal service access + credential theft = lateral movement
-
-### 6. Remediation Recommendations
-Generate prioritized remediation steps:
-- **Priority 1**: Critical and high severity findings with easy exploitability
-- **Priority 2**: Medium severity findings or high severity with difficult exploitability
-- **Priority 3**: Low severity findings and hardening recommendations
-- Group related remediations (e.g., "upgrade all packages" instead of one per CVE)
-- `effort`: low (config change), medium (code change), high (architecture change)
-- `category`:
-  - `patch`: Software update needed
-  - `config`: Configuration change (firewall rules, headers, TLS settings)
-  - `architecture`: Design-level change (network segmentation, auth system overhaul)
-  - `process`: Operational change (credential rotation, monitoring, incident response)
-
-### 7. PoC & Exploitation Evidence Validation
-- **Every vulnerability with severity > "info" MUST have PoC data with exploitation evidence.**
-- Check that `evidence` is non-empty for all confirmed vulnerabilities.
-- Check that `poc_steps` has at least one step for critical and high findings.
-- **Check that `evidence` contains actual extracted data** for exploited findings —
-  database rows, file contents, command output, credentials, tokens. A finding that
-  says "SQLi confirmed" without showing extracted data is incomplete.
-- **Never discard or truncate extracted data in evidence fields** — this is the
-  proof of real-world impact.
-- If a vulnerability has severity ≥ "low" but empty `evidence`, `poc_steps`, and
-  `poc_payload`, downgrade it to "info" and add a note in the description:
-  "Downgraded: exploitation could not be confirmed — no PoC evidence available."
-- A finding without a PoC is not a valid finding.
-
-### 8. Completeness
-- Every field in the schema MUST be present.
-- Missing arrays → `[]`. Missing strings → `""`. Missing numbers → `0`.
-- Metadata: populate what you can from the input. Set fields you cannot determine
-  to their zero values.
-
-### 9. Output
-Output ONLY valid JSON — no markdown fences, no commentary.
-
-## Example
-
-**Input:**
-```json
-{
-  "ingestion_output": {
-    "hosts": [{"ip": "10.0.0.1", "hostname": "target.com", "os": "Linux 5.4", "ports": [{"port": 80, "protocol": "tcp", "state": "open", "service": "http", "version": "nginx/1.18.0", "banner": "", "nse_scripts": {"http-title": "Login Page"}}]}],
-    "subdomains": ["mail.target.com", "dev.target.com"],
-    "vulnerabilities": [{"id": "CVE-2021-3449", "title": "OpenSSL DoS", "severity": "high", "cvss": 7.5, "host": "10.0.0.1", "port": 443, "description": "OpenSSL denial of service", "references": [], "evidence": "", "tool_source": "nikto"}],
-    "endpoints": [{"url": "/admin", "method": "GET", "status_code": 200, "content_length": 5432, "redirect": ""}],
-    "http_headers": [{"host": "target.com", "port": 80, "missing_security_headers": ["X-Frame-Options", "Content-Security-Policy", "Strict-Transport-Security"], "server": "nginx/1.18.0", "x_powered_by": ""}],
-    "ports": [], "services": [], "technologies": [], "ssl_certs": [], "credentials": [], "whois": {}, "dns_records": []
-  },
-  "processing_output": {
-    "findings": {
-      "hosts": [{"ip": "10.0.0.1", "hostname": "target.com", "os": "Linux 5.4", "ports": [{"port": 80, "protocol": "tcp", "state": "open", "service": "http", "version": "nginx/1.18.0", "banner": "", "nse_scripts": {"http-title": "Login Page"}}]}],
-      "subdomains": ["mail.target.com", "dev.target.com"],
-      "vulnerabilities": [{"id": "CVE-2021-3449", "title": "OpenSSL DoS", "severity": "high", "cvss": 7.5, "host": "10.0.0.1", "port": 443, "description": "OpenSSL denial of service. Confirmed by multiple tools.", "references": [], "evidence": "Detected by: nikto, nuclei. nikto: OpenSSL/1.1.1j vulnerable. nuclei: [CVE-2021-3449] [high] confirmed", "poc_steps": ["1. Run nikto against target on port 443", "2. Run nuclei with CVE-2021-3449 template", "3. Both tools confirm vulnerability in OpenSSL 1.1.1j"], "poc_payload": "nuclei -u https://10.0.0.1 -t CVE-2021-3449.yaml", "tool_source": "nikto,nuclei"}],
-      "endpoints": [{"url": "/admin", "method": "GET", "status_code": 200, "content_length": 5432, "redirect": ""}],
-      "http_headers": [{"host": "target.com", "port": 80, "missing_security_headers": ["X-Frame-Options", "Content-Security-Policy", "Strict-Transport-Security"], "server": "nginx/1.18.0", "x_powered_by": ""}],
-      "ports": [], "services": [], "technologies": [], "ssl_certs": [], "credentials": [], "whois": {}, "dns_records": []
-    },
-    "error_log": [{"type": "waf_block", "count": 1, "locations": ["nmap:10.0.0.1:443"], "likely_cause": "Port 443 filtered", "security_relevance": "medium", "security_note": "Active packet filtering detected"}],
-    "attack_surface": {"external_services": 1, "web_applications": 1, "login_panels": 1, "api_endpoints": 0, "outdated_software": 0, "default_credentials": 0, "missing_security_headers": 3, "ssl_issues": 0, "high_value_targets": ["Admin panel at /admin"]}
-  }
-}
-```
-
-**Output:**
-```json
-{
-  "findings": {
-    "hosts": [{"ip": "10.0.0.1", "hostname": "target.com", "os": "Linux 5.4", "ports": [{"port": 80, "protocol": "tcp", "state": "open", "service": "http", "version": "nginx/1.18.0", "banner": "", "nse_scripts": {"http-title": "Login Page"}}]}],
-    "ports": [],
-    "services": [],
-    "vulnerabilities": [{"id": "CVE-2021-3449", "title": "OpenSSL DoS", "severity": "high", "cvss": 7.5, "host": "10.0.0.1", "port": 443, "description": "OpenSSL denial of service. Confirmed by multiple tools.", "references": [], "evidence": "Detected by: nikto, nuclei. nikto: OpenSSL/1.1.1j vulnerable. nuclei: [CVE-2021-3449] [high] confirmed", "poc_steps": ["1. Run nikto against target on port 443", "2. Run nuclei with CVE-2021-3449 template", "3. Both tools confirm vulnerability in OpenSSL 1.1.1j"], "poc_payload": "nuclei -u https://10.0.0.1 -t CVE-2021-3449.yaml", "tool_source": "nikto,nuclei"}],
-    "endpoints": [{"url": "/admin", "method": "GET", "status_code": 200, "content_length": 5432, "redirect": ""}],
-    "subdomains": ["mail.target.com", "dev.target.com"],
-    "technologies": [],
-    "ssl_certs": [],
-    "credentials": [],
-    "http_headers": [{"host": "target.com", "port": 80, "missing_security_headers": ["X-Frame-Options", "Content-Security-Policy", "Strict-Transport-Security"], "server": "nginx/1.18.0", "x_powered_by": ""}],
-    "whois": {},
-    "dns_records": []
-  },
-  "error_log": [{"type": "waf_block", "count": 1, "locations": ["nmap:10.0.0.1:443"], "likely_cause": "Port 443 filtered", "security_relevance": "medium", "security_note": "Active packet filtering detected"}],
-  "attack_surface": {"external_services": 1, "web_applications": 1, "login_panels": 1, "api_endpoints": 0, "outdated_software": 0, "default_credentials": 0, "missing_security_headers": 3, "ssl_issues": 0, "high_value_targets": ["Admin panel at /admin"]},
-  "executive_summary": {
-    "risk_level": "high",
-    "headline": "High-severity OpenSSL vulnerability (CVE-2021-3449) and exposed admin panel with missing security headers",
-    "summary": "Security assessment of target.com (10.0.0.1) identified 1 high-severity vulnerability and multiple configuration issues. The OpenSSL DoS vulnerability (CVE-2021-3449, CVSS 7.5) was confirmed by two independent tools (nikto and nuclei), indicating high confidence.\n\nAn admin panel was discovered at /admin returning HTTP 200 without apparent authentication. The web server is missing critical security headers (X-Frame-Options, Content-Security-Policy, Strict-Transport-Security), increasing exposure to client-side attacks.\n\nPort 443 appears filtered, suggesting WAF or firewall protection. Two subdomains (mail, dev) were discovered and should be assessed separately.",
-    "total_vulnerabilities": {"critical": 0, "high": 1, "medium": 0, "low": 0, "info": 0},
-    "top_findings": [
-      {"title": "CVE-2021-3449 — OpenSSL Denial of Service", "severity": "high", "impact": "Remote denial of service via crafted TLS renegotiation", "exploitability": "moderate", "remediation": "Upgrade OpenSSL to 1.1.1k or later"},
-      {"title": "Exposed admin panel at /admin", "severity": "medium", "impact": "Potential unauthorized administrative access", "exploitability": "easy", "remediation": "Restrict access via IP allowlist or VPN, add authentication"},
-      {"title": "Missing security headers", "severity": "low", "impact": "Increased exposure to clickjacking, XSS, and MITM attacks", "exploitability": "moderate", "remediation": "Add X-Frame-Options, CSP, and HSTS headers"}
-    ],
-    "attack_chains": [
-      {"name": "Admin panel compromise via missing protections", "steps": ["1. Admin panel at /admin accessible without authentication", "2. No X-Frame-Options header enables clickjacking", "3. No HSTS enables potential MITM on login credentials"], "overall_severity": "high"}
-    ]
-  },
-  "remediation": [
-    {"priority": 1, "finding_id": "CVE-2021-3449", "title": "Upgrade OpenSSL to 1.1.1k+", "description": "Current OpenSSL is vulnerable to DoS. Upgrade to patched version.", "effort": "low", "category": "patch"},
-    {"priority": 1, "finding_id": "", "title": "Restrict admin panel access", "description": "Admin panel at /admin is publicly accessible. Add authentication and IP allowlisting.", "effort": "medium", "category": "config"},
-    {"priority": 2, "finding_id": "", "title": "Add security headers", "description": "Configure X-Frame-Options: DENY, Content-Security-Policy, and Strict-Transport-Security headers.", "effort": "low", "category": "config"},
-    {"priority": 3, "finding_id": "", "title": "Assess discovered subdomains", "description": "Run full scans on mail.target.com and dev.target.com — dev environments often have weaker security.", "effort": "medium", "category": "process"}
-  ],
-  "metadata": {
-    "tools_run": [],
-    "total_raw_size_bytes": 0,
-    "compressed_size_bytes": 0,
-    "compression_ratio": 0.0,
-    "ollama_model": "",
-    "duration_seconds": 0.0,
-    "warning": ""
-  }
-}
-```
diff --git a/blhackbox/reporting/html_generator.py b/blhackbox/reporting/html_generator.py
index 75108bd..77615c5 100644
--- a/blhackbox/reporting/html_generator.py
+++ b/blhackbox/reporting/html_generator.py
@@ -510,8 +510,8 @@ def _truncate_text(value: str, max_len: int = 3000) -> str:
                 <div class="value">{{ "%.2f"|format(payload.metadata.compression_ratio) }}</div>
             </div>
             <div class="stat-card">
-                <div class="label">Ollama Model</div>
-                <div class="value">{{ payload.metadata.ollama_model }}</div>
+                <div class="label">Model</div>
+                <div class="value">{{ payload.metadata.model }}</div>
             </div>
             <div class="stat-card">
                 <div class="label">Duration</div>
@@ -538,7 +538,7 @@ def generate_html_report_from_payload(
     """Generate an HTML report from an AggregatedPayload.
 
     This is the v2.0 report generation path, consuming structured output
-    from the Ollama preprocessing pipeline rather than raw scan results.
+    from the aggregation pipeline rather than raw scan results.
 
     Args:
         payload: Aggregated pentest data from the aggregator MCP server.
diff --git a/blhackbox/reporting/md_generator.py b/blhackbox/reporting/md_generator.py
index a2b9182..1c3fa7d 100644
--- a/blhackbox/reporting/md_generator.py
+++ b/blhackbox/reporting/md_generator.py
@@ -253,7 +253,7 @@ def generate_md_report_from_payload(
     lines.append("|--------|-------|")
     lines.append(f"| Total Raw Size (bytes) | {payload.metadata.total_raw_size_bytes} |")
     lines.append(f"| Compression Ratio | {payload.metadata.compression_ratio:.2f} |")
-    lines.append(f"| Ollama Model | {payload.metadata.ollama_model} |")
+    lines.append(f"| Model | {payload.metadata.model} |")
     lines.append(f"| Duration | {payload.metadata.duration_seconds:.1f}s |")
     lines.append("")
 
diff --git a/blhackbox/reporting/pdf_generator.py b/blhackbox/reporting/pdf_generator.py
index 30e3e84..aea50ed 100644
--- a/blhackbox/reporting/pdf_generator.py
+++ b/blhackbox/reporting/pdf_generator.py
@@ -66,7 +66,7 @@ def generate_pdf_report_from_payload(
     """Generate a PDF report from an AggregatedPayload.
 
     This is the v2.0 report generation path, consuming structured output
-    from the Ollama preprocessing pipeline.
+    from the aggregation pipeline.
 
     Args:
         payload: Aggregated pentest data from the aggregator MCP server.
diff --git a/docker-compose.yml b/docker-compose.yml
index b93fc6e..5199946 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -3,7 +3,6 @@
 # Usage:
 #   docker compose pull                          Pull all pre-built images
 #   docker compose up -d                         Start core stack (4 containers)
-#   docker compose --profile ollama up -d        Start with Ollama pipeline (9 containers, legacy)
 #   docker compose --profile gateway up -d       Start with MCP Gateway
 #   docker compose --profile claude-code up -d   Start with Claude Code container
 #   docker compose --profile neo4j up -d         Start with Neo4j
@@ -14,15 +13,10 @@
 #   wire-mcp         (FastMCP SSE, port 9003) — Wireshark/tshark (7 tools)
 #   screenshot-mcp   (FastMCP SSE, port 9004) — Headless Chromium screenshots (4 tools)
 #
-# Optional (--profile ollama):
-#   ollama-mcp       (FastMCP SSE, port 9000) — Ollama preprocessing pipeline (legacy)
-#   agent-ingestion, agent-processing, agent-synthesis, ollama
-#
-# The MCP host (Claude Code, Claude Desktop, ChatGPT) now handles data
+# The MCP host (Claude Code, Claude Desktop, ChatGPT) handles data
 # aggregation directly — it parses raw tool outputs, deduplicates, and
 # structures them into an AggregatedPayload, then validates via the
-# aggregate_results tool.  The Ollama pipeline is kept as an optional
-# fallback for local-only / offline processing.
+# aggregate_results tool.
 #
 # Claude Code (Docker) connects directly to MCP servers via SSE.
 # Claude Desktop / ChatGPT connect via the MCP Gateway (--profile gateway).
@@ -38,7 +32,6 @@ networks:
 volumes:
   neo4j_data:
   neo4j_logs:
-  ollama_models:
   portainer_data:
   wordlists:
 
@@ -191,188 +184,6 @@ services:
     networks:
       - blhackbox_net
 
-  # -- OLLAMA MCP SERVER (OPTIONAL — LEGACY) ----------------------------------
-  # blhackbox custom component. NOT an official Ollama product.
-  # Thin MCP orchestrator -- calls the 3 agent containers in sequence
-  # via HTTP, assembles the AggregatedPayload, and returns it to Claude.
-  #
-  # OPTIONAL since v2.1: The MCP host (Claude) now handles aggregation
-  # directly via the aggregate_results tool, which is faster and more
-  # accurate.  Enable this pipeline with: docker compose --profile ollama up -d
-  ollama-mcp:
-    image: crhacky/blhackbox:ollama-mcp
-    build:
-      context: .
-      dockerfile: docker/ollama-mcp.Dockerfile
-    container_name: blhackbox-ollama-mcp
-    profiles: ["ollama"]
-    restart: unless-stopped
-    init: true
-    healthcheck:
-      test: ["CMD-SHELL", "python3 -c \"import urllib.request; urllib.request.urlopen('http://localhost:9000/sse')\""]
-      interval: 15s
-      timeout: 10s
-      retries: 5
-      start_period: 10s
-    environment:
-      OLLAMA_HOST: "http://ollama:11434"
-      OLLAMA_MODEL: "${OLLAMA_MODEL:-llama3.1:8b}"
-      AGENT_INGESTION_URL: "http://agent-ingestion:8001"
-      AGENT_PROCESSING_URL: "http://agent-processing:8002"
-      AGENT_SYNTHESIS_URL: "http://agent-synthesis:8003"
-      AGENT_TIMEOUT: "${AGENT_TIMEOUT:-1200}"
-      AGENT_RETRIES: "${AGENT_RETRIES:-2}"
-      NEO4J_URI: "${NEO4J_URI:-bolt://neo4j:7687}"
-      NEO4J_USER: "${NEO4J_USER:-neo4j}"
-      NEO4J_PASSWORD: "${NEO4J_PASSWORD:-}"
-    depends_on:
-      agent-ingestion:
-        condition: service_healthy
-      agent-processing:
-        condition: service_healthy
-      agent-synthesis:
-        condition: service_healthy
-    networks:
-      - blhackbox_net
-
-  # -- AGENT 1: INGESTION (OPTIONAL — LEGACY) --------------------------------
-  # Parses and structures raw Kali tool output.
-  # Calls Ollama /api/chat with ingestion system prompt.
-  # Enable with: docker compose --profile ollama up -d
-  agent-ingestion:
-    image: crhacky/blhackbox:agent-ingestion
-    build:
-      context: .
-      dockerfile: docker/agent-ingestion.Dockerfile
-    container_name: blhackbox-agent-ingestion
-    profiles: ["ollama"]
-    restart: unless-stopped
-    environment:
-      OLLAMA_HOST: "http://ollama:11434"
-      OLLAMA_MODEL: "${OLLAMA_MODEL:-llama3.1:8b}"
-      OLLAMA_TIMEOUT: "${OLLAMA_TIMEOUT:-300}"
-      OLLAMA_NUM_CTX: "${OLLAMA_NUM_CTX:-8192}"
-      OLLAMA_KEEP_ALIVE: "${OLLAMA_KEEP_ALIVE:-30m}"
-      OLLAMA_RETRIES: "${OLLAMA_RETRIES:-2}"
-    depends_on:
-      ollama:
-        condition: service_healthy
-    healthcheck:
-      test: ["CMD-SHELL", "python3 -c \"import urllib.request; urllib.request.urlopen('http://localhost:8001/health')\""]
-      interval: 30s
-      timeout: 10s
-      retries: 3
-    networks:
-      - blhackbox_net
-
-  # -- AGENT 2: PROCESSING (OPTIONAL — LEGACY) --------------------------------
-  # Deduplicates, extracts errors, compresses data into efficient blobs.
-  # Annotates error_log entries with security_relevance.
-  # Enable with: docker compose --profile ollama up -d
-  agent-processing:
-    image: crhacky/blhackbox:agent-processing
-    build:
-      context: .
-      dockerfile: docker/agent-processing.Dockerfile
-    container_name: blhackbox-agent-processing
-    profiles: ["ollama"]
-    restart: unless-stopped
-    environment:
-      OLLAMA_HOST: "http://ollama:11434"
-      OLLAMA_MODEL: "${OLLAMA_MODEL:-llama3.1:8b}"
-      OLLAMA_TIMEOUT: "${OLLAMA_TIMEOUT:-300}"
-      OLLAMA_NUM_CTX: "${OLLAMA_NUM_CTX:-8192}"
-      OLLAMA_KEEP_ALIVE: "${OLLAMA_KEEP_ALIVE:-30m}"
-      OLLAMA_RETRIES: "${OLLAMA_RETRIES:-2}"
-    depends_on:
-      ollama:
-        condition: service_healthy
-    healthcheck:
-      test: ["CMD-SHELL", "python3 -c \"import urllib.request; urllib.request.urlopen('http://localhost:8002/health')\""]
-      interval: 30s
-      timeout: 10s
-      retries: 3
-    networks:
-      - blhackbox_net
-
-  # -- AGENT 3: SYNTHESIS (OPTIONAL — LEGACY) --------------------------------
-  # Merges Agent 1 + Agent 2 output into final AggregatedPayload.
-  # Adds metadata, resolves conflicts, sends back to Claude.
-  # Enable with: docker compose --profile ollama up -d
-  agent-synthesis:
-    image: crhacky/blhackbox:agent-synthesis
-    build:
-      context: .
-      dockerfile: docker/agent-synthesis.Dockerfile
-    container_name: blhackbox-agent-synthesis
-    profiles: ["ollama"]
-    restart: unless-stopped
-    environment:
-      OLLAMA_HOST: "http://ollama:11434"
-      OLLAMA_MODEL: "${OLLAMA_MODEL:-llama3.1:8b}"
-      OLLAMA_TIMEOUT: "${OLLAMA_TIMEOUT:-300}"
-      OLLAMA_NUM_CTX: "${OLLAMA_NUM_CTX:-8192}"
-      OLLAMA_KEEP_ALIVE: "${OLLAMA_KEEP_ALIVE:-30m}"
-      OLLAMA_RETRIES: "${OLLAMA_RETRIES:-2}"
-    depends_on:
-      ollama:
-        condition: service_healthy
-    healthcheck:
-      test: ["CMD-SHELL", "python3 -c \"import urllib.request; urllib.request.urlopen('http://localhost:8003/health')\""]
-      interval: 30s
-      timeout: 10s
-      retries: 3
-    networks:
-      - blhackbox_net
-
-  # -- OLLAMA (OPTIONAL — LEGACY) -------------------------------------------
-  # Custom entrypoint that auto-pulls and warms up the configured model on
-  # startup, eliminating cold-start delays (~17 min → seconds on subsequent
-  # requests).  All 3 agent containers call this via /api/chat independently.
-  # Enable with: docker compose --profile ollama up -d
-  ollama:
-    image: crhacky/blhackbox:ollama
-    build:
-      context: .
-      dockerfile: docker/ollama.Dockerfile
-    container_name: blhackbox-ollama
-    profiles: ["ollama"]
-    restart: unless-stopped
-    environment:
-      OLLAMA_MODEL: "${OLLAMA_MODEL:-llama3.1:8b}"
-      # Keep models loaded in memory between sequential agent calls.
-      # The 3-stage pipeline (ingestion → processing → synthesis) runs
-      # sequentially, and reloading the model between calls adds minutes
-      # of latency on CPU-only systems.  30m ensures the model stays
-      # resident for the entire pipeline.
-      OLLAMA_KEEP_ALIVE: "${OLLAMA_KEEP_ALIVE:-30m}"
-      # Allow parallel inference requests from multiple agent containers.
-      # While the current pipeline is sequential, this enables future
-      # parallelism and prevents request queueing during health checks.
-      OLLAMA_NUM_PARALLEL: "${OLLAMA_NUM_PARALLEL:-3}"
-    volumes:
-      - ollama_models:/root/.ollama
-    healthcheck:
-      # Verify the server is up AND the model is actually loaded
-      test: ["CMD-SHELL", "ollama list | grep -q \"${OLLAMA_MODEL:-llama3.1:8b}\" || ollama list"]
-      interval: 15s
-      timeout: 10s
-      retries: 20
-      start_period: 120s
-    networks:
-      - blhackbox_net
-    # GPU support -- disabled by default for broad compatibility.
-    # If you have an NVIDIA GPU, uncomment the 'deploy' block below
-    # to enable GPU acceleration for Ollama. This can reduce pipeline
-    # processing time from ~17 minutes (CPU) to under 2 minutes (GPU).
-    # deploy:
-    #   resources:
-    #     reservations:
-    #       devices:
-    #         - driver: nvidia
-    #           count: all
-    #           capabilities: [gpu]
-
   # -- NEO4J (OPTIONAL) ------------------------------------------------------
   # Enable with: docker compose --profile neo4j up -d
   # Provides cross-session persistence and relationship querying.
@@ -421,8 +232,8 @@ services:
     environment:
       ANTHROPIC_API_KEY: "${ANTHROPIC_API_KEY}"
       # Bypass egress proxies (e.g. GitHub Codespaces) for internal Docker traffic
-      no_proxy: "mcp-gateway,kali-mcp,wire-mcp,screenshot-mcp,ollama-mcp,ollama,agent-ingestion,agent-processing,agent-synthesis,localhost,127.0.0.1"
-      NO_PROXY: "mcp-gateway,kali-mcp,wire-mcp,screenshot-mcp,ollama-mcp,ollama,agent-ingestion,agent-processing,agent-synthesis,localhost,127.0.0.1"
+      no_proxy: "mcp-gateway,kali-mcp,wire-mcp,screenshot-mcp,localhost,127.0.0.1"
+      NO_PROXY: "mcp-gateway,kali-mcp,wire-mcp,screenshot-mcp,localhost,127.0.0.1"
     dns:
       - 8.8.8.8
       - 1.1.1.1
diff --git a/docker/agent-ingestion.Dockerfile b/docker/agent-ingestion.Dockerfile
deleted file mode 100644
index 3de83d5..0000000
--- a/docker/agent-ingestion.Dockerfile
+++ /dev/null
@@ -1,12 +0,0 @@
-# blhackbox Agent 1: Ingestion
-# Parses raw tool output into structured typed data.
-# Calls Ollama via the official ollama Python package.
-
-FROM python:3.13-slim
-WORKDIR /app
-COPY blhackbox/agents/ /app/blhackbox/agents/
-COPY blhackbox/prompts/agents/ /app/blhackbox/prompts/agents/
-COPY blhackbox/__init__.py /app/blhackbox/__init__.py
-RUN pip install --no-cache-dir fastapi uvicorn ollama pydantic
-EXPOSE 8001
-CMD ["python3", "-m", "blhackbox.agents.ingestion_server"]
diff --git a/docker/agent-processing.Dockerfile b/docker/agent-processing.Dockerfile
deleted file mode 100644
index c45ac18..0000000
--- a/docker/agent-processing.Dockerfile
+++ /dev/null
@@ -1,12 +0,0 @@
-# blhackbox Agent 2: Processing
-# Deduplicates, compresses, and annotates ingested data.
-# Calls Ollama via the official ollama Python package.
-
-FROM python:3.13-slim
-WORKDIR /app
-COPY blhackbox/agents/ /app/blhackbox/agents/
-COPY blhackbox/prompts/agents/ /app/blhackbox/prompts/agents/
-COPY blhackbox/__init__.py /app/blhackbox/__init__.py
-RUN pip install --no-cache-dir fastapi uvicorn ollama pydantic
-EXPOSE 8002
-CMD ["python3", "-m", "blhackbox.agents.processing_server"]
diff --git a/docker/agent-synthesis.Dockerfile b/docker/agent-synthesis.Dockerfile
deleted file mode 100644
index 75b1e4f..0000000
--- a/docker/agent-synthesis.Dockerfile
+++ /dev/null
@@ -1,12 +0,0 @@
-# blhackbox Agent 3: Synthesis
-# Merges Ingestion + Processing outputs into final AggregatedPayload.
-# Calls Ollama via the official ollama Python package.
-
-FROM python:3.13-slim
-WORKDIR /app
-COPY blhackbox/agents/ /app/blhackbox/agents/
-COPY blhackbox/prompts/agents/ /app/blhackbox/prompts/agents/
-COPY blhackbox/__init__.py /app/blhackbox/__init__.py
-RUN pip install --no-cache-dir fastapi uvicorn ollama pydantic
-EXPOSE 8003
-CMD ["python3", "-m", "blhackbox.agents.synthesis_server"]
diff --git a/docker/claude-code-entrypoint.sh b/docker/claude-code-entrypoint.sh
index 01e275e..25a4a9d 100755
--- a/docker/claude-code-entrypoint.sh
+++ b/docker/claude-code-entrypoint.sh
@@ -22,7 +22,7 @@ MAX_RETRIES=20
 RETRY_INTERVAL=3
 
 # Ensure internal Docker hostnames bypass any egress proxy.
-export no_proxy="${no_proxy:+${no_proxy},}mcp-gateway,kali-mcp,wire-mcp,screenshot-mcp,ollama-mcp,ollama,agent-ingestion,agent-processing,agent-synthesis,localhost,127.0.0.1"
+export no_proxy="${no_proxy:+${no_proxy},}mcp-gateway,kali-mcp,wire-mcp,screenshot-mcp,localhost,127.0.0.1"
 export NO_PROXY="$no_proxy"
 
 # ── Functions ───────────────────────────────────────────────────────
@@ -104,15 +104,6 @@ else
     MCP_FAIL=$((MCP_FAIL + 1))
 fi
 
-# Ollama Pipeline is optional — check but don't count as failure
-OLLAMA_STATUS="OFF"
-if check_service "Ollama Pipeline" "http://ollama-mcp:9000/sse"; then
-    printf "  %-22s [ ${CHECK} ]\n" "Ollama Pipeline"
-    OLLAMA_STATUS="ON"
-else
-    printf "  %-22s [ ${WARN} ]  (optional — not running)\n" "Ollama Pipeline"
-fi
-
 # Summary
 echo ""
 echo -e "${DIM}──────────────────────────────────────────────────${NC}"
@@ -134,9 +125,6 @@ echo -e "  ${BOLD}MCP servers (connected via SSE):${NC}"
 echo -e "    kali            ${DIM}Kali Linux security tools + Metasploit (70+ tools)${NC}"
 echo -e "    wireshark       ${DIM}WireMCP — tshark packet capture & analysis${NC}"
 echo -e "    screenshot      ${DIM}Screenshot MCP — headless Chromium evidence capture${NC}"
-if [ "$OLLAMA_STATUS" = "ON" ]; then
-echo -e "    ollama-pipeline ${DIM}Ollama preprocessing (3-agent pipeline, optional)${NC}"
-fi
 echo ""
 echo -e "  ${BOLD}Data aggregation:${NC}"
 echo -e "    ${DIM}You (Claude) handle parsing, deduplication, and synthesis directly.${NC}"
diff --git a/docker/claude-code.Dockerfile b/docker/claude-code.Dockerfile
index c5b4042..29e1754 100644
--- a/docker/claude-code.Dockerfile
+++ b/docker/claude-code.Dockerfile
@@ -36,10 +36,6 @@ RUN echo '{ \
     "screenshot": { \
       "type": "sse", \
       "url": "http://screenshot-mcp:9004/sse" \
-    }, \
-    "ollama-pipeline": { \
-      "type": "sse", \
-      "url": "http://ollama-mcp:9000/sse" \
     } \
   } \
 }' > .mcp.json
diff --git a/docker/ollama-entrypoint.sh b/docker/ollama-entrypoint.sh
deleted file mode 100644
index aea5930..0000000
--- a/docker/ollama-entrypoint.sh
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/bin/bash
-# Ollama entrypoint: starts the server, pulls the model, and sends a warmup
-# request so the model is already loaded in memory before any agent calls.
-#
-# Without this, the first agent request triggers a cold-start model download
-# + load, which can take 10-20 minutes.
-
-set -e
-
-MODEL="${OLLAMA_MODEL:-llama3.1:8b}"
-
-# Start the Ollama server in the background
-echo "[*] Starting Ollama server..."
-ollama serve &
-OLLAMA_PID=$!
-
-# Wait for the server to become responsive
-echo "[*] Waiting for Ollama server to be ready..."
-MAX_WAIT=60
-WAITED=0
-while [ "$WAITED" -lt "$MAX_WAIT" ]; do
-    if curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then
-        echo "[+] Ollama server is ready (took ~${WAITED}s)"
-        break
-    fi
-    sleep 2
-    WAITED=$((WAITED + 2))
-done
-
-if [ "$WAITED" -ge "$MAX_WAIT" ]; then
-    echo "[!] Ollama server did not respond within ${MAX_WAIT}s"
-fi
-
-# Pull the model if not already present.
-# This is a no-op if the model is already cached in the volume.
-echo "[*] Ensuring model '${MODEL}' is available..."
-ollama pull "$MODEL" 2>&1 || echo "[!] Failed to pull model ${MODEL} — may already be present"
-
-# Warmup: send a tiny request to load the model into memory.
-# The keep_alive ensures it stays loaded for subsequent agent requests.
-echo "[*] Warming up model '${MODEL}'..."
-curl -s http://localhost:11434/api/chat -d "{
-  \"model\": \"${MODEL}\",
-  \"messages\": [{\"role\": \"user\", \"content\": \"hi\"}],
-  \"stream\": false,
-  \"keep_alive\": \"60m\"
-}" > /dev/null 2>&1 && echo "[+] Model '${MODEL}' is warm and loaded" \
-    || echo "[!] Warmup request failed — model will load on first agent call"
-
-# Bring the Ollama server to foreground
-echo "[+] Ollama ready. Model '${MODEL}' is pre-loaded."
-wait $OLLAMA_PID
diff --git a/docker/ollama-mcp.Dockerfile b/docker/ollama-mcp.Dockerfile
deleted file mode 100644
index 254658b..0000000
--- a/docker/ollama-mcp.Dockerfile
+++ /dev/null
@@ -1,14 +0,0 @@
-# blhackbox Ollama MCP Server
-# Custom blhackbox component — NOT an official Ollama product.
-# Thin MCP orchestrator: calls 3 agent containers via HTTP, assembles
-# AggregatedPayload. Uses FastMCP for tool schema generation.
-# Transport: FastMCP SSE on port 9000.
-
-FROM python:3.13-slim
-WORKDIR /app
-COPY blhackbox/ /app/blhackbox/
-COPY mcp_servers/ /app/mcp_servers/
-COPY requirements.txt pyproject.toml ./
-RUN pip install --no-cache-dir "mcp>=1.23.0" httpx pydantic
-EXPOSE 9000
-CMD ["python3", "mcp_servers/ollama_mcp_server.py"]
diff --git a/docker/ollama.Dockerfile b/docker/ollama.Dockerfile
deleted file mode 100644
index 4237f14..0000000
--- a/docker/ollama.Dockerfile
+++ /dev/null
@@ -1,10 +0,0 @@
-# Ollama with model pre-loading for blhackbox.
-# Wraps the official Ollama image with an entrypoint that pulls and warms up
-# the configured model on startup, eliminating cold-start delays.
-
-FROM ollama/ollama:latest
-
-COPY docker/ollama-entrypoint.sh /ollama-entrypoint.sh
-RUN chmod +x /ollama-entrypoint.sh
-
-ENTRYPOINT ["/ollama-entrypoint.sh"]
diff --git a/mcp_servers/ollama_mcp_server.py b/mcp_servers/ollama_mcp_server.py
deleted file mode 100644
index a5a52ae..0000000
--- a/mcp_servers/ollama_mcp_server.py
+++ /dev/null
@@ -1,492 +0,0 @@
-"""
-blhackbox Ollama MCP Server
-============================
-Custom MCP server built for the blhackbox project.
-NOT an official Ollama product.
-
-This is a thin MCP orchestrator that receives data from Claude, calls each
-of the 3 agent containers (Ingestion, Processing, Synthesis) via HTTP in
-sequence, assembles the final AggregatedPayload, and returns it to Claude.
-
-It does NOT call Ollama directly — each agent container handles its own
-Ollama calls independently via the official ``ollama`` Python package.
-
-Uses FastMCP for automatic tool schema generation and protocol handling.
-"""
-
-from __future__ import annotations
-
-import asyncio
-import json
-import logging
-import os
-import sys
-import time
-from typing import Any
-
-import httpx
-from mcp.server.fastmcp import FastMCP
-
-# Ensure the blhackbox package is importable when run as a standalone script
-_project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-if _project_root not in sys.path:
-    sys.path.insert(0, _project_root)
-
-from blhackbox.models.aggregated_payload import (  # noqa: E402
-    AggregatedMetadata,
-    AggregatedPayload,
-    AttackSurface,
-    ErrorLogEntry,
-    ExecutiveSummary,
-    Findings,
-    PipelineStageTiming,
-    RemediationEntry,
-)
-
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger("blhackbox.ollama_mcp")
-
-# ---------------------------------------------------------------------------
-# Configuration from environment
-# ---------------------------------------------------------------------------
-
-OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "llama3.1:8b")
-
-# Agent container URLs — each agent runs as a separate FastAPI container
-AGENT_INGESTION_URL = os.environ.get(
-    "AGENT_INGESTION_URL", "http://agent-ingestion:8001"
-)
-AGENT_PROCESSING_URL = os.environ.get(
-    "AGENT_PROCESSING_URL", "http://agent-processing:8002"
-)
-AGENT_SYNTHESIS_URL = os.environ.get(
-    "AGENT_SYNTHESIS_URL", "http://agent-synthesis:8003"
-)
-
-# HTTP timeout for agent calls — must exceed the agent's own Ollama timeout
-# (default 300s) multiplied by max attempts (1 + OLLAMA_RETRIES=2 = 3).
-# With OLLAMA_TIMEOUT=300s and 3 attempts, agents can take up to ~900s
-# internally.  Default 1200s provides margin for backoff and overhead.
-AGENT_TIMEOUT = float(os.environ.get("AGENT_TIMEOUT", "1200"))
-
-# Number of retries for transient agent failures (502, 503, connection errors).
-AGENT_RETRIES = int(os.environ.get("AGENT_RETRIES", "2"))
-
-# ---------------------------------------------------------------------------
-# FastMCP Server
-# ---------------------------------------------------------------------------
-
-MCP_PORT = int(os.environ.get("MCP_PORT", "9000"))
-
-mcp = FastMCP("blhackbox-ollama-mcp", host="0.0.0.0", port=MCP_PORT)
-
-
-# ---------------------------------------------------------------------------
-# Core processing logic — calls 3 agent containers sequentially via HTTP
-# ---------------------------------------------------------------------------
-
-
-async def _call_agent(
-    client: httpx.AsyncClient,
-    url: str,
-    data: dict | str,
-    session_id: str,
-    target: str,
-    agent_name: str,
-    warnings: list[str],
-) -> dict[str, Any]:
-    """Call an agent container's POST /process endpoint with retry logic.
-
-    Retries on connection errors and 5xx HTTP status codes using exponential
-    backoff.  Non-retryable errors (4xx, JSON decode failures) fail
-    immediately.
-    """
-    payload = {"data": data, "session_id": session_id, "target": target}
-    last_error: str = ""
-
-    for attempt in range(1 + AGENT_RETRIES):
-        try:
-            response = await client.post(f"{url}/process", json=payload)
-            response.raise_for_status()
-            return response.json()
-        except httpx.ConnectError as exc:
-            last_error = f"{agent_name} unreachable at {url}: {exc}"
-            logger.warning(
-                "%s (attempt %d/%d)", last_error, attempt + 1, 1 + AGENT_RETRIES,
-            )
-        except httpx.HTTPStatusError as exc:
-            status = exc.response.status_code
-            # Extract detail from the JSON error body when available
-            detail = ""
-            try:
-                detail = exc.response.json().get("detail", "")
-            except Exception:
-                detail = exc.response.text[:200]
-            last_error = (
-                f"{agent_name} returned HTTP {status}: {detail}"
-            )
-            logger.warning(
-                "%s (attempt %d/%d)", last_error, attempt + 1, 1 + AGENT_RETRIES,
-            )
-            # Only retry on server errors (5xx), not client errors (4xx)
-            if status < 500:
-                break
-        except Exception as exc:
-            err_detail = str(exc).strip() if str(exc).strip() else type(exc).__name__
-            last_error = f"{agent_name} failed: {err_detail}"
-            logger.warning(
-                "%s (attempt %d/%d)", last_error, attempt + 1, 1 + AGENT_RETRIES,
-            )
-
-        # Exponential backoff before next retry
-        if attempt < AGENT_RETRIES:
-            backoff = 2 ** attempt
-            logger.info("Retrying %s in %ds …", agent_name, backoff)
-            await asyncio.sleep(backoff)
-
-    # All retries exhausted
-    logger.error("%s: all %d attempts failed — %s", agent_name, 1 + AGENT_RETRIES, last_error)
-    warnings.append(last_error)
-    return {}
-
-
-@mcp.tool()
-async def process_scan_results(
-    raw_outputs: dict[str, str],
-    target: str,
-    session_id: str,
-) -> str:
-    """Process raw pentest tool output through three sequential agent containers.
-
-    Calls Ingestion -> Processing -> Synthesis agent containers via HTTP and
-    returns a structured AggregatedPayload. Each agent container calls Ollama
-    independently. THIS IS NOT AN OLLAMA PRODUCT — it is a custom blhackbox
-    component that uses Ollama as its LLM backend.
-
-    Args:
-        raw_outputs: Dict mapping tool names to their raw output strings.
-            E.g. {"nmap": "...", "nikto": "...", "nuclei": "..."}
-        target: The target domain, IP, or URL being assessed.
-        session_id: Unique session identifier for this assessment.
-
-    Returns:
-        JSON string of the AggregatedPayload containing findings, error_log,
-        and metadata from the preprocessing pipeline.
-    """
-    start_time = time.monotonic()
-    warnings: list[str] = []
-
-    # Calculate raw size
-    raw_combined = ""
-    for tool_name, output in raw_outputs.items():
-        raw_combined += f"=== {tool_name} ===\n{output}\n\n"
-    total_raw_size = len(raw_combined.encode("utf-8"))
-
-    async with httpx.AsyncClient(timeout=AGENT_TIMEOUT) as client:
-        # ── Agent 1: Ingestion ────────────────────────────────────────
-        logger.info("Calling IngestionAgent at %s …", AGENT_INGESTION_URL)
-        t1 = time.monotonic()
-        ingestion_output = await _call_agent(
-            client, AGENT_INGESTION_URL, raw_combined,
-            session_id, target, "IngestionAgent", warnings,
-        )
-        t1_elapsed = time.monotonic() - t1
-        logger.info("[TIMING] IngestionAgent: %.1fs", t1_elapsed)
-        if not ingestion_output:
-            warnings.append(
-                f"IngestionAgent returned empty output after {t1_elapsed:.0f}s"
-            )
-
-        # ── Agent 2: Processing ───────────────────────────────────────
-        logger.info("Calling ProcessingAgent at %s …", AGENT_PROCESSING_URL)
-        t2 = time.monotonic()
-        processing_output = await _call_agent(
-            client, AGENT_PROCESSING_URL, ingestion_output,
-            session_id, target, "ProcessingAgent", warnings,
-        )
-        t2_elapsed = time.monotonic() - t2
-        logger.info("[TIMING] ProcessingAgent: %.1fs", t2_elapsed)
-        if not processing_output:
-            warnings.append(
-                f"ProcessingAgent returned empty output after {t2_elapsed:.0f}s"
-            )
-
-        # ── Agent 3: Synthesis ────────────────────────────────────────
-        # Keys match what the synthesis agent prompt expects:
-        #   "ingestion_output" and "processing_output"
-        synthesis_input = {
-            "ingestion_output": ingestion_output,
-            "processing_output": processing_output,
-        }
-        logger.info("Calling SynthesisAgent at %s …", AGENT_SYNTHESIS_URL)
-        t3 = time.monotonic()
-        synthesis_output = await _call_agent(
-            client, AGENT_SYNTHESIS_URL, synthesis_input,
-            session_id, target, "SynthesisAgent", warnings,
-        )
-        t3_elapsed = time.monotonic() - t3
-        logger.info("[TIMING] SynthesisAgent: %.1fs", t3_elapsed)
-        if not synthesis_output:
-            warnings.append(
-                f"SynthesisAgent returned empty output after {t3_elapsed:.0f}s "
-                f"(model: {OLLAMA_MODEL})"
-            )
-
-    logger.info(
-        "[TIMING] Pipeline total: %.1fs (ingestion=%.1fs, processing=%.1fs, synthesis=%.1fs)",
-        t1_elapsed + t2_elapsed + t3_elapsed, t1_elapsed, t2_elapsed, t3_elapsed,
-    )
-
-    duration = time.monotonic() - start_time
-
-    # ── Assemble AggregatedPayload ────────────────────────────────────
-    findings = _build_findings(
-        synthesis_output, processing_output, ingestion_output, warnings,
-        target=target,
-    )
-    error_log = _build_error_log(synthesis_output, processing_output)
-
-    # Calculate structured output size
-    payload_json_preview = json.dumps(findings.model_dump(), default=str)
-    structured_size = len(payload_json_preview.encode("utf-8"))
-    expansion_ratio = (
-        structured_size / total_raw_size if total_raw_size > 0 else 0.0
-    )
-
-    attack_surface = _build_attack_surface(synthesis_output, processing_output)
-    executive_summary = _build_executive_summary(synthesis_output)
-    remediation = _build_remediation(synthesis_output)
-
-    payload = AggregatedPayload(
-        session_id=session_id,
-        target=target,
-        findings=findings,
-        error_log=error_log,
-        attack_surface=attack_surface,
-        executive_summary=executive_summary,
-        remediation=remediation,
-        metadata=AggregatedMetadata(
-            tools_run=list(raw_outputs.keys()),
-            total_raw_size_bytes=total_raw_size,
-            structured_size_bytes=structured_size,
-            expansion_ratio=round(expansion_ratio, 4),
-            ollama_model=OLLAMA_MODEL,
-            duration_seconds=round(duration, 2),
-            stage_timing=PipelineStageTiming(
-                ingestion_seconds=round(t1_elapsed, 2),
-                processing_seconds=round(t2_elapsed, 2),
-                synthesis_seconds=round(t3_elapsed, 2),
-            ),
-            warning="; ".join(warnings) if warnings else "",
-        ),
-    )
-
-    # Optionally store in Neo4j (best-effort, non-blocking)
-    neo4j_uri = os.environ.get("NEO4J_URI", "")
-    if neo4j_uri:
-        try:
-            await _store_in_neo4j(payload)
-        except Exception as exc:
-            logger.warning("Neo4j storage failed (non-fatal): %s", exc)
-
-    return json.dumps(payload.to_dict(), indent=2, default=str)
-
-
-def _build_findings(
-    synthesis_output: dict[str, Any],
-    processing_output: dict[str, Any],
-    ingestion_output: dict[str, Any],
-    warnings: list[str],
-    *,
-    target: str = "",
-) -> Findings:
-    """Build Findings from agent outputs, preferring synthesis > processing > ingestion."""
-    findings_data = synthesis_output.get("findings", {})
-    if not findings_data:
-        findings_data = processing_output.get("findings", {})
-    if not findings_data:
-        findings_data = ingestion_output
-
-    if not findings_data:
-        return Findings()
-
-    try:
-        findings = Findings(**findings_data)
-    except Exception as exc:
-        logger.warning("Could not parse findings data: %s", exc)
-        warnings.append(f"Findings parse failed: {exc}")
-        try:
-            findings = Findings(
-                hosts=findings_data.get("hosts", []),
-                ports=findings_data.get("ports", []),
-                services=findings_data.get("services", []),
-                vulnerabilities=findings_data.get("vulnerabilities", []),
-                endpoints=findings_data.get("endpoints", []),
-                subdomains=findings_data.get("subdomains", []),
-                technologies=findings_data.get("technologies", []),
-                ssl_certs=findings_data.get("ssl_certs", []),
-                credentials=findings_data.get("credentials", []),
-                http_headers=findings_data.get("http_headers", []),
-                whois=findings_data.get("whois", {}),
-                dns_records=findings_data.get("dns_records", []),
-            )
-        except Exception:
-            return Findings()
-
-    # Fallback: if host entries have empty IP, fill from the target parameter.
-    # The ingestion agent sometimes fails to extract the IP from tool output
-    # even when the target was explicitly provided.
-    if target:
-        for host in findings.hosts:
-            if not host.ip:
-                host.ip = target
-
-    return findings
-
-
-def _build_error_log(
-    synthesis_output: dict[str, Any],
-    processing_output: dict[str, Any],
-) -> list[ErrorLogEntry]:
-    """Build error log entries from agent outputs."""
-    raw_entries = synthesis_output.get("error_log", [])
-    if not raw_entries:
-        raw_entries = processing_output.get("error_log", [])
-
-    entries: list[ErrorLogEntry] = []
-    for entry in raw_entries:
-        if not isinstance(entry, dict):
-            continue
-        try:
-            entries.append(ErrorLogEntry(**entry))
-        except Exception:
-            logger.warning("Could not parse error log entry: %s", entry)
-    return entries
-
-
-def _build_attack_surface(
-    synthesis_output: dict[str, Any],
-    processing_output: dict[str, Any],
-) -> AttackSurface:
-    """Build attack surface from agent outputs."""
-    data = synthesis_output.get("attack_surface", {})
-    if not data:
-        data = processing_output.get("attack_surface", {})
-    if not data:
-        return AttackSurface()
-    try:
-        return AttackSurface(**data)
-    except Exception as exc:
-        logger.warning("Could not parse attack_surface data: %s", exc)
-        return AttackSurface()
-
-
-def _build_executive_summary(synthesis_output: dict[str, Any]) -> ExecutiveSummary:
-    """Build executive summary from synthesis output."""
-    data = synthesis_output.get("executive_summary", {})
-    if not data:
-        return ExecutiveSummary()
-    try:
-        return ExecutiveSummary(**data)
-    except Exception as exc:
-        logger.warning("Could not parse executive_summary data: %s", exc)
-        return ExecutiveSummary()
-
-
-def _build_remediation(synthesis_output: dict[str, Any]) -> list[RemediationEntry]:
-    """Build remediation entries from synthesis output."""
-    raw_entries = synthesis_output.get("remediation", [])
-    entries: list[RemediationEntry] = []
-    for entry in raw_entries:
-        if not isinstance(entry, dict):
-            continue
-        try:
-            entries.append(RemediationEntry(**entry))
-        except Exception:
-            logger.warning("Could not parse remediation entry: %s", entry)
-    return entries
-
-
-async def _store_in_neo4j(payload: AggregatedPayload) -> None:
-    """Best-effort storage of the AggregatedPayload in Neo4j."""
-    from blhackbox.core.knowledge_graph import KnowledgeGraphClient
-
-    async with KnowledgeGraphClient() as kg:
-        cypher = """
-        MERGE (s:AggregatedSession {session_id: $session_id})
-        SET s.target = $target,
-            s.scan_timestamp = $scan_timestamp,
-            s.tools_run = $tools_run,
-            s.compression_ratio = $compression_ratio,
-            s.ollama_model = $ollama_model,
-            s.duration_seconds = $duration_seconds,
-            s.warning = $warning
-        """
-        await kg.run_query(cypher, {
-            "session_id": payload.session_id,
-            "target": payload.target,
-            "scan_timestamp": payload.scan_timestamp.isoformat(),
-            "tools_run": payload.metadata.tools_run,
-            "compression_ratio": payload.metadata.compression_ratio,
-            "ollama_model": payload.metadata.ollama_model,
-            "duration_seconds": payload.metadata.duration_seconds,
-            "warning": payload.metadata.warning,
-        })
-
-        target = payload.target
-        if _looks_like_ip(target):
-            link_cypher = """
-            MERGE (t:IPAddress {address: $target})
-            WITH t
-            MATCH (s:AggregatedSession {session_id: $session_id})
-            MERGE (t)-[:HAS_AGGREGATED_SESSION]->(s)
-            """
-        else:
-            link_cypher = """
-            MERGE (t:Domain {name: $target})
-            WITH t
-            MATCH (s:AggregatedSession {session_id: $session_id})
-            MERGE (t)-[:HAS_AGGREGATED_SESSION]->(s)
-            """
-        await kg.run_query(link_cypher, {
-            "target": target,
-            "session_id": payload.session_id,
-        })
-
-        for vuln in payload.findings.vulnerabilities:
-            if vuln.id:
-                vuln_cypher = """
-                MERGE (v:Vulnerability {identifier: $vid})
-                SET v.severity = $severity,
-                    v.cvss = $cvss,
-                    v.description = $description
-                WITH v
-                MATCH (s:AggregatedSession {session_id: $session_id})
-                MERGE (s)-[:HAS_FINDING]->(v)
-                """
-                await kg.run_query(vuln_cypher, {
-                    "vid": vuln.id,
-                    "severity": vuln.severity,
-                    "cvss": vuln.cvss,
-                    "description": vuln.description[:5000],
-                    "session_id": payload.session_id,
-                })
-
-    logger.info("Stored AggregatedPayload in Neo4j for session %s", payload.session_id)
-
-
-def _looks_like_ip(value: str) -> bool:
-    parts = value.split(".")
-    if len(parts) != 4:
-        return False
-    return all(p.isdigit() and 0 <= int(p) <= 255 for p in parts)
-
-
-# ---------------------------------------------------------------------------
-# Entry point
-# ---------------------------------------------------------------------------
-
-if __name__ == "__main__":
-    transport = os.environ.get("MCP_TRANSPORT", "sse")
-    logger.info("Starting Ollama MCP Server (%s on port %d)", transport, MCP_PORT)
-    mcp.run(transport=transport)
diff --git a/pyproject.toml b/pyproject.toml
index dca05c6..fb740aa 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "blhackbox"
 version = "2.0.0"
-description = "MCP-based autonomous pentesting with knowledge graph and Ollama preprocessing"
+description = "MCP-based autonomous pentesting with knowledge graph"
 readme = "README.md"
 license = {text = "MIT"}
 requires-python = ">=3.11"
@@ -39,12 +39,6 @@ dependencies = [
     "matplotlib==3.10.0",
     # MCP Server (FastMCP for automatic tool schema generation)
     "mcp>=1.23.0",
-    # Ollama (official Python client for LLM inference)
-    "ollama>=0.4.0",
-    # Agent servers (FastAPI containers)
-    "fastapi>=0.115.8",
-    "starlette>=0.49.1",
-    "uvicorn>=0.34.0",
     # Utilities
     "tenacity==9.0.0",
     "python-dotenv==1.0.1",
@@ -70,7 +64,6 @@ include = ["blhackbox*"]
 [tool.setuptools.package-data]
 "blhackbox.data" = ["*.json"]
 "blhackbox.prompts" = ["*.md"]
-"blhackbox.prompts.agents" = ["*.md"]
 
 [tool.ruff]
 target-version = "py311"
diff --git a/requirements.txt b/requirements.txt
index 0347d0f..d80bcc1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -16,14 +16,6 @@ matplotlib==3.10.0
 # MCP (FastMCP for automatic tool schema generation)
 mcp>=1.23.0
 
-# Ollama (official Python client for LLM inference)
-ollama>=0.4.0
-
-# Agent servers (FastAPI containers)
-fastapi>=0.115.8
-starlette>=0.49.1
-uvicorn>=0.34.0
-
 # Utilities
 tenacity==9.0.0
 python-dotenv==1.0.1
diff --git a/setup.sh b/setup.sh
index b0f34f4..54ba906 100755
--- a/setup.sh
+++ b/setup.sh
@@ -48,9 +48,8 @@ usage() {
     echo ""
     echo "Options:"
     echo "  --api-key KEY     Set ANTHROPIC_API_KEY (skips interactive prompt)"
-    echo "  --minimal         Core stack only (no Neo4j, no Ollama)"
+    echo "  --minimal         Core stack only (no Neo4j)"
     echo "  --with-neo4j      Enable Neo4j knowledge graph"
-    echo "  --with-ollama     Enable Ollama local pipeline"
     echo "  --with-gateway    Enable MCP Gateway for Claude Desktop/ChatGPT"
     echo "  --skip-pull       Skip docker compose pull (use cached images)"
     echo "  --help            Show this help"
@@ -206,14 +205,6 @@ select_profiles() {
         fi
     fi
 
-    # Ollama
-    if [[ "$PROFILES" != *"ollama"* ]]; then
-        read -rp "  Enable Ollama local pipeline? [y/N] " yn
-        if [[ "$yn" =~ ^[Yy] ]]; then
-            PROFILES="${PROFILES:+$PROFILES }--profile ollama"
-        fi
-    fi
-
     echo ""
 }
 
@@ -345,10 +336,6 @@ while [[ $# -gt 0 ]]; do
             PROFILES="${PROFILES:+$PROFILES }--profile neo4j"
             shift
             ;;
-        --with-ollama)
-            PROFILES="${PROFILES:+$PROFILES }--profile ollama"
-            shift
-            ;;
         --with-gateway)
             PROFILES="${PROFILES:+$PROFILES }--profile gateway"
             shift
diff --git a/tests/test_agent_server.py b/tests/test_agent_server.py
deleted file mode 100644
index d12d740..0000000
--- a/tests/test_agent_server.py
+++ /dev/null
@@ -1,343 +0,0 @@
-"""Tests for the BaseAgentServer FastAPI agent containers.
-
-Each agent runs as a separate Docker container with a FastAPI server.
-These tests verify the server creation, routing, and Ollama integration
-via the official ``ollama`` Python package.
-"""
-
-from __future__ import annotations
-
-import json
-from types import SimpleNamespace
-from unittest.mock import AsyncMock, patch
-
-from fastapi.testclient import TestClient
-from ollama import ResponseError
-
-from blhackbox.agents.base_agent_server import (
-    BaseAgentServer,
-    ProcessRequest,
-    _serialize_data,
-)
-
-# ---------------------------------------------------------------------------
-# BaseAgentServer creation
-# ---------------------------------------------------------------------------
-
-
-class TestBaseAgentServer:
-    def test_creates_fastapi_app(self) -> None:
-        server = BaseAgentServer("ingestionagent")
-        assert server.app is not None
-        assert server.agent_name == "ingestionagent"
-
-    def test_loads_prompt_from_file(self) -> None:
-        server = BaseAgentServer("ingestionagent")
-        assert "ingestion" in server.system_prompt.lower()
-        assert "json" in server.system_prompt.lower()
-
-    def test_fallback_prompt_for_unknown_agent(self) -> None:
-        server = BaseAgentServer("nonexistentagent")
-        assert "nonexistentagent" in server.system_prompt
-        assert "JSON" in server.system_prompt
-
-    def test_all_agent_prompts_load(self) -> None:
-        for name in ("ingestionagent", "processingagent", "synthesisagent"):
-            server = BaseAgentServer(name)
-            assert len(server.system_prompt) > 50, f"Prompt for {name} too short"
-
-
-# ---------------------------------------------------------------------------
-# Health endpoint
-# ---------------------------------------------------------------------------
-
-
-class TestHealthEndpoint:
-    def test_health_returns_ok(self) -> None:
-        server = BaseAgentServer("ingestionagent")
-        client = TestClient(server.app)
-        response = client.get("/health")
-        assert response.status_code == 200
-        data = response.json()
-        assert data["status"] == "ok"
-        assert data["agent"] == "ingestionagent"
-
-
-# ---------------------------------------------------------------------------
-# Process endpoint
-# ---------------------------------------------------------------------------
-
-
-class TestProcessEndpoint:
-    def test_process_with_mock_ollama(self) -> None:
-        server = BaseAgentServer("ingestionagent")
-        client = TestClient(server.app)
-
-        mock_response = SimpleNamespace(
-            message=SimpleNamespace(
-                content='{"hosts": [], "subdomains": ["test.example.com"]}'
-            )
-        )
-
-        mock_ollama_client = AsyncMock()
-        mock_ollama_client.chat.return_value = mock_response
-
-        with patch(
-            "blhackbox.agents.base_agent_server.AsyncClient",
-            return_value=mock_ollama_client,
-        ):
-            response = client.post("/process", json={
-                "data": "nmap output",
-                "session_id": "test-session",
-                "target": "example.com",
-            })
-            assert response.status_code == 200
-            assert response.json()["subdomains"] == ["test.example.com"]
-
-    def test_process_empty_ollama_response(self) -> None:
-        server = BaseAgentServer("ingestionagent")
-        client = TestClient(server.app)
-
-        mock_response = SimpleNamespace(
-            message=SimpleNamespace(content="")
-        )
-
-        mock_ollama_client = AsyncMock()
-        mock_ollama_client.chat.return_value = mock_response
-
-        with patch(
-            "blhackbox.agents.base_agent_server.AsyncClient",
-            return_value=mock_ollama_client,
-        ):
-            response = client.post("/process", json={
-                "data": "test",
-                "session_id": "s1",
-                "target": "t1",
-            })
-            assert response.status_code == 502
-            assert "empty response" in response.json()["detail"]
-
-    def test_process_none_content(self) -> None:
-        server = BaseAgentServer("ingestionagent")
-        client = TestClient(server.app)
-
-        mock_response = SimpleNamespace(
-            message=SimpleNamespace(content=None)
-        )
-
-        mock_ollama_client = AsyncMock()
-        mock_ollama_client.chat.return_value = mock_response
-
-        with patch(
-            "blhackbox.agents.base_agent_server.AsyncClient",
-            return_value=mock_ollama_client,
-        ):
-            response = client.post("/process", json={
-                "data": "test",
-                "session_id": "s1",
-                "target": "t1",
-            })
-            assert response.status_code == 502
-            assert "empty response" in response.json()["detail"]
-
-
-# ---------------------------------------------------------------------------
-# ProcessRequest model
-# ---------------------------------------------------------------------------
-
-
-class TestProcessRequest:
-    def test_dict_data(self) -> None:
-        req = ProcessRequest(data={"key": "value"}, session_id="s1", target="t1")
-        assert req.data == {"key": "value"}
-
-    def test_string_data(self) -> None:
-        req = ProcessRequest(data="raw text", session_id="s1", target="t1")
-        assert req.data == "raw text"
-
-    def test_defaults(self) -> None:
-        req = ProcessRequest(data="test")
-        assert req.session_id == ""
-        assert req.target == ""
-
-
-# ---------------------------------------------------------------------------
-# _serialize_data — ensures dicts become valid JSON, not Python repr
-# ---------------------------------------------------------------------------
-
-
-class TestSerializeData:
-    def test_string_passthrough(self) -> None:
-        """String data is returned as-is."""
-        assert _serialize_data("raw nmap output") == "raw nmap output"
-
-    def test_dict_becomes_json(self) -> None:
-        """Dict data is serialised to valid JSON, NOT Python repr."""
-        data = {"hosts": ["10.0.0.1"], "ports": [80, 443]}
-        result = _serialize_data(data)
-        # Must be valid JSON (str() would produce single-quoted Python repr)
-        parsed = json.loads(result)
-        assert parsed == data
-
-    def test_nested_dict(self) -> None:
-        data = {"ingestion_output": {"hosts": []}, "processing_output": {"findings": {}}}
-        result = _serialize_data(data)
-        parsed = json.loads(result)
-        assert parsed == data
-
-    def test_empty_dict(self) -> None:
-        assert _serialize_data({}) == "{}"
-
-    def test_empty_string(self) -> None:
-        assert _serialize_data("") == ""
-
-    def test_dict_with_special_chars(self) -> None:
-        """Ensure special characters are properly JSON-escaped."""
-        data = {"description": 'He said "hello" & goodbye'}
-        result = _serialize_data(data)
-        parsed = json.loads(result)
-        assert parsed["description"] == data["description"]
-
-
-# ---------------------------------------------------------------------------
-# Process endpoint — dict data sent as valid JSON to Ollama
-# ---------------------------------------------------------------------------
-
-
-class TestProcessEndpointDictData:
-    def test_dict_data_sent_as_json_to_ollama(self) -> None:
-        """When /process receives dict data, Ollama should get valid JSON, not repr."""
-        server = BaseAgentServer("ingestionagent")
-        client = TestClient(server.app)
-
-        mock_response = SimpleNamespace(
-            message=SimpleNamespace(content='{"findings": {}}')
-        )
-
-        mock_ollama_client = AsyncMock()
-        mock_ollama_client.chat.return_value = mock_response
-
-        dict_data = {"hosts": ["10.0.0.1"], "ports": [80]}
-
-        with patch(
-            "blhackbox.agents.base_agent_server.AsyncClient",
-            return_value=mock_ollama_client,
-        ):
-            response = client.post("/process", json={
-                "data": dict_data,
-                "session_id": "s1",
-                "target": "t1",
-            })
-            assert response.status_code == 200
-
-            # Verify Ollama received valid JSON, not Python repr
-            call_args = mock_ollama_client.chat.call_args
-            user_content = call_args.kwargs["messages"][1]["content"]
-            # Must be valid JSON
-            parsed = json.loads(user_content)
-            assert parsed == dict_data
-
-
-# ---------------------------------------------------------------------------
-# Process endpoint — retry on Ollama errors
-# ---------------------------------------------------------------------------
-
-
-class TestProcessEndpointRetry:
-    def test_retries_on_response_error(self) -> None:
-        """Agent should retry on Ollama ResponseError before returning 502."""
-        server = BaseAgentServer("ingestionagent")
-        client = TestClient(server.app)
-
-        mock_ollama_client = AsyncMock()
-        # ResponseError needs a specific format
-        mock_ollama_client.chat.side_effect = ResponseError("model not found")
-
-        with patch(
-            "blhackbox.agents.base_agent_server.AsyncClient",
-            return_value=mock_ollama_client,
-        ), patch(
-            "blhackbox.agents.base_agent_server.OLLAMA_RETRIES", 1,
-        ), patch(
-            "blhackbox.agents.base_agent_server.asyncio.sleep",
-            new_callable=AsyncMock,
-        ) as mock_sleep:
-            response = client.post("/process", json={
-                "data": "test", "session_id": "s1", "target": "t1",
-            })
-            assert response.status_code == 502
-            # Should have retried (1 retry = 2 total attempts)
-            assert mock_ollama_client.chat.call_count == 2
-            # Should have slept between retries
-            mock_sleep.assert_called_once()
-
-    def test_succeeds_after_retry(self) -> None:
-        """Agent should succeed after a transient failure."""
-        server = BaseAgentServer("ingestionagent")
-        client = TestClient(server.app)
-
-        mock_response = SimpleNamespace(
-            message=SimpleNamespace(content='{"hosts": []}')
-        )
-
-        mock_ollama_client = AsyncMock()
-        # First call fails, second succeeds
-        mock_ollama_client.chat.side_effect = [
-            ResponseError("transient error"),
-            mock_response,
-        ]
-
-        with patch(
-            "blhackbox.agents.base_agent_server.AsyncClient",
-            return_value=mock_ollama_client,
-        ), patch(
-            "blhackbox.agents.base_agent_server.OLLAMA_RETRIES", 1,
-        ), patch(
-            "blhackbox.agents.base_agent_server.asyncio.sleep",
-            new_callable=AsyncMock,
-        ):
-            response = client.post("/process", json={
-                "data": "test", "session_id": "s1", "target": "t1",
-            })
-            assert response.status_code == 200
-            assert response.json() == {"hosts": []}
-
-
-# ---------------------------------------------------------------------------
-# Health endpoint — Ollama reachability
-# ---------------------------------------------------------------------------
-
-
-class TestHealthEndpointOllamaCheck:
-    def test_health_shows_ollama_reachable(self) -> None:
-        server = BaseAgentServer("ingestionagent")
-        client = TestClient(server.app)
-
-        mock_ollama_client = AsyncMock()
-        mock_ollama_client.list.return_value = {"models": [{"name": "llama3.3"}]}
-
-        with patch(
-            "blhackbox.agents.base_agent_server.AsyncClient",
-            return_value=mock_ollama_client,
-        ):
-            response = client.get("/health")
-            data = response.json()
-            assert data["status"] == "ok"
-            assert data["ollama"] == "reachable"
-            assert data["models_loaded"] == 1
-
-    def test_health_shows_ollama_unreachable(self) -> None:
-        server = BaseAgentServer("ingestionagent")
-        client = TestClient(server.app)
-
-        mock_ollama_client = AsyncMock()
-        mock_ollama_client.list.side_effect = ConnectionError("unreachable")
-
-        with patch(
-            "blhackbox.agents.base_agent_server.AsyncClient",
-            return_value=mock_ollama_client,
-        ):
-            response = client.get("/health")
-            data = response.json()
-            assert data["status"] == "ok"
-            assert data["ollama"] == "unreachable"
diff --git a/tests/test_agents.py b/tests/test_agents.py
deleted file mode 100644
index a86dff0..0000000
--- a/tests/test_agents.py
+++ /dev/null
@@ -1,305 +0,0 @@
-"""Tests for Ollama preprocessing agent classes (v2 architecture).
-
-The v2 pipeline has three agents:
-  1. IngestionAgent  — parse raw tool output into structured data
-  2. ProcessingAgent — deduplicate, compress, annotate error_log
-  3. SynthesisAgent  — merge into final AggregatedPayload
-"""
-
-from __future__ import annotations
-
-import json
-from pathlib import Path
-from types import SimpleNamespace
-from unittest.mock import AsyncMock, patch
-
-import pytest
-
-from blhackbox.agents.base_agent import BaseAgent, _serialize_data
-from blhackbox.agents.ingestion_agent import IngestionAgent
-from blhackbox.agents.processing_agent import ProcessingAgent
-from blhackbox.agents.synthesis_agent import SynthesisAgent
-
-# ---------------------------------------------------------------------------
-# BaseAgent
-# ---------------------------------------------------------------------------
-
-
-class TestBaseAgent:
-    def test_default_params(self) -> None:
-        agent = BaseAgent()
-        assert agent.ollama_host == "http://localhost:11434"
-        assert agent.model == "llama3.1:8b"
-
-    def test_custom_params(self) -> None:
-        agent = BaseAgent(ollama_host="http://custom:9999", model="mistral")
-        assert agent.ollama_host == "http://custom:9999"
-        assert agent.model == "mistral"
-
-    def test_trailing_slash_stripped(self) -> None:
-        agent = BaseAgent(ollama_host="http://localhost:11434/")
-        assert agent.ollama_host == "http://localhost:11434"
-
-    def test_load_prompt_fallback(self) -> None:
-        """BaseAgent has no prompt file, so it should use the fallback prompt."""
-        agent = BaseAgent()
-        # Fallback prompt contains the class name and "JSON"
-        assert "BaseAgent" in agent.system_prompt
-        assert "JSON" in agent.system_prompt
-
-    def test_parse_valid_json(self) -> None:
-        agent = BaseAgent()
-        mock_response = SimpleNamespace(
-            message=SimpleNamespace(content='{"key": "value"}')
-        )
-        result = agent._parse(mock_response)
-        assert result == {"key": "value"}
-
-    def test_parse_empty_response(self) -> None:
-        agent = BaseAgent()
-        mock_response = SimpleNamespace(
-            message=SimpleNamespace(content="")
-        )
-        result = agent._parse(mock_response)
-        assert result == {}
-
-    def test_parse_invalid_json(self) -> None:
-        agent = BaseAgent()
-        mock_response = SimpleNamespace(
-            message=SimpleNamespace(content="not json at all")
-        )
-        result = agent._parse(mock_response)
-        assert result == {}
-
-    def test_parse_json_with_preamble(self) -> None:
-        agent = BaseAgent()
-        mock_response = SimpleNamespace(
-            message=SimpleNamespace(content='Here is the result: {"key": "value"} done')
-        )
-        result = agent._parse(mock_response)
-        assert result == {"key": "value"}
-
-    def test_parse_none_content(self) -> None:
-        agent = BaseAgent()
-        mock_response = SimpleNamespace(
-            message=SimpleNamespace(content=None)
-        )
-        result = agent._parse(mock_response)
-        assert result == {}
-
-    @pytest.mark.asyncio
-    async def test_process_ollama_unreachable(self) -> None:
-        """When Ollama is unreachable, process returns empty dict."""
-        agent = BaseAgent(ollama_host="http://unreachable:99999")
-        result = await agent.process("some raw data")
-        assert result == {}
-
-    @pytest.mark.asyncio
-    async def test_process_with_mock(self) -> None:
-        """Test process with a mocked Ollama response."""
-        agent = IngestionAgent()
-        expected_data = {
-            "hosts": [{"ip": "10.0.0.1", "hostname": "", "ports": []}],
-            "subdomains": ["api.example.com"],
-            "services": [],
-            "vulnerabilities": [],
-            "endpoints": [],
-            "technologies": [],
-            "ports": [],
-        }
-
-        mock_response = SimpleNamespace(
-            message=SimpleNamespace(content=json.dumps(expected_data))
-        )
-
-        mock_client = AsyncMock()
-        mock_client.chat.return_value = mock_response
-
-        with patch("blhackbox.agents.base_agent.AsyncClient", return_value=mock_client):
-            result = await agent.process("nmap output:\n80/tcp open http")
-            assert result["subdomains"] == ["api.example.com"]
-            assert result["hosts"][0]["ip"] == "10.0.0.1"
-
-
-# ---------------------------------------------------------------------------
-# Agent names (used for prompt file loading)
-# ---------------------------------------------------------------------------
-
-
-class TestAgentNames:
-    def test_ingestion_agent_name(self) -> None:
-        agent = IngestionAgent()
-        assert agent.__class__.__name__ == "IngestionAgent"
-
-    def test_processing_agent_name(self) -> None:
-        agent = ProcessingAgent()
-        assert agent.__class__.__name__ == "ProcessingAgent"
-
-    def test_synthesis_agent_name(self) -> None:
-        agent = SynthesisAgent()
-        assert agent.__class__.__name__ == "SynthesisAgent"
-
-    def test_all_are_base_agent_subclasses(self) -> None:
-        assert issubclass(IngestionAgent, BaseAgent)
-        assert issubclass(ProcessingAgent, BaseAgent)
-        assert issubclass(SynthesisAgent, BaseAgent)
-
-
-# ---------------------------------------------------------------------------
-# Prompt loading from .md files
-# ---------------------------------------------------------------------------
-
-
-class TestPromptLoading:
-    def test_ingestion_prompt_loaded(self) -> None:
-        agent = IngestionAgent()
-        prompt_lower = agent.system_prompt.lower()
-        assert "ingestion" in prompt_lower
-        assert "json" in prompt_lower
-
-    def test_processing_prompt_loaded(self) -> None:
-        agent = ProcessingAgent()
-        prompt_lower = agent.system_prompt.lower()
-        assert "processing" in prompt_lower or "deduplic" in prompt_lower
-        assert "json" in prompt_lower
-
-    def test_synthesis_prompt_loaded(self) -> None:
-        agent = SynthesisAgent()
-        prompt_lower = agent.system_prompt.lower()
-        assert "synthesis" in prompt_lower or "merge" in prompt_lower
-        assert "json" in prompt_lower
-
-    def test_all_prompts_are_md_files(self) -> None:
-        prompts_dir = Path(__file__).resolve().parent.parent / "blhackbox" / "prompts" / "agents"
-        expected = {
-            "ingestionagent.md",
-            "processingagent.md",
-            "synthesisagent.md",
-        }
-        actual = {f.name for f in prompts_dir.glob("*.md")}
-        assert expected.issubset(actual), f"Missing prompts: {expected - actual}"
-
-    def test_prompt_file_name_matches_class_name(self) -> None:
-        """Prompt file is <classname>.lower().md — verify the naming convention."""
-        for cls in (IngestionAgent, ProcessingAgent, SynthesisAgent):
-            cls()  # ensure instantiation works
-            expected_file = cls.__name__.lower() + ".md"
-            prompts_dir = (
-                Path(__file__).resolve().parent.parent / "blhackbox" / "prompts" / "agents"
-            )
-            assert (prompts_dir / expected_file).exists(), (
-                f"Expected prompt file {expected_file} for {cls.__name__}"
-            )
-
-
-# ---------------------------------------------------------------------------
-# Agent instantiation with custom params
-# ---------------------------------------------------------------------------
-
-
-class TestAgentInstantiation:
-    def test_ingestion_agent_custom_params(self) -> None:
-        agent = IngestionAgent(ollama_host="http://custom:1234", model="codellama")
-        assert agent.ollama_host == "http://custom:1234"
-        assert agent.model == "codellama"
-
-    def test_processing_agent_custom_params(self) -> None:
-        agent = ProcessingAgent(ollama_host="http://custom:5678", model="phi3")
-        assert agent.ollama_host == "http://custom:5678"
-        assert agent.model == "phi3"
-
-    def test_synthesis_agent_custom_params(self) -> None:
-        agent = SynthesisAgent(ollama_host="http://custom:9012", model="gemma2")
-        assert agent.ollama_host == "http://custom:9012"
-        assert agent.model == "gemma2"
-
-    def test_default_params_inherited(self) -> None:
-        for cls in (IngestionAgent, ProcessingAgent, SynthesisAgent):
-            agent = cls()
-            assert agent.ollama_host == "http://localhost:11434"
-            assert agent.model == "llama3.1:8b"
-
-
-# ---------------------------------------------------------------------------
-# _serialize_data — ensures dicts become valid JSON
-# ---------------------------------------------------------------------------
-
-
-class TestSerializeData:
-    def test_string_passthrough(self) -> None:
-        assert _serialize_data("raw text") == "raw text"
-
-    def test_dict_to_json(self) -> None:
-        data = {"key": "value", "nested": {"a": 1}}
-        result = _serialize_data(data)
-        parsed = json.loads(result)
-        assert parsed == data
-
-    def test_empty_dict(self) -> None:
-        assert _serialize_data({}) == "{}"
-
-    def test_dict_not_python_repr(self) -> None:
-        """Ensure the output is JSON with double quotes, not Python repr."""
-        data = {"key": "value"}
-        result = _serialize_data(data)
-        # JSON uses double quotes; Python repr uses single quotes
-        assert '"key"' in result
-        assert "'" not in result
-
-
-# ---------------------------------------------------------------------------
-# BaseAgent retry logic
-# ---------------------------------------------------------------------------
-
-
-class TestBaseAgentRetry:
-    @pytest.mark.asyncio
-    async def test_retries_on_failure(self) -> None:
-        """Should retry on transient failures before returning empty dict."""
-        agent = IngestionAgent()
-        mock_client = AsyncMock()
-        mock_client.chat.side_effect = Exception("transient error")
-
-        with patch("blhackbox.agents.base_agent.AsyncClient", return_value=mock_client), \
-             patch("blhackbox.agents.base_agent._OLLAMA_RETRIES", 1), \
-             patch("blhackbox.agents.base_agent.asyncio.sleep", new_callable=AsyncMock):
-            result = await agent.process("some data")
-            assert result == {}
-            # 1 retry = 2 total attempts
-            assert mock_client.chat.call_count == 2
-
-    @pytest.mark.asyncio
-    async def test_succeeds_after_retry(self) -> None:
-        """Should succeed if the retry works."""
-        agent = IngestionAgent()
-        mock_response = SimpleNamespace(
-            message=SimpleNamespace(content='{"hosts": []}')
-        )
-        mock_client = AsyncMock()
-        mock_client.chat.side_effect = [Exception("transient"), mock_response]
-
-        with patch("blhackbox.agents.base_agent.AsyncClient", return_value=mock_client), \
-             patch("blhackbox.agents.base_agent._OLLAMA_RETRIES", 1), \
-             patch("blhackbox.agents.base_agent.asyncio.sleep", new_callable=AsyncMock):
-            result = await agent.process("some data")
-            assert result == {"hosts": []}
-
-    @pytest.mark.asyncio
-    async def test_dict_data_sent_as_json(self) -> None:
-        """Verify dict data is serialised to JSON for Ollama, not Python repr."""
-        agent = IngestionAgent()
-        mock_response = SimpleNamespace(
-            message=SimpleNamespace(content='{"hosts": []}')
-        )
-        mock_client = AsyncMock()
-        mock_client.chat.return_value = mock_response
-
-        dict_data = {"ingestion_output": {"hosts": []}, "processing_output": {}}
-
-        with patch("blhackbox.agents.base_agent.AsyncClient", return_value=mock_client):
-            await agent.process(dict_data)
-            call_args = mock_client.chat.call_args
-            user_content = call_args.kwargs["messages"][1]["content"]
-            # Must be valid JSON
-            parsed = json.loads(user_content)
-            assert parsed == dict_data
diff --git a/tests/test_aggregated_payload.py b/tests/test_aggregated_payload.py
index 363dfb0..f5e8a26 100644
--- a/tests/test_aggregated_payload.py
+++ b/tests/test_aggregated_payload.py
@@ -191,7 +191,7 @@ def test_full_creation(self) -> None:
                 total_raw_size_bytes=50000,
                 structured_size_bytes=2500,
                 expansion_ratio=0.05,
-                ollama_model="llama3.3",
+                model="llama3.3",
                 duration_seconds=12.5,
             ),
         )
@@ -387,13 +387,13 @@ def test_defaults(self) -> None:
         assert meta.total_raw_size_bytes == 0
         assert meta.structured_size_bytes == 0
         assert meta.expansion_ratio == 0.0
-        assert meta.ollama_model == ""
+        assert meta.model == ""
         assert meta.duration_seconds == 0.0
         assert meta.warning == ""
 
     def test_warning(self) -> None:
-        meta = AggregatedMetadata(warning="Ollama unreachable")
-        assert meta.warning == "Ollama unreachable"
+        meta = AggregatedMetadata(warning="Service unreachable")
+        assert meta.warning == "Service unreachable"
         assert meta.tools_run == []
 
     def test_full_metadata(self) -> None:
@@ -402,7 +402,7 @@ def test_full_metadata(self) -> None:
             total_raw_size_bytes=100000,
             structured_size_bytes=5000,
             expansion_ratio=0.05,
-            ollama_model="llama3.3",
+            model="llama3.3",
             duration_seconds=25.3,
             warning="",
         )
diff --git a/tests/test_config.py b/tests/test_config.py
index 64b6800..8ea103d 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -1,6 +1,6 @@
 """Tests for configuration (v2 architecture).
 
-The v2 Settings has Neo4j, Ollama, MCP Gateway, Screenshot MCP,
+The v2 Settings has Neo4j, MCP Gateway, Screenshot MCP,
 and general settings.
 """
 
@@ -40,19 +40,6 @@ def test_neo4j_defaults(self) -> None:
         assert s.neo4j_user == "neo4j"
         assert s.neo4j_password == ""
 
-    def test_ollama_defaults(self) -> None:
-        s = Settings()
-        assert "ollama" in s.ollama_url or "localhost" in s.ollama_url
-        assert s.ollama_model == "llama3.1:8b"
-
-    def test_ollama_url_override(self) -> None:
-        s = Settings(ollama_url="http://custom-ollama:9999")
-        assert s.ollama_url == "http://custom-ollama:9999"
-
-    def test_ollama_model_override(self) -> None:
-        s = Settings(ollama_model="mistral")
-        assert s.ollama_model == "mistral"
-
     def test_mcp_gateway_port_default(self) -> None:
         s = Settings()
         assert s.mcp_gateway_port == 8080
diff --git a/tests/test_ollama_mcp.py b/tests/test_ollama_mcp.py
deleted file mode 100644
index 5976b85..0000000
--- a/tests/test_ollama_mcp.py
+++ /dev/null
@@ -1,390 +0,0 @@
-"""Tests for the blhackbox Ollama MCP server (v2 architecture).
-
-Tests the ollama_mcp_server.py which acts as a thin orchestrator that calls
-3 agent containers (Ingestion, Processing, Synthesis) via HTTP and assembles
-the final AggregatedPayload.
-
-Uses FastMCP for automatic tool schema generation.
-"""
-
-from __future__ import annotations
-
-import asyncio
-import sys
-from pathlib import Path
-from unittest.mock import AsyncMock, MagicMock, patch
-
-import httpx
-import pytest
-
-# Ensure the mcp_servers directory is importable
-sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "mcp_servers"))
-
-from mcp_servers.ollama_mcp_server import (  # noqa: E402
-    _build_error_log,
-    _build_findings,
-    _call_agent,
-    _looks_like_ip,
-    mcp,
-)
-
-# ---------------------------------------------------------------------------
-# Tool definitions (FastMCP auto-generated)
-# ---------------------------------------------------------------------------
-
-
-class TestToolDefinitions:
-    def test_has_process_scan_results_tool(self) -> None:
-        tools = asyncio.get_event_loop().run_until_complete(mcp.list_tools())
-        names = [t.name for t in tools]
-        assert "process_scan_results" in names
-
-    def test_only_one_tool(self) -> None:
-        tools = asyncio.get_event_loop().run_until_complete(mcp.list_tools())
-        assert len(tools) == 1
-
-    def test_tool_schema(self) -> None:
-        tools = asyncio.get_event_loop().run_until_complete(mcp.list_tools())
-        tool = next(t for t in tools if t.name == "process_scan_results")
-        schema = tool.inputSchema
-        assert "raw_outputs" in schema["properties"]
-        assert "target" in schema["properties"]
-        assert "session_id" in schema["properties"]
-        assert set(schema["required"]) == {"raw_outputs", "target", "session_id"}
-
-    def test_tool_has_description(self) -> None:
-        tools = asyncio.get_event_loop().run_until_complete(mcp.list_tools())
-        tool = tools[0]
-        assert tool.description
-        assert "agent" in tool.description.lower()
-
-    def test_tool_description_mentions_containers(self) -> None:
-        tools = asyncio.get_event_loop().run_until_complete(mcp.list_tools())
-        tool = tools[0]
-        assert "container" in tool.description.lower()
-
-
-# ---------------------------------------------------------------------------
-# _looks_like_ip
-# ---------------------------------------------------------------------------
-
-
-class TestLooksLikeIP:
-    def test_valid_ip(self) -> None:
-        assert _looks_like_ip("192.168.1.1") is True
-        assert _looks_like_ip("10.0.0.1") is True
-        assert _looks_like_ip("0.0.0.0") is True
-        assert _looks_like_ip("255.255.255.255") is True
-
-    def test_invalid_ip(self) -> None:
-        assert _looks_like_ip("example.com") is False
-        assert _looks_like_ip("256.1.1.1") is False
-        assert _looks_like_ip("not.an.ip") is False
-        assert _looks_like_ip("1.2.3") is False
-        assert _looks_like_ip("1.2.3.4.5") is False
-        assert _looks_like_ip("") is False
-
-
-# ---------------------------------------------------------------------------
-# _call_agent
-# ---------------------------------------------------------------------------
-
-
-class TestCallAgent:
-    @pytest.mark.asyncio
-    async def test_successful_call(self) -> None:
-        mock_response = MagicMock()
-        mock_response.json.return_value = {"subdomains": ["a.example.com"]}
-        mock_response.raise_for_status = MagicMock()
-
-        mock_client = AsyncMock()
-        mock_client.post.return_value = mock_response
-
-        warnings: list[str] = []
-        result = await _call_agent(
-            mock_client, "http://agent:8001", "data",
-            "session1", "example.com", "TestAgent", warnings,
-        )
-        assert result == {"subdomains": ["a.example.com"]}
-        assert warnings == []
-
-    @pytest.mark.asyncio
-    async def test_connect_error(self) -> None:
-        mock_client = AsyncMock()
-        mock_client.post.side_effect = httpx.ConnectError("unreachable")
-
-        warnings: list[str] = []
-        result = await _call_agent(
-            mock_client, "http://agent:8001", "data",
-            "session1", "example.com", "TestAgent", warnings,
-        )
-        assert result == {}
-        assert len(warnings) == 1
-        assert "unreachable" in warnings[0]
-
-    @pytest.mark.asyncio
-    async def test_http_error(self) -> None:
-        mock_response = MagicMock()
-        mock_response.status_code = 503
-
-        mock_client = AsyncMock()
-        mock_client.post.side_effect = httpx.HTTPStatusError(
-            "error", request=MagicMock(), response=mock_response,
-        )
-
-        warnings: list[str] = []
-        result = await _call_agent(
-            mock_client, "http://agent:8001", "data",
-            "session1", "example.com", "TestAgent", warnings,
-        )
-        assert result == {}
-        assert len(warnings) == 1
-        assert "HTTP" in warnings[0]
-
-
-# ---------------------------------------------------------------------------
-# _build_findings
-# ---------------------------------------------------------------------------
-
-
-class TestBuildFindings:
-    def test_empty_data(self) -> None:
-        """All empty agent outputs should produce an empty Findings."""
-        warnings: list[str] = []
-        result = _build_findings({}, {}, {}, warnings)
-        assert result.hosts == []
-        assert result.ports == []
-        assert result.services == []
-        assert result.vulnerabilities == []
-        assert result.endpoints == []
-        assert result.subdomains == []
-        assert result.technologies == []
-
-    def test_synthesis_output_preferred(self) -> None:
-        """_build_findings prefers synthesis output > processing > ingestion."""
-        warnings: list[str] = []
-        synthesis = {
-            "findings": {
-                "subdomains": ["from-synthesis.example.com"],
-            }
-        }
-        processing = {
-            "findings": {
-                "subdomains": ["from-processing.example.com"],
-            }
-        }
-        ingestion = {
-            "subdomains": ["from-ingestion.example.com"],
-        }
-        result = _build_findings(synthesis, processing, ingestion, warnings)
-        assert "from-synthesis.example.com" in result.subdomains
-
-    def test_falls_back_to_processing(self) -> None:
-        warnings: list[str] = []
-        processing = {
-            "findings": {
-                "subdomains": ["from-processing.example.com"],
-            }
-        }
-        result = _build_findings({}, processing, {}, warnings)
-        assert "from-processing.example.com" in result.subdomains
-
-    def test_falls_back_to_ingestion(self) -> None:
-        warnings: list[str] = []
-        ingestion = {
-            "subdomains": ["from-ingestion.example.com"],
-        }
-        result = _build_findings({}, {}, ingestion, warnings)
-        assert "from-ingestion.example.com" in result.subdomains
-
-    def test_malformed_findings_falls_back(self) -> None:
-        """If findings data has wrong types, it should not crash."""
-        warnings: list[str] = []
-        synthesis = {
-            "findings": {
-                "subdomains": "not a list",
-                "hosts": "also not a list",
-            }
-        }
-        result = _build_findings(synthesis, {}, {}, warnings)
-        # Should fall back gracefully without crashing
-        assert result is not None
-
-
-# ---------------------------------------------------------------------------
-# _build_error_log
-# ---------------------------------------------------------------------------
-
-
-class TestBuildErrorLog:
-    def test_valid_entries(self) -> None:
-        entries = _build_error_log(
-            {"error_log": [
-                {"type": "timeout", "count": 3, "locations": ["10.0.0.1"]},
-            ]},
-            {},
-        )
-        assert len(entries) == 1
-        assert entries[0].type == "timeout"
-        assert entries[0].count == 3
-
-    def test_falls_back_to_processing(self) -> None:
-        entries = _build_error_log(
-            {},
-            {"error_log": [
-                {"type": "dns_failure", "count": 1, "locations": ["ns1.example.com"]},
-            ]},
-        )
-        assert len(entries) == 1
-        assert entries[0].type == "dns_failure"
-
-    def test_malformed_entry_skipped(self) -> None:
-        entries = _build_error_log(
-            {"error_log": [
-                {"type": "timeout", "count": 3},
-                "not a dict",  # should be skipped
-            ]},
-            {},
-        )
-        assert len(entries) == 1
-        assert entries[0].type == "timeout"
-
-    def test_empty_error_log(self) -> None:
-        entries = _build_error_log({}, {})
-        assert entries == []
-
-    def test_entry_with_all_fields(self) -> None:
-        entries = _build_error_log(
-            {"error_log": [
-                {
-                    "type": "rate_limit",
-                    "count": 10,
-                    "locations": ["10.0.0.1:443", "10.0.0.1:8080"],
-                    "likely_cause": "WAF rate limiting",
-                    "security_relevance": "high",
-                    "security_note": "Active rate limiting suggests WAF presence",
-                },
-            ]},
-            {},
-        )
-        assert len(entries) == 1
-        assert entries[0].security_relevance == "high"
-        assert entries[0].security_note == "Active rate limiting suggests WAF presence"
-
-
-# ---------------------------------------------------------------------------
-# _call_agent retry logic
-# ---------------------------------------------------------------------------
-
-
-class TestCallAgentRetry:
-    @pytest.mark.asyncio
-    async def test_retries_on_connect_error(self) -> None:
-        """Should retry on connection errors with exponential backoff."""
-        mock_client = AsyncMock()
-        mock_client.post.side_effect = httpx.ConnectError("unreachable")
-
-        warnings: list[str] = []
-        sleep_path = "mcp_servers.ollama_mcp_server.asyncio.sleep"
-        with patch("mcp_servers.ollama_mcp_server.AGENT_RETRIES", 2), \
-             patch(sleep_path, new_callable=AsyncMock) as mock_sleep:
-            result = await _call_agent(
-                mock_client, "http://agent:8001", "data",
-                "session1", "example.com", "TestAgent", warnings,
-            )
-            assert result == {}
-            # 2 retries = 3 total attempts
-            assert mock_client.post.call_count == 3
-            # Should have slept between retries
-            assert mock_sleep.call_count == 2
-
-    @pytest.mark.asyncio
-    async def test_retries_on_5xx_error(self) -> None:
-        """Should retry on 5xx HTTP errors."""
-        mock_response = MagicMock()
-        mock_response.status_code = 502
-        mock_response.json.return_value = {"detail": "Ollama error"}
-
-        mock_client = AsyncMock()
-        mock_client.post.side_effect = httpx.HTTPStatusError(
-            "error", request=MagicMock(), response=mock_response,
-        )
-
-        warnings: list[str] = []
-        with patch("mcp_servers.ollama_mcp_server.AGENT_RETRIES", 1), \
-             patch("mcp_servers.ollama_mcp_server.asyncio.sleep", new_callable=AsyncMock):
-            result = await _call_agent(
-                mock_client, "http://agent:8001", "data",
-                "session1", "example.com", "TestAgent", warnings,
-            )
-            assert result == {}
-            # 1 retry = 2 total attempts
-            assert mock_client.post.call_count == 2
-
-    @pytest.mark.asyncio
-    async def test_no_retry_on_4xx_error(self) -> None:
-        """Should NOT retry on client errors (4xx)."""
-        mock_response = MagicMock()
-        mock_response.status_code = 400
-        mock_response.json.return_value = {"detail": "bad request"}
-
-        mock_client = AsyncMock()
-        mock_client.post.side_effect = httpx.HTTPStatusError(
-            "error", request=MagicMock(), response=mock_response,
-        )
-
-        warnings: list[str] = []
-        with patch("mcp_servers.ollama_mcp_server.AGENT_RETRIES", 2), \
-             patch("mcp_servers.ollama_mcp_server.asyncio.sleep", new_callable=AsyncMock):
-            result = await _call_agent(
-                mock_client, "http://agent:8001", "data",
-                "session1", "example.com", "TestAgent", warnings,
-            )
-            assert result == {}
-            # No retry on 4xx — should only attempt once
-            assert mock_client.post.call_count == 1
-
-    @pytest.mark.asyncio
-    async def test_succeeds_after_retry(self) -> None:
-        """Should succeed if the second attempt works."""
-        mock_success = MagicMock()
-        mock_success.json.return_value = {"hosts": ["10.0.0.1"]}
-        mock_success.raise_for_status = MagicMock()
-
-        mock_client = AsyncMock()
-        mock_client.post.side_effect = [
-            httpx.ConnectError("transient"),
-            mock_success,
-        ]
-
-        warnings: list[str] = []
-        with patch("mcp_servers.ollama_mcp_server.AGENT_RETRIES", 1), \
-             patch("mcp_servers.ollama_mcp_server.asyncio.sleep", new_callable=AsyncMock):
-            result = await _call_agent(
-                mock_client, "http://agent:8001", "data",
-                "session1", "example.com", "TestAgent", warnings,
-            )
-            assert result == {"hosts": ["10.0.0.1"]}
-            assert warnings == []
-
-    @pytest.mark.asyncio
-    async def test_error_detail_extracted(self) -> None:
-        """Should extract error detail from the JSON response body."""
-        mock_response = MagicMock()
-        mock_response.status_code = 502
-        mock_response.json.return_value = {"detail": "Ollama error: model not found"}
-
-        mock_client = AsyncMock()
-        mock_client.post.side_effect = httpx.HTTPStatusError(
-            "error", request=MagicMock(), response=mock_response,
-        )
-
-        warnings: list[str] = []
-        with patch("mcp_servers.ollama_mcp_server.AGENT_RETRIES", 0):
-            result = await _call_agent(
-                mock_client, "http://agent:8001", "data",
-                "session1", "example.com", "TestAgent", warnings,
-            )
-            assert result == {}
-            assert len(warnings) == 1
-            assert "model not found" in warnings[0]
diff --git a/tests/test_prompts.py b/tests/test_prompts.py
index f5d90fb..88865a5 100644
--- a/tests/test_prompts.py
+++ b/tests/test_prompts.py
@@ -92,18 +92,6 @@ def test_known_templates_present(self) -> None:
 class TestLoadAgentPrompt:
     """Test loading agent prompts."""
 
-    def test_load_ingestion_prompt(self) -> None:
-        content = load_agent_prompt("ingestionagent")
-        assert len(content) > 50
-
-    def test_load_processing_prompt(self) -> None:
-        content = load_agent_prompt("processingagent")
-        assert len(content) > 50
-
-    def test_load_synthesis_prompt(self) -> None:
-        content = load_agent_prompt("synthesisagent")
-        assert len(content) > 50
-
     def test_unknown_agent_raises(self) -> None:
         with pytest.raises(FileNotFoundError):
             load_agent_prompt("nonexistent_agent")