From d5866fb965a69ba0916c2d68968d0ec228c6516e Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 7 Mar 2026 07:21:48 +0000 Subject: [PATCH 1/3] Move Ollama pipeline to optional profile; MCP host handles aggregation directly The MCP host (Claude Code, Claude Desktop, ChatGPT) is dramatically more capable than llama3.1:8b at parsing, deduplication, and synthesis. This change eliminates 2-5 minutes of Ollama latency per scan by having the MCP host structure raw tool outputs directly. Changes: - Add aggregate_results and get_payload_schema tools to blhackbox MCP server - Move all Ollama services (ollama, ollama-mcp, 3 agents) to --profile ollama - Core stack reduced from 9 to 4 containers (kali, wire, screenshot, portainer) - RAM requirement reduced from 16GB to 8GB for core stack - Update all 11 prompt templates to use direct aggregation - Update playbook, entrypoint, Makefile, .env.example, README, CLAUDE.md - Keep Ollama pipeline as optional fallback (make up-ollama) https://claude.ai/code/session_01MXWTGUUSheo3EkgHrzRmjy --- .env.example | 31 ++- CLAUDE.md | 17 +- Makefile | 39 ++-- README.md | 194 ++++++++---------- blhackbox-mcp-catalog.yaml | 15 +- blhackbox/mcp/server.py | 145 ++++++++++++- blhackbox/models/aggregated_payload.py | 28 ++- blhackbox/prompts/claude_playbook.md | 66 +++--- blhackbox/prompts/templates/README.md | 4 +- blhackbox/prompts/templates/api-security.md | 13 +- blhackbox/prompts/templates/bug-bounty.md | 13 +- .../prompts/templates/full-attack-chain.md | 13 +- blhackbox/prompts/templates/full-pentest.md | 15 +- .../templates/network-infrastructure.md | 13 +- .../prompts/templates/osint-gathering.md | 13 +- blhackbox/prompts/templates/quick-scan.md | 12 +- blhackbox/prompts/templates/recon-deep.md | 13 +- .../prompts/templates/vuln-assessment.md | 13 +- .../prompts/templates/web-app-assessment.md | 13 +- docker-compose.yml | 48 +++-- docker/claude-code-entrypoint.sh | 17 +- 21 files changed, 449 insertions(+), 286 deletions(-) diff --git 
a/.env.example b/.env.example index afd42dc..a7f4754 100644 --- a/.env.example +++ b/.env.example @@ -8,25 +8,18 @@ # Get your key at platform.openai.com # OPENAI_API_KEY=sk-... -# ── Ollama ────────────────────────────────────────────────────────── -# Model used by the 3-agent preprocessing pipeline. -OLLAMA_MODEL=llama3.1:8b -# Timeout (seconds) for Ollama LLM calls — covers cold-start model loading. -OLLAMA_TIMEOUT=300 -# Context window size — increase for large pentest outputs. -OLLAMA_NUM_CTX=8192 -# Keep model in memory between sequential agent calls (prevents repeated cold starts). -OLLAMA_KEEP_ALIVE=10m -# Retry count for transient Ollama failures (per agent). -OLLAMA_RETRIES=2 - -# ── Ollama MCP Orchestrator ──────────────────────────────────────── -# Timeout (seconds) for HTTP calls from the MCP orchestrator to agent containers. -# Must exceed OLLAMA_TIMEOUT * (1 + OLLAMA_RETRIES) to prevent premature timeout -# while agents are still retrying internally. Default: 1200s (20 min). -AGENT_TIMEOUT=1200 -# Retry count for failed agent HTTP calls. -AGENT_RETRIES=2 +# ── Ollama (OPTIONAL — legacy local pipeline) ────────────────────── +# The MCP host (Claude) now handles data aggregation directly. +# These settings are only needed if you enable the Ollama pipeline: +# docker compose --profile ollama up -d +# +# OLLAMA_MODEL=llama3.1:8b +# OLLAMA_TIMEOUT=300 +# OLLAMA_NUM_CTX=8192 +# OLLAMA_KEEP_ALIVE=10m +# OLLAMA_RETRIES=2 +# AGENT_TIMEOUT=1200 +# AGENT_RETRIES=2 # ── Kali MCP Server ────────────────────────────────────────────── # Port exposed on the host for direct SSE connections (http://localhost:9001/sse). 
diff --git a/CLAUDE.md b/CLAUDE.md index 6783a17..36eaee1 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -16,7 +16,7 @@ Read the following before writing a single line: - `CLAUDE.md` (this file), `README.md` - `docker-compose.yml`, `Makefile`, `.env.example` - `blhackbox/mcp/server.py` — blhackbox stdio MCP server (Claude Code Web path) -- `mcp_servers/ollama_mcp_server.py` — Ollama MCP orchestrator +- `mcp_servers/ollama_mcp_server.py` — Ollama MCP orchestrator (optional, `--profile ollama`) - Every file directly relevant to the task: the relevant `Dockerfile`, `*_server.py`, `*_agent.py`, agent prompts in `blhackbox/prompts/agents/` — whatever applies - Do not rely on memory from previous sessions. Read the actual current files. @@ -24,7 +24,7 @@ Read the following before writing a single line: Before writing code, answer these internally: 1. What is the root cause — not the symptom, the actual root cause? 2. Does the fix conflict with anything else in the codebase? -3. Does it break the Ollama pipeline contract? (`AggregatedPayload` schema must stay stable across Ingestion → Processing → Synthesis) +3. Does it break the `AggregatedPayload` schema contract? (Must stay stable for `aggregate_results`, report generation, and the optional Ollama pipeline) 4. Does it violate the `shell=False` rule? 5. Am I touching agent prompts in `blhackbox/prompts/agents/`? If so — do I need a rebuild, or can I use a volume mount override? 6. Is there a simpler fix that achieves the same result? @@ -36,8 +36,11 @@ Only after answering all six — write the fix. ## Project Purpose BLHACKBOX is an MCP-based autonomous pentesting framework. The AI client (Claude Code, Claude Desktop, or ChatGPT) IS the orchestrator — it decides which tools to call, -collects raw outputs, and sends them to the Ollama pipeline for preprocessing before -writing the final pentest report. 
+collects raw outputs, and structures them directly into an `AggregatedPayload` via +the `aggregate_results` MCP tool before writing the final pentest report. + +The Ollama preprocessing pipeline (3 agents) is now optional (`--profile ollama`) +for local-only / offline processing. By default, the MCP host handles aggregation. ## Code Standards - All Python code must be type-annotated @@ -45,7 +48,7 @@ writing the final pentest report. - All subprocess calls must use `subprocess.run(args_list, shell=False)` - Never use `shell=True` in subprocess calls - Never log API keys or secrets -- `AggregatedPayload` schema (`blhackbox/models/aggregated_payload.py`) is the contract between the pipeline and the AI — do not break it without updating all three agents +- `AggregatedPayload` schema (`blhackbox/models/aggregated_payload.py`) is the contract between the MCP host and the reporting tools — do not break it without updating all consumers ## Adding a New MCP Server 1. Create `new-mcp/` directory with your server code @@ -57,8 +60,8 @@ writing the final pentest report. 7. Document tools in README.md components table 8. 
Add unit tests -## Adding or Tuning an Agent Prompt -Agent prompts are in `blhackbox/prompts/agents/`: +## Adding or Tuning an Agent Prompt (Optional Ollama Pipeline) +Agent prompts are in `blhackbox/prompts/agents/` (only relevant if using `--profile ollama`): - `ingestionagent.md` — Ingestion Agent system prompt - `processingagent.md` — Processing Agent system prompt - `synthesisagent.md` — Synthesis Agent system prompt diff --git a/Makefile b/Makefile index 490ecf9..0aa9078 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: help up up-full up-gateway down logs test test-local lint format clean nuke \ +.PHONY: help up up-full up-ollama up-gateway down logs test test-local lint format clean nuke \ pull status health portainer gateway-logs ollama-pull ollama-shell \ claude-code \ neo4j-browser logs-ollama-mcp logs-kali \ @@ -18,20 +18,23 @@ help: ## Show this help pull: ## Pull all pre-built images from Docker Hub $(COMPOSE) pull -up: ## Start core stack (10 containers — no gateway) +up: ## Start core stack (4 containers — no Ollama, no gateway) $(COMPOSE) up -d +up-ollama: ## Start with Ollama pipeline (9 containers — legacy local processing) + $(COMPOSE) --profile ollama up -d + down: ## Stop all services (all profiles) - $(COMPOSE) --profile gateway --profile neo4j --profile claude-code down + $(COMPOSE) --profile gateway --profile neo4j --profile claude-code --profile ollama down logs: ## Tail logs from all services $(COMPOSE) logs -f # ── Stack variations ───────────────────────────────────────────── -up-full: ## Start full stack: core + Neo4j (11 containers) +up-full: ## Start full stack: core + Neo4j (5 containers) $(COMPOSE) --profile neo4j up -d -up-gateway: ## Start core + MCP Gateway for Claude Desktop / ChatGPT (11 containers) +up-gateway: ## Start core + MCP Gateway for Claude Desktop / ChatGPT (5 containers) $(COMPOSE) --profile gateway up -d # ── Testing & Code Quality ───────────────────────────────────── @@ -48,7 +51,7 @@ format: ## 
Auto-format code ruff format blhackbox/ tests/ clean: ## Remove containers, volumes, networks, and build artifacts (keeps images) - $(COMPOSE) --profile gateway --profile neo4j --profile claude-code down -v --remove-orphans + $(COMPOSE) --profile gateway --profile neo4j --profile claude-code --profile ollama down -v --remove-orphans find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true rm -rf dist/ build/ *.egg-info @@ -56,7 +59,7 @@ nuke: ## Full cleanup: containers + volumes + ALL images (frees max disk space) @echo "\033[1;33m WARNING: This will remove ALL blhackbox containers, volumes, AND images.\033[0m" @echo "\033[2m You will need to 'docker compose pull' or 'docker compose build' again.\033[0m" @echo "" - $(COMPOSE) --profile gateway --profile neo4j --profile claude-code down -v --remove-orphans --rmi all + $(COMPOSE) --profile gateway --profile neo4j --profile claude-code --profile ollama down -v --remove-orphans --rmi all @echo "" @echo "\033[2m Pruning dangling images and build cache...\033[0m" docker image prune -f @@ -91,7 +94,7 @@ status: ## Health status of all containers @echo "" @echo "\033[1m blhackbox Container Status\033[0m" @echo "\033[2m ──────────────────────────────────────\033[0m" - @$(COMPOSE) --profile gateway --profile neo4j --profile claude-code ps --format "table {{.Name}}\t{{.Status}}\t{{.Ports}}" 2>/dev/null || $(COMPOSE) ps + @$(COMPOSE) --profile gateway --profile neo4j --profile claude-code --profile ollama ps --format "table {{.Name}}\t{{.Status}}\t{{.Ports}}" 2>/dev/null || $(COMPOSE) ps @echo "" health: ## Quick health check of all MCP servers @@ -108,20 +111,20 @@ health: ## Quick health check of all MCP servers docker exec blhackbox-screenshot-mcp python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:9004/health')" > /dev/null 2>&1 \ && echo "\033[32m[OK]\033[0m" || echo "\033[31m[FAIL]\033[0m" @printf " %-22s " "Ollama MCP (9000)"; \ - docker exec blhackbox-ollama-mcp python3 -c "import 
urllib.request; urllib.request.urlopen('http://localhost:9000/sse')" > /dev/null 2>&1 \ - && echo "\033[32m[OK]\033[0m" || echo "\033[31m[FAIL]\033[0m" + docker inspect --format='{{.State.Running}}' blhackbox-ollama-mcp 2>/dev/null | grep -q "true" \ + && echo "\033[32m[OK]\033[0m" || echo "\033[33m[OFF]\033[0m (optional — enable with: make up-ollama)" @printf " %-22s " "Ollama (11434)"; \ - docker exec blhackbox-ollama ollama list > /dev/null 2>&1 \ - && echo "\033[32m[OK]\033[0m" || echo "\033[31m[FAIL]\033[0m" + docker inspect --format='{{.State.Running}}' blhackbox-ollama 2>/dev/null | grep -q "true" \ + && echo "\033[32m[OK]\033[0m" || echo "\033[33m[OFF]\033[0m (optional — enable with: make up-ollama)" @printf " %-22s " "Agent Ingestion"; \ - docker exec blhackbox-agent-ingestion python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:8001/health')" > /dev/null 2>&1 \ - && echo "\033[32m[OK]\033[0m" || echo "\033[31m[FAIL]\033[0m" + docker inspect --format='{{.State.Running}}' blhackbox-agent-ingestion 2>/dev/null | grep -q "true" \ + && echo "\033[32m[OK]\033[0m" || echo "\033[33m[OFF]\033[0m (optional)" @printf " %-22s " "Agent Processing"; \ - docker exec blhackbox-agent-processing python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:8002/health')" > /dev/null 2>&1 \ - && echo "\033[32m[OK]\033[0m" || echo "\033[31m[FAIL]\033[0m" + docker inspect --format='{{.State.Running}}' blhackbox-agent-processing 2>/dev/null | grep -q "true" \ + && echo "\033[32m[OK]\033[0m" || echo "\033[33m[OFF]\033[0m (optional)" @printf " %-22s " "Agent Synthesis"; \ - docker exec blhackbox-agent-synthesis python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:8003/health')" > /dev/null 2>&1 \ - && echo "\033[32m[OK]\033[0m" || echo "\033[31m[FAIL]\033[0m" + docker inspect --format='{{.State.Running}}' blhackbox-agent-synthesis 2>/dev/null | grep -q "true" \ + && echo "\033[32m[OK]\033[0m" || echo 
"\033[33m[OFF]\033[0m (optional)" @printf " %-22s " "MCP Gateway (8080)"; \ docker inspect --format='{{.State.Running}}' blhackbox-mcp-gateway 2>/dev/null | grep -q "true" \ && echo "\033[32m[OK]\033[0m" || echo "\033[33m[OFF]\033[0m (optional — enable with: make up-gateway)" diff --git a/README.md b/README.md index f54091d..0a22bd0 100644 --- a/README.md +++ b/README.md @@ -28,13 +28,13 @@ - [How Prompts Flow Through the System](#how-prompts-flow-through-the-system) - [Do I Need the MCP Gateway?](#do-i-need-the-mcp-gateway) - [Portainer Setup](#portainer-setup) -- [Ollama Preprocessing Pipeline](#ollama-mcp-server--preprocessing-pipeline) +- [Ollama Preprocessing Pipeline (Optional)](#ollama-preprocessing-pipeline-optional) - [Troubleshooting](#troubleshooting) - [CLI Reference](#cli-reference) - [Makefile Shortcuts](#makefile-shortcuts) - [Docker Hub Images](#docker-hub-images) - [Neo4j (Optional)](#neo4j-optional) -- [GPU Support for Ollama](#gpu-support-for-ollama) +- [GPU Support for Ollama (Optional)](#gpu-support-for-ollama-optional) - [Security Notes](#security-notes) - [Project Structure](#project-structure) - [License](#license) @@ -48,11 +48,10 @@ internal LangGraph orchestrator or LLM planner. Here is what happens when you type a prompt: 1. **You type a prompt** in your AI client (Claude Code, Claude Desktop, or ChatGPT). -2. **The AI decides which tools to call** from four MCP servers: Kali Linux MCP (70+ security tools including Metasploit), WireMCP (7 packet analysis tools), Screenshot MCP (4 evidence capture tools), and the Ollama preprocessing pipeline. +2. **The AI decides which tools to call** from three MCP servers: Kali Linux MCP (70+ security tools including Metasploit), WireMCP (7 packet analysis tools), and Screenshot MCP (4 evidence capture tools). 3. **Each MCP server executes the tool call** in its Docker container and returns raw output to the AI. -4. 
**The AI collects all raw outputs** and sends them to the Ollama MCP server via `process_scan_results()`. -5. **Ollama preprocesses the data** through 3 agent containers in sequence (Ingestion -> Processing -> Synthesis), each calling the local Ollama LLM independently. -6. **The structured `AggregatedPayload` returns to the AI**, which writes the final pentest report. +4. **The AI structures the results itself** — parsing, deduplicating, correlating, and building an `AggregatedPayload` directly. +5. **The AI validates and persists** the payload via `aggregate_results()`, then writes the final pentest report. Everything runs inside Docker containers. No tools are installed on your host machine. @@ -87,27 +86,14 @@ CLAUDE CODE (Docker container on blhackbox_net) | 4 tools: web page screenshots, | element capture, annotations | - |--- ollama-pipeline (SSE, port 9000) ───> OLLAMA MCP SERVER - | - | Calls 3 agents in sequence: - | - |---> INGESTION (port 8001) - | Calls Ollama -> structured data - | - |---> PROCESSING (port 8002) - | Calls Ollama -> deduplicated - | - |---> SYNTHESIS (port 8003) - Calls Ollama -> AggregatedPayload - | - v - AggregatedPayload -> back to Claude Code - | - v - AI writes final pentest report - | - v (optional) - NEO4J — cross-session memory + | After collecting raw outputs, Claude structures them directly: + | get_payload_schema() → parse/dedup/correlate → aggregate_results() + | + v +AggregatedPayload → generate_report() → final pentest report + | + v (optional) +NEO4J — cross-session memory PORTAINER (https://localhost:9443) — Web UI for all containers ``` @@ -125,24 +111,24 @@ aggregates all servers behind `localhost:8080/mcp`. See | **Kali MCP** | Kali Linux security tools + Metasploit Framework — 70+ tools (nmap, sqlmap, hydra, msfconsole, msfvenom, etc.) 
| 9001 | default | | **WireMCP** | Wireshark/tshark — 7 packet capture and analysis tools | 9003 | default | | **Screenshot MCP** | Headless Chromium — 4 screenshot and annotation tools | 9004 | default | -| **Ollama MCP** | Thin orchestrator — calls 3 agent containers in sequence | 9000 | default | -| **Agent: Ingestion** | Parses raw tool output into structured typed data | 8001 | default | -| **Agent: Processing** | Deduplicates, compresses, annotates errors | 8002 | default | -| **Agent: Synthesis** | Merges into final `AggregatedPayload` | 8003 | default | -| **Ollama** | Local LLM inference backend (llama3.1:8b by default) | 11434 | default | | **Portainer** | Web UI for managing all containers | 9443 | default | | **Claude Code** | Anthropic CLI MCP client in Docker | — | `claude-code` | | **MCP Gateway** | Single entry point for host-based MCP clients | 8080 | `gateway` | | **Neo4j** | Cross-session knowledge graph | 7474/7687 | `neo4j` | +| **Ollama MCP** | Legacy thin orchestrator — calls 3 agent containers | 9000 | `ollama` | +| **Agent: Ingestion** | Parses raw tool output into structured typed data | 8001 | `ollama` | +| **Agent: Processing** | Deduplicates, compresses, annotates errors | 8002 | `ollama` | +| **Agent: Synthesis** | Merges into final `AggregatedPayload` | 8003 | `ollama` | +| **Ollama** | Local LLM inference backend (llama3.1:8b by default) | 11434 | `ollama` | --- ## Prerequisites - **Docker** and **Docker Compose** (Docker Engine on Linux, or Docker Desktop) -- At least **16 GB RAM** recommended (Ollama + all containers). The default model (`llama3.1:8b`) requires ~8 GB; larger models need more. +- At least **8 GB RAM** recommended (4 containers in the core stack). If using the optional Ollama pipeline (`--profile ollama`), 16 GB+ is recommended. 
- An **Anthropic API key** from [console.anthropic.com](https://console.anthropic.com) (**required** for Claude Code) -**NVIDIA Container Toolkit** (optional — for GPU-accelerated Ollama inference. See [GPU Support](#gpu-support-for-ollama)) +**NVIDIA Container Toolkit** (optional — only needed if using `--profile ollama` with GPU. See [GPU Support](#gpu-support-for-ollama-optional)) --- @@ -161,15 +147,9 @@ cp .env.example .env # 3. Pull all pre-built Docker images docker compose pull -# NOTE: The ollama image is built locally — this is normal. -# Docker Compose will build it automatically in the next step. -# 4. Start the core stack (9 containers) +# 4. Start the core stack (4 containers) docker compose up -d - -# 5. Download the Ollama model (required — runs inside the container) -make ollama-pull -# This pulls llama3.1:8b (~4.7 GB download). First run may take several minutes. ``` **Verify everything is running:** @@ -179,17 +159,15 @@ make status # Container status make health # Quick health check of all MCP servers ``` -You should see 9 containers, all "Up" or "healthy": +You should see 4 containers, all "Up" or "healthy": - `blhackbox-kali-mcp` - `blhackbox-wire-mcp` - `blhackbox-screenshot-mcp` -- `blhackbox-ollama-mcp` -- `blhackbox-agent-ingestion` -- `blhackbox-agent-processing` -- `blhackbox-agent-synthesis` -- `blhackbox-ollama` - `blhackbox-portainer` +> **Want local-only processing?** Use `make up-ollama` to also start the +> Ollama pipeline (adds 5 more containers, requires 16 GB+ RAM). + > **First time?** Open Portainer at `https://localhost:9443` and create an admin > account within 5 minutes. See [Portainer Setup](#portainer-setup). @@ -204,7 +182,7 @@ no MCP Gateway, no host install, no Node.js. ### Step 1: Start the stack Follow [Installation](#installation) above. Make sure `ANTHROPIC_API_KEY` is -set in your `.env` file. All 9 containers must be healthy (`make health`). +set in your `.env` file. All core containers must be healthy (`make health`). 
### Step 2: Launch Claude Code @@ -232,16 +210,19 @@ Checking service connectivity... Kali MCP [ OK ] WireMCP [ OK ] Screenshot MCP [ OK ] - Ollama Pipeline [ OK ] + Ollama Pipeline [ WARN ] (optional — not running) ────────────────────────────────────────────────── - All 4 services connected. + All 3 services connected. MCP servers (connected via SSE): kali Kali Linux security tools + Metasploit (70+ tools) wireshark WireMCP — tshark packet capture & analysis screenshot Screenshot MCP — headless Chromium evidence capture - ollama-pipeline Ollama preprocessing (3-agent pipeline) + + Data aggregation: + You (Claude) handle parsing, deduplication, and synthesis directly. + Use get_payload_schema + aggregate_results to validate & persist. Quick start: /mcp Check MCP server status @@ -257,8 +238,8 @@ You are now inside an interactive Claude Code session. /mcp ``` -You should see the MCP servers listed: `kali`, `wireshark`, -`screenshot`, and `ollama-pipeline`, each with their available tools. +You should see the MCP servers listed: `kali`, `wireshark`, and +`screenshot`, each with their available tools. ### Step 4: Run your first pentest @@ -270,8 +251,8 @@ Claude Code will autonomously: 1. Call Kali tools (nmap, subfinder, nikto, etc.) 2. Search for exploits using Metasploit (`msf_search`) 3. Collect raw outputs from all tools -4. Send them to the Ollama preprocessing pipeline -5. Write a structured pentest report +4. Structure, deduplicate, and correlate findings into an `AggregatedPayload` +5. 
Validate via `aggregate_results()` and write a structured pentest report ### Monitoring (separate terminal) @@ -279,9 +260,6 @@ Claude Code will autonomously: make logs-kali # Kali MCP server activity (includes Metasploit) make logs-wireshark # WireMCP activity make logs-screenshot # Screenshot MCP activity -make logs-ollama-mcp # Ollama MCP server activity -make logs-agent-ingestion # Ingestion Agent processing -make logs-agent-synthesis # Synthesis Agent building payload ``` Or use **Portainer** at `https://localhost:9443` to see all container logs and @@ -308,8 +286,8 @@ configures itself automatically. 3. Type your prompt: `Scan example.com for open ports and web vulnerabilities` > **Note:** The web session uses the blhackbox stdio MCP server directly -> (not the Docker stack). For the full Docker pipeline with Kali tools, -> Metasploit, and Ollama preprocessing, use [Tutorial 1](#tutorial-1-claude-code-docker--recommended). +> (not the Docker stack). For the full Docker stack with Kali tools and +> Metasploit, use [Tutorial 1](#tutorial-1-claude-code-docker--recommended). --- @@ -353,7 +331,7 @@ Restart Claude Desktop. You should see a hammer icon with available tools. ### Step 3: Run a pentest Type your prompt in Claude Desktop. The flow is identical to Tutorial 1 — the -gateway routes tool calls to the same MCP servers (kali, wireshark, screenshot, ollama). +gateway routes tool calls to the same MCP servers (kali, wireshark, screenshot). --- @@ -402,23 +380,15 @@ STEP 3: TOOLS EXECUTE IN DOCKER CONTAINERS Each tool runs in its container and returns raw text. | v -STEP 4: AI SENDS RAW OUTPUTS TO OLLAMA FOR PROCESSING - After collecting all raw outputs, the AI calls: - process_scan_results(raw_outputs) - on the Ollama MCP Server. +STEP 4: AI STRUCTURES THE RESULTS ITSELF + The AI (Claude/ChatGPT) parses, deduplicates, correlates, and + structures all raw outputs into an AggregatedPayload directly. + No external pipeline needed — the MCP host is the brain. 
| v -STEP 5: OLLAMA PIPELINE (3 AGENTS IN SEQUENCE) - Agent 1: INGESTION (port 8001) - Calls Ollama -> structured typed data - | - v - Agent 2: PROCESSING (port 8002) - Calls Ollama -> deduplicated + compressed - | - v - Agent 3: SYNTHESIS (port 8003) - Calls Ollama -> final AggregatedPayload +STEP 5: AI VALIDATES AND PERSISTS + The AI calls aggregate_results(payload=...) to validate the + AggregatedPayload against the Pydantic schema and save it. | v STEP 6: AI WRITES THE FINAL REPORT @@ -441,7 +411,7 @@ STEP 7 (OPTIONAL): RESULTS STORED IN NEO4J | **ChatGPT / OpenAI** | **Yes** | GUI/web app on host; needs `localhost:8080/mcp` gateway | The MCP Gateway (`docker/mcp-gateway:latest`) aggregates all MCP servers -(kali, wireshark, screenshot, ollama) behind a single Streamable +(kali, wireshark, screenshot) behind a single Streamable HTTP endpoint at `localhost:8080/mcp`. It requires: - Docker socket mount (`/var/run/docker.sock`) - The `--profile gateway` flag to enable @@ -487,7 +457,13 @@ Then open `https://localhost:9443` again and create your account. --- -## Ollama MCP Server — Preprocessing Pipeline +## Ollama Preprocessing Pipeline (Optional) + +> **Since v2.1, the MCP host (Claude) handles data aggregation directly.** +> The Ollama pipeline is kept as an optional fallback for local-only / offline +> processing where you don't want to use the MCP host's intelligence. + +Enable with: `docker compose --profile ollama up -d` (or `make up-ollama`). 
The Ollama MCP Server is a thin orchestrator built with [FastMCP](https://github.com/modelcontextprotocol/python-sdk) that calls 3 @@ -521,7 +497,6 @@ If a service shows `FAIL`, restart it: ```bash docker compose restart kali-mcp # restart one service -make restart-agents # restart all 3 agents ``` ### Metasploit tools are slow or fail @@ -545,7 +520,7 @@ missed it, restart: docker compose restart portainer ``` -### Ollama model not pulled +### Ollama model not pulled (only if using --profile ollama) The agents need a model loaded in Ollama. Without it, the preprocessing pipeline returns empty results: @@ -564,6 +539,9 @@ OLLAMA_MODEL=llama3.2:3b make ollama-pull ``` +> **Note:** If you're not using `--profile ollama`, you don't need to pull any +> model. The MCP host (Claude) handles aggregation directly. + ### MCP Gateway doesn't start The gateway is **optional** — Claude Code in Docker does not use it. If you @@ -589,9 +567,9 @@ docker compose logs # e.g., kali-mcp, ollama-mcp ``` Common causes: -- Missing Ollama model (agents can't start processing) - Port conflict on the host -- Insufficient memory (increase to 16GB+) +- Insufficient memory +- Missing Ollama model (only if using `--profile ollama`) --- @@ -626,26 +604,27 @@ blhackbox mcp ```bash make help # Show all available targets make pull # Pull all pre-built images from Docker Hub -make up # Start core stack (9 containers) -make up-full # Start with Neo4j (10 containers) -make up-gateway # Start with MCP Gateway for Claude Desktop (10 containers) +make up # Start core stack (4 containers) +make up-ollama # Start with Ollama pipeline (9 containers, legacy) +make up-full # Start with Neo4j (5 containers) +make up-gateway # Start with MCP Gateway for Claude Desktop (5 containers) make down # Stop all services make claude-code # Build and launch Claude Code in Docker make status # Container status table -make health # Quick health check of all MCP servers +make health # Quick health check of all services 
make test # Run tests make lint # Run linter -make ollama-pull # Pull Ollama model +make ollama-pull # Pull Ollama model (only if using --profile ollama) make portainer # Open Portainer dashboard (shows setup instructions) make gateway-logs # Live MCP Gateway logs (requires --profile gateway) -make restart-agents # Restart all 3 agent containers +make restart-agents # Restart all 3 agent containers (requires --profile ollama) make logs-kali # Tail Kali MCP logs (includes Metasploit) make logs-wireshark # Tail WireMCP logs make logs-screenshot # Tail Screenshot MCP logs -make logs-ollama-mcp # Tail Ollama MCP logs -make logs-agent-ingestion # Tail Ingestion Agent logs -make logs-agent-processing # Tail Processing Agent logs -make logs-agent-synthesis # Tail Synthesis Agent logs +make logs-ollama-mcp # Tail Ollama MCP logs (requires --profile ollama) +make logs-agent-ingestion # Tail Ingestion Agent logs (requires --profile ollama) +make logs-agent-processing # Tail Processing Agent logs (requires --profile ollama) +make logs-agent-synthesis # Tail Synthesis Agent logs (requires --profile ollama) make push-all # Build and push all images to Docker Hub ``` @@ -672,14 +651,14 @@ All custom images are published to `crhacky/blhackbox`: | `crhacky/blhackbox:kali-mcp` | Kali Linux MCP Server (70+ tools + Metasploit Framework) | | `crhacky/blhackbox:wire-mcp` | WireMCP Server (tshark, 7 tools) | | `crhacky/blhackbox:screenshot-mcp` | Screenshot MCP Server (headless Chromium, 4 tools) | -| `crhacky/blhackbox:ollama-mcp` | Ollama MCP Server (thin orchestrator) | -| `crhacky/blhackbox:agent-ingestion` | Agent 1: Ingestion | -| `crhacky/blhackbox:agent-processing` | Agent 2: Processing | -| `crhacky/blhackbox:agent-synthesis` | Agent 3: Synthesis | | `crhacky/blhackbox:claude-code` | Claude Code CLI client (direct SSE to MCP servers) | +| `crhacky/blhackbox:ollama-mcp` | Ollama MCP Server — optional, `--profile ollama` | +| `crhacky/blhackbox:agent-ingestion` | Agent 1: 
Ingestion — optional, `--profile ollama` | +| `crhacky/blhackbox:agent-processing` | Agent 2: Processing — optional, `--profile ollama` | +| `crhacky/blhackbox:agent-synthesis` | Agent 3: Synthesis — optional, `--profile ollama` | Custom-built locally (no pre-built image on Docker Hub): -- `crhacky/blhackbox:ollama` (wraps `ollama/ollama:latest` with auto-pull entrypoint) +- `crhacky/blhackbox:ollama` (wraps `ollama/ollama:latest` with auto-pull entrypoint — optional, `--profile ollama`) Official images pulled directly: - `portainer/portainer-ce:latest` @@ -701,7 +680,10 @@ Useful for recurring engagements against the same targets. --- -## GPU Support for Ollama +## GPU Support for Ollama (Optional) + +> **Only relevant if using `--profile ollama`.** The default stack does not +> use Ollama — the MCP host handles aggregation directly. GPU acceleration is **disabled by default** in `docker-compose.yml` for broad compatibility. Ollama runs on CPU out of the box. @@ -733,8 +715,8 @@ inference for the preprocessing pipeline. ports 8080 or 9443 to the public internet. - **Authorization**: Ensure you have written permission before scanning any target. - **Neo4j**: Set a strong password in `.env`. Never use defaults in production. -- **Agent containers**: Communicate only on the internal `blhackbox_net` Docker - network. No ports are exposed to the host. +- **Agent containers** (optional Ollama pipeline): Communicate only on the + internal `blhackbox_net` Docker network. No ports are exposed to the host. - **Portainer**: Uses HTTPS with a self-signed certificate. Create a strong admin password on first run. 
@@ -753,11 +735,11 @@ blhackbox/ │ ├── kali-mcp.Dockerfile # Kali Linux + Metasploit Framework │ ├── wire-mcp.Dockerfile │ ├── screenshot-mcp.Dockerfile -│ ├── ollama.Dockerfile -│ ├── ollama-mcp.Dockerfile -│ ├── agent-ingestion.Dockerfile -│ ├── agent-processing.Dockerfile -│ ├── agent-synthesis.Dockerfile +│ ├── ollama.Dockerfile # optional (--profile ollama) +│ ├── ollama-mcp.Dockerfile # optional (--profile ollama) +│ ├── agent-ingestion.Dockerfile # optional (--profile ollama) +│ ├── agent-processing.Dockerfile # optional (--profile ollama) +│ ├── agent-synthesis.Dockerfile # optional (--profile ollama) │ ├── claude-code.Dockerfile # MCP client container │ └── claude-code-entrypoint.sh # Startup script with health checks ├── kali-mcp/ # Kali MCP server (70+ tools + Metasploit) @@ -765,7 +747,7 @@ blhackbox/ ├── screenshot-mcp/ # Screenshot MCP server (Playwright, 4 tools) ├── metasploit-mcp/ # [DEPRECATED] Standalone MSF RPC server (kept for reference) ├── mcp_servers/ -│ └── ollama_mcp_server.py # thin MCP orchestrator +│ └── ollama_mcp_server.py # thin MCP orchestrator (optional) ├── blhackbox/ │ ├── mcp/ │ │ └── server.py # blhackbox MCP server (stdio) diff --git a/blhackbox-mcp-catalog.yaml b/blhackbox-mcp-catalog.yaml index d2b5afb..a16d6d9 100644 --- a/blhackbox-mcp-catalog.yaml +++ b/blhackbox-mcp-catalog.yaml @@ -39,10 +39,11 @@ registry: url: "http://screenshot-mcp:9004/sse" transport_type: sse - ollama-mcp: - description: "blhackbox Ollama preprocessing pipeline — 3-agent data pipeline for scan result aggregation" - title: "Ollama MCP Server" - type: "server" - remote: - url: "http://ollama-mcp:9000/sse" - transport_type: sse + # ollama-mcp is optional (--profile ollama). Uncomment if using the legacy pipeline. 
+ # ollama-mcp: + # description: "blhackbox Ollama preprocessing pipeline — 3-agent data pipeline for scan result aggregation" + # title: "Ollama MCP Server" + # type: "server" + # remote: + # url: "http://ollama-mcp:9000/sse" + # transport_type: sse diff --git a/blhackbox/mcp/server.py b/blhackbox/mcp/server.py index 0ab927c..5c0a209 100644 --- a/blhackbox/mcp/server.py +++ b/blhackbox/mcp/server.py @@ -5,13 +5,15 @@ knowledge graph, and generate reports. Blhackbox MCP provides *orchestrated workflows*: - - run_tool → execute a single tool via best available backend - - query_graph → Cypher queries against the knowledge graph - - get_findings → retrieve structured findings for a target - - list_tools → discover available tools across all backends - - generate_report → produce HTML/PDF reports from session data - - list_templates → discover available prompt templates - - get_template → retrieve a prompt template for autonomous pentesting + - run_tool → execute a single tool via best available backend + - query_graph → Cypher queries against the knowledge graph + - get_findings → retrieve structured findings for a target + - list_tools → discover available tools across all backends + - generate_report → produce HTML/PDF reports from session data + - list_templates → discover available prompt templates + - get_template → retrieve a prompt template for autonomous pentesting + - aggregate_results → validate & store structured findings (MCP host does the analysis) + - get_payload_schema → return the AggregatedPayload JSON schema """ from __future__ import annotations @@ -162,7 +164,7 @@ async def _get_backend() -> ToolBackend: "Retrieve a prompt template by name. Returns the full template " "content with [TARGET] placeholders replaced if a target is provided. " "Each template instructs the AI to use all available MCP servers " - "(Kali MCP, Metasploit MCP, WireMCP, Screenshot MCP, Ollama pipeline)." + "(Kali MCP, WireMCP, Screenshot MCP) and aggregate results directly." 
), inputSchema={ "type": "object", @@ -183,6 +185,44 @@ async def _get_backend() -> ToolBackend: "required": ["name"], }, ), + Tool( + name="aggregate_results", + description=( + "Validate and store structured pentest findings produced by the " + "MCP host (Claude). The MCP host parses raw tool outputs, " + "deduplicates, correlates, and structures them into an " + "AggregatedPayload — then calls this tool to validate and persist " + "the payload for report generation and optional Neo4j storage. " + "Use get_payload_schema first to see the expected JSON schema." + ), + inputSchema={ + "type": "object", + "properties": { + "payload": { + "type": "object", + "description": ( + "Complete AggregatedPayload JSON object. Must include " + "session_id, target, and at least one of: findings, " + "error_log, executive_summary, remediation." + ), + }, + }, + "required": ["payload"], + }, + ), + Tool( + name="get_payload_schema", + description=( + "Return the AggregatedPayload JSON schema so the MCP host knows " + "exactly what structure to produce when aggregating raw tool " + "outputs. Call this before aggregate_results to understand the " + "expected format." 
+ ), + inputSchema={ + "type": "object", + "properties": {}, + }, + ), Tool( name="take_screenshot", description=( @@ -339,6 +379,10 @@ async def _dispatch(name: str, args: dict[str, Any]) -> str: return await _do_list_templates() elif name == "get_template": return await _do_get_template(args) + elif name == "aggregate_results": + return await _do_aggregate_results(args) + elif name == "get_payload_schema": + return await _do_get_payload_schema() elif name == "take_screenshot": return await _do_take_screenshot(args) elif name == "take_element_screenshot": @@ -466,6 +510,91 @@ async def _do_get_template(args: dict[str, Any]) -> str: return json.dumps({"error": str(exc)}) +# --------------------------------------------------------------------------- +# Aggregate results — MCP host (Claude) does the analysis, this validates +# --------------------------------------------------------------------------- + + +async def _do_aggregate_results(args: dict[str, Any]) -> str: + from pathlib import Path + + from blhackbox.models.aggregated_payload import AggregatedPayload + + raw_payload = args["payload"] + if not isinstance(raw_payload, dict): + return json.dumps({"error": "payload must be a JSON object"}) + + # Require at minimum session_id and target + if "session_id" not in raw_payload or "target" not in raw_payload: + return json.dumps({ + "error": "payload must include 'session_id' and 'target'" + }) + + try: + payload = AggregatedPayload(**raw_payload) + except Exception as exc: + return json.dumps({ + "error": f"Payload validation failed: {exc}", + "hint": "Use get_payload_schema to see the expected format.", + }) + + # Persist as JSON for report generation + from blhackbox.config import settings + + results_dir = settings.results_dir + results_dir.mkdir(parents=True, exist_ok=True) + session_file = results_dir / f"session-{payload.session_id}.json" + session_file.write_text( + json.dumps(payload.to_dict(), indent=2, default=str), + encoding="utf-8", + ) + + # Optional 
Neo4j storage (best-effort) + try: + from blhackbox.core.knowledge_graph import KnowledgeGraphClient + + async with KnowledgeGraphClient() as kg: + await kg.run_query( + """ + MERGE (s:AggregatedSession {session_id: $session_id}) + SET s.target = $target, + s.scan_timestamp = $scan_timestamp, + s.tools_run = $tools_run + """, + { + "session_id": payload.session_id, + "target": payload.target, + "scan_timestamp": payload.scan_timestamp.isoformat(), + "tools_run": payload.metadata.tools_run, + }, + ) + except Exception: + logger.debug("Neo4j storage skipped (not available or failed)") + + vuln_count = len(payload.findings.vulnerabilities) + host_count = len(payload.findings.hosts) + return json.dumps({ + "status": "ok", + "session_id": payload.session_id, + "session_file": str(session_file), + "summary": { + "hosts": host_count, + "vulnerabilities": vuln_count, + "endpoints": len(payload.findings.endpoints), + "subdomains": len(payload.findings.subdomains), + "risk_level": payload.executive_summary.risk_level, + }, + "hint": f"Use generate_report with session_id='{session_file}' to create the report.", + }) + + +async def _do_get_payload_schema() -> str: + from blhackbox.models.aggregated_payload import AggregatedPayload + + schema = AggregatedPayload.model_json_schema() + return json.dumps(schema, indent=2) + + # --------------------------------------------------------------------------- # Screenshot helpers — proxy to screenshot-mcp via HTTP # --------------------------------------------------------------------------- diff --git a/blhackbox/models/aggregated_payload.py b/blhackbox/models/aggregated_payload.py index ece5b8e..a5d93b9 100644 --- a/blhackbox/models/aggregated_payload.py +++ b/blhackbox/models/aggregated_payload.py @@ -1,9 +1,13 @@ -"""AggregatedPayload — structured output from the Ollama preprocessing pipeline. +"""AggregatedPayload — structured pentest findings for report generation. 
-This model represents the final assembled payload that the blhackbox -Ollama MCP server returns to Claude after all three agents (Ingestion, -Processing, Synthesis) have run. Claude uses this payload to write the -final pentest report. +This model represents the structured payload that the MCP host (Claude Code, +Claude Desktop, or ChatGPT) produces after collecting raw tool outputs, +parsing, deduplicating, and synthesizing them. The MCP host calls +``aggregate_results`` to validate and persist this payload, then +``generate_report`` to produce the final pentest report. + +Legacy: previously assembled by a 3-agent Ollama pipeline (Ingestion → +Processing → Synthesis). That pipeline is now optional (``--profile ollama``). """ from __future__ import annotations @@ -285,7 +289,14 @@ class AggregatedMetadata(BaseModel): "output is larger than the raw input." ), ) - ollama_model: str = "" + # Which model performed the aggregation. When the MCP host (Claude) + # does it directly, set to the host model name (e.g. "claude-opus-4-6"). + # When the legacy Ollama pipeline is used, set to the Ollama model name. + model: str = "" + ollama_model: str = Field( + default="", + description="Deprecated — use 'model' instead. Kept for backward compatibility.", + ) duration_seconds: float = 0.0 stage_timing: PipelineStageTiming = Field( default_factory=PipelineStageTiming, @@ -301,8 +312,9 @@ class AggregatedMetadata(BaseModel): class AggregatedPayload(BaseModel): """The complete aggregated pentest data payload. - Returned by the blhackbox Ollama MCP server to Claude after all three - preprocessing agents (Ingestion, Processing, Synthesis) have run. + Produced by the MCP host (Claude) after collecting and structuring raw + tool outputs. Validated and persisted via the ``aggregate_results`` MCP + tool, then used by ``generate_report`` to produce the final report. 
""" session_id: str diff --git a/blhackbox/prompts/claude_playbook.md b/blhackbox/prompts/claude_playbook.md index 4927a45..a22f5b4 100644 --- a/blhackbox/prompts/claude_playbook.md +++ b/blhackbox/prompts/claude_playbook.md @@ -8,10 +8,16 @@ you will need them in Phase 4. ## Available Resources -You have access to multiple MCP servers and APIs providing a wide range of -security capabilities — network scanning, DNS enumeration, web vulnerability -testing, exploit lifecycle management, packet capture and traffic analysis, -AI-augmented security agents, and an AI preprocessing pipeline. +You have access to multiple MCP servers providing a wide range of security +capabilities — network scanning, DNS enumeration, web vulnerability testing, +exploit lifecycle management, packet capture and traffic analysis, and +evidence capture via headless Chromium screenshots. + +**You are the orchestrator.** You decide which tools to call, collect the raw +outputs, then parse, deduplicate, correlate, and structure them into an +`AggregatedPayload` yourself. Use `get_payload_schema` to see the expected +format, then `aggregate_results` to validate and persist your structured +payload for report generation. > Query each server's tool listing at the start of every engagement to discover > which tools and capabilities are available. Choose the best tool for each task @@ -83,38 +89,32 @@ Append every raw output to `raw_outputs`. --- -## Phase 4 -- Process (MANDATORY) +## Phase 4 -- Aggregate (MANDATORY) -**Objective:** Send **all** collected raw data to the Ollama MCP preprocessing -pipeline for AI-powered aggregation, deduplication, and structured extraction. +**Objective:** Structure all collected raw data into an `AggregatedPayload`. -> **This step is required.** All raw outputs from Phases 1-3 must be processed -> through the Ollama agents before generating the final report. 
+> **You do this yourself.** Parse, deduplicate, correlate, and structure the +> raw outputs from Phases 1-3 directly. No external pipeline needed. -1. Call `process_scan_results()` on the **Ollama MCP Server**, passing - `raw_outputs` (the dict of all tool outputs collected in Phases 1-3). +1. Call `get_payload_schema()` to retrieve the `AggregatedPayload` JSON schema + (only needed once per session — cache the result). -2. **Wait** for the server to return an `AggregatedPayload` object. This may - take several minutes depending on data volume. +2. Process the raw outputs yourself: + - **Parse** raw tool output into structured typed data (hosts, ports, + services, vulnerabilities, endpoints, subdomains, technologies, etc.) + - **Deduplicate** findings across tools (same CVE from nikto + nuclei → one entry) + - **Correlate** cross-tool evidence (nmap version + nikto CVE → higher confidence) + - **Assess severity** using pentesting rules (RCE = critical, XSS = medium, etc.) + - **Extract errors** (timeouts, WAF blocks, rate limits) into `error_log` + with `security_relevance` ratings + - **Generate executive summary** with risk level, top findings, and attack chains + - **Provide remediation** recommendations prioritized by severity and exploitability -3. 
The returned `AggregatedPayload` contains: - - `payload.findings` -- a `Findings` model with: - - `.hosts` -- discovered hosts and their ports/services - - `.vulnerabilities` -- deduplicated, severity-rated vulnerabilities - - `.endpoints` -- discovered web endpoints - - *(and other sub-fields as applicable)* - - `payload.error_log` -- a list of `ErrorLogEntry` items (scan errors, - timeouts, anomalies, each with a `security_relevance` rating) - - `payload.metadata` -- an `AggregatedMetadata` model with: - - `.tools_run` -- list of tool names that produced output - - `.total_raw_size_bytes` -- total bytes of raw input processed - - `.compressed_size_bytes` -- size after deduplication/compression - - `.compression_ratio` -- ratio of raw to compressed - - `.ollama_model` -- the model used for aggregation - - `.duration_seconds` -- wall-clock time for aggregation - - `.warning` -- optional warning string (e.g., token limits hit) +3. Call `aggregate_results(payload=)` to + validate and persist the payload. The tool returns a summary and the + session file path for report generation. -Do not modify the payload. Proceed directly to Phase 5. +Proceed directly to Phase 5. 
--- @@ -181,9 +181,9 @@ Provide prioritized, actionable remediation guidance: - **Tools used:** full list from `payload.metadata.tools_run` - **Scan metadata:** - Total raw size: `payload.metadata.total_raw_size_bytes` bytes - - Compressed size: `payload.metadata.compressed_size_bytes` bytes - - Compression ratio: `payload.metadata.compression_ratio` - - Ollama model: `payload.metadata.ollama_model` + - Structured size: `payload.metadata.structured_size_bytes` bytes + - Expansion ratio: `payload.metadata.expansion_ratio` + - Model: `payload.metadata.model` - Processing duration: `payload.metadata.duration_seconds` seconds - **Warnings:** any value from `payload.metadata.warning` - **Host inventory:** full table from `payload.findings.hosts` with ports, diff --git a/blhackbox/prompts/templates/README.md b/blhackbox/prompts/templates/README.md index 953f772..111381a 100644 --- a/blhackbox/prompts/templates/README.md +++ b/blhackbox/prompts/templates/README.md @@ -4,8 +4,8 @@ These templates provide structured workflows for autonomous penetration tests through the blhackbox framework. Each template describes **what** needs to be done in each phase — the MCP host decides **which** tools and servers to use. -All raw outputs must pass through the **Ollama MCP preprocessing pipeline** -(Ingestion → Processing → Synthesis) before the final report is generated. +All raw outputs must be structured into an `AggregatedPayload` by the MCP host +before the final report is generated. 
## Available Templates diff --git a/blhackbox/prompts/templates/api-security.md b/blhackbox/prompts/templates/api-security.md index 4928419..542927d 100644 --- a/blhackbox/prompts/templates/api-security.md +++ b/blhackbox/prompts/templates/api-security.md @@ -116,14 +116,15 @@ Look for: - Information disclosure in headers (server version, framework) - Missing security headers (CSP, HSTS, X-Content-Type-Options) -### Step 8: Data Processing (REQUIRED) +### Step 8: Data Aggregation (REQUIRED) -> **This step is mandatory.** All raw outputs must be processed through the -> Ollama agents before generating the final report. +> **This step is mandatory.** You handle data aggregation directly — no +> external pipeline needed. -1. Collect ALL raw outputs into a single dict keyed by tool/source name -2. Send all collected data through the **Ollama MCP preprocessing pipeline** (`process_scan_results()`) -3. Wait for the `AggregatedPayload` +1. Call `get_payload_schema()` to retrieve the `AggregatedPayload` JSON schema (cache after first call) +2. Parse, deduplicate, and correlate all raw outputs into the schema yourself +3. Call `aggregate_results(payload=)` to validate and persist +4. The payload includes: findings, error_log, attack_surface, executive_summary, remediation ### Step 9: API Security Report diff --git a/blhackbox/prompts/templates/bug-bounty.md b/blhackbox/prompts/templates/bug-bounty.md index d4ba393..7021227 100644 --- a/blhackbox/prompts/templates/bug-bounty.md +++ b/blhackbox/prompts/templates/bug-bounty.md @@ -135,14 +135,15 @@ For each confirmed vulnerability, capture visual proof using the Screenshot MCP 2. **AI vulnerability scanning** — Vulnerability scan agents 3. Check for known CVEs in identified frameworks and versions -### Step 7: Data Processing (REQUIRED) +### Step 7: Data Aggregation (REQUIRED) -> **This step is mandatory.** All raw outputs must be processed through the -> Ollama agents before generating the final report. 
+> **This step is mandatory.** You handle data aggregation directly — no +> external pipeline needed. -1. Collect ALL raw outputs from Steps 1-6 into a single dict keyed by tool/source name -2. Send all collected data through the **Ollama MCP preprocessing pipeline** (`process_scan_results()`) -3. Wait for the `AggregatedPayload` +1. Call `get_payload_schema()` to retrieve the `AggregatedPayload` JSON schema (cache after first call) +2. Parse, deduplicate, and correlate all raw outputs into the schema yourself +3. Call `aggregate_results(payload=)` to validate and persist +4. The payload includes: findings, error_log, attack_surface, executive_summary, remediation ### Step 8: Bug Bounty Report diff --git a/blhackbox/prompts/templates/full-attack-chain.md b/blhackbox/prompts/templates/full-attack-chain.md index be1205c..29224bc 100644 --- a/blhackbox/prompts/templates/full-attack-chain.md +++ b/blhackbox/prompts/templates/full-attack-chain.md @@ -177,15 +177,16 @@ Document each chain with: 3. Which tools/findings enabled each step 4. Business impact assessment -### Phase 6: Data Processing (REQUIRED) +### Phase 6: Data Aggregation (REQUIRED) Make sure to use all tools (all the MCP Servers available) and execute everything in parallel. Then: -> **This step is mandatory.** All raw outputs must be processed through the -> Ollama agents before generating the final report. +> **This step is mandatory.** You handle data aggregation directly — no +> external pipeline needed. -1. Collect ALL raw outputs from Phases 1-5 into a single dict keyed by tool/source name -2. Send all collected data through the **Ollama MCP preprocessing pipeline** (`process_scan_results()`) -3. Wait for the `AggregatedPayload` +1. Call `get_payload_schema()` to retrieve the `AggregatedPayload` JSON schema (cache after first call) +2. Parse, deduplicate, and correlate all raw outputs into the schema yourself +3. Call `aggregate_results(payload=)` to validate and persist +4. 
The payload includes: findings, error_log, attack_surface, executive_summary, remediation ### Phase 7: Comprehensive Report (really make it comprehensive, be specific and detailed) diff --git a/blhackbox/prompts/templates/full-pentest.md b/blhackbox/prompts/templates/full-pentest.md index e0c7a76..60726de 100644 --- a/blhackbox/prompts/templates/full-pentest.md +++ b/blhackbox/prompts/templates/full-pentest.md @@ -81,16 +81,17 @@ Append ALL raw outputs to the dict. 7. **Credential discovery** — Extract cleartext credentials from captured traffic 8. **Screenshot evidence** — Capture web page screenshots of confirmed vulnerabilities for PoC documentation -### Phase 5: Data Processing (REQUIRED) +### Phase 5: Data Aggregation (REQUIRED) -**Goal:** Transform raw data into structured, actionable intelligence. +**Goal:** Structure all collected data into an AggregatedPayload. -> **This step is mandatory.** All raw outputs must be processed through the -> Ollama agents before generating the final report. +> **This step is mandatory.** You handle data aggregation directly — no +> external pipeline needed. -1. Send all collected raw outputs to the **Ollama MCP preprocessing pipeline** (`process_scan_results()`) with the target identifier -2. Wait for the pipeline to return an `AggregatedPayload` -3. The payload includes: findings, error_log, attack_surface, executive_summary, remediation +1. Call `get_payload_schema()` to retrieve the `AggregatedPayload` JSON schema (cache after first call) +2. Parse, deduplicate, and correlate all raw outputs into the schema yourself +3. Call `aggregate_results(payload=)` to validate and persist +4. 
The payload includes: findings, error_log, attack_surface, executive_summary, remediation ### Phase 6: Report Generation diff --git a/blhackbox/prompts/templates/network-infrastructure.md b/blhackbox/prompts/templates/network-infrastructure.md index e2ff47c..17a8179 100644 --- a/blhackbox/prompts/templates/network-infrastructure.md +++ b/blhackbox/prompts/templates/network-infrastructure.md @@ -80,14 +80,15 @@ For discovered services (SSH, FTP, HTTP auth, databases): **Important:** Use only default/common credential lists. Do not run exhaustive brute force attacks without explicit authorization. -### Step 7: Data Processing (REQUIRED) +### Step 7: Data Aggregation (REQUIRED) -> **This step is mandatory.** All raw outputs must be processed through the -> Ollama agents before generating the final report. +> **This step is mandatory.** You handle data aggregation directly — no +> external pipeline needed. -1. Collect ALL raw outputs into a single dict keyed by tool/source name -2. Send all collected data through the **Ollama MCP preprocessing pipeline** (`process_scan_results()`) -3. Wait for the `AggregatedPayload` +1. Call `get_payload_schema()` to retrieve the `AggregatedPayload` JSON schema (cache after first call) +2. Parse, deduplicate, and correlate all raw outputs into the schema yourself +3. Call `aggregate_results(payload=)` to validate and persist +4. The payload includes: findings, error_log, attack_surface, executive_summary, remediation ### Step 8: Network Assessment Report diff --git a/blhackbox/prompts/templates/osint-gathering.md b/blhackbox/prompts/templates/osint-gathering.md index 15eeff5..f606646 100644 --- a/blhackbox/prompts/templates/osint-gathering.md +++ b/blhackbox/prompts/templates/osint-gathering.md @@ -74,14 +74,15 @@ If any packet captures or traffic samples are available for analysis: 1. **Exploit search** — Search for known exploits targeting discovered technologies and services 2. 
Document the target's potential risk exposure based on known exploit availability -### Step 7: Data Processing (REQUIRED) +### Step 7: Data Aggregation (REQUIRED) -> **This step is mandatory.** All raw outputs must be processed through the -> Ollama agents before generating the final report. +> **This step is mandatory.** You handle data aggregation directly — no +> external pipeline needed. -1. Collect ALL raw outputs from Steps 1-6 into a single dict keyed by tool/source name -2. Send all collected data through the **Ollama MCP preprocessing pipeline** (`process_scan_results()`) -3. Wait for the `AggregatedPayload` +1. Call `get_payload_schema()` to retrieve the `AggregatedPayload` JSON schema (cache after first call) +2. Parse, deduplicate, and correlate all raw outputs into the schema yourself +3. Call `aggregate_results(payload=)` to validate and persist +4. The payload includes: findings, error_log, attack_surface, executive_summary, remediation ### Step 8: OSINT Report diff --git a/blhackbox/prompts/templates/quick-scan.md b/blhackbox/prompts/templates/quick-scan.md index 640928d..b6cecc7 100644 --- a/blhackbox/prompts/templates/quick-scan.md +++ b/blhackbox/prompts/templates/quick-scan.md @@ -40,11 +40,15 @@ Run these steps concurrently where possible for speed: 2. **Traffic statistics** — Quick protocol distribution overview 3. **Exploit validation** — Validate any high-severity findings -### Step 3: Data Processing (REQUIRED) +### Step 3: Data Aggregation (REQUIRED) -1. Collect ALL raw outputs from previous steps into a single dict keyed by tool/source name -2. Send all collected data through the **Ollama MCP preprocessing pipeline** (`process_scan_results()`) -3. Wait for the `AggregatedPayload` +> **This step is mandatory.** You handle data aggregation directly — no +> external pipeline needed. + +1. Call `get_payload_schema()` to retrieve the `AggregatedPayload` JSON schema (cache after first call) +2. 
Parse, deduplicate, and correlate all raw outputs into the schema yourself +3. Call `aggregate_results(payload=)` to validate and persist +4. The payload includes: findings, error_log, attack_surface, executive_summary, remediation ### Step 4: Quick Report diff --git a/blhackbox/prompts/templates/recon-deep.md b/blhackbox/prompts/templates/recon-deep.md index 461b413..3014f33 100644 --- a/blhackbox/prompts/templates/recon-deep.md +++ b/blhackbox/prompts/templates/recon-deep.md @@ -62,14 +62,15 @@ Identify the technology stack for all web-facing services: 3. **Metadata extraction** — Extract metadata from any downloadable files 4. **Web reconnaissance** — Web recon agents for automated web technology analysis -### Step 5: Data Processing (REQUIRED) +### Step 5: Data Aggregation (REQUIRED) -> **This step is mandatory.** All raw outputs must be processed through the -> Ollama agents before generating the final report. +> **This step is mandatory.** You handle data aggregation directly — no +> external pipeline needed. -1. Collect ALL raw outputs from Steps 1-4 into a single dict keyed by tool/source name -2. Send all collected data through the **Ollama MCP preprocessing pipeline** (`process_scan_results()`) -3. Wait for the `AggregatedPayload` to return +1. Call `get_payload_schema()` to retrieve the `AggregatedPayload` JSON schema (cache after first call) +2. Parse, deduplicate, and correlate all raw outputs into the schema yourself +3. Call `aggregate_results(payload=)` to validate and persist +4. The payload includes: findings, error_log, attack_surface, executive_summary, remediation ### Step 6: Reconnaissance Report diff --git a/blhackbox/prompts/templates/vuln-assessment.md b/blhackbox/prompts/templates/vuln-assessment.md index 6446b2d..5321a53 100644 --- a/blhackbox/prompts/templates/vuln-assessment.md +++ b/blhackbox/prompts/templates/vuln-assessment.md @@ -99,14 +99,15 @@ For each web service discovered: 4. 
Test only default/common credentials for discovered login services 5. Focus on: admin panels, SSH, FTP, database ports -### Step 8: Data Processing (REQUIRED) +### Step 8: Data Aggregation (REQUIRED) -> **This step is mandatory.** All raw outputs must be processed through the -> Ollama agents before generating the final report. +> **This step is mandatory.** You handle data aggregation directly — no +> external pipeline needed. -1. Collect ALL raw outputs into a single dict keyed by tool/source name -2. Send all collected data through the **Ollama MCP preprocessing pipeline** (`process_scan_results()`) -3. Wait for the `AggregatedPayload` +1. Call `get_payload_schema()` to retrieve the `AggregatedPayload` JSON schema (cache after first call) +2. Parse, deduplicate, and correlate all raw outputs into the schema yourself +3. Call `aggregate_results(payload=)` to validate and persist +4. The payload includes: findings, error_log, attack_surface, executive_summary, remediation ### Step 9: Vulnerability Report diff --git a/blhackbox/prompts/templates/web-app-assessment.md b/blhackbox/prompts/templates/web-app-assessment.md index 8bf790e..f729e2d 100644 --- a/blhackbox/prompts/templates/web-app-assessment.md +++ b/blhackbox/prompts/templates/web-app-assessment.md @@ -100,14 +100,15 @@ For each discovered form, parameter, or input point: - `Referrer-Policy` - `Permissions-Policy` -### Step 8: Data Processing (REQUIRED) +### Step 8: Data Aggregation (REQUIRED) -> **This step is mandatory.** All raw outputs must be processed through the -> Ollama agents before generating the final report. +> **This step is mandatory.** You handle data aggregation directly — no +> external pipeline needed. -1. Collect ALL raw outputs into a dict keyed by tool/source name -2. Send all collected data through the **Ollama MCP preprocessing pipeline** (`process_scan_results()`) -3. Wait for the `AggregatedPayload` +1. 
Call `get_payload_schema()` to retrieve the `AggregatedPayload` JSON schema (cache after first call) +2. Parse, deduplicate, and correlate all raw outputs into the schema yourself +3. Call `aggregate_results(payload=)` to validate and persist +4. The payload includes: findings, error_log, attack_surface, executive_summary, remediation ### Step 9: Web Application Report diff --git a/docker-compose.yml b/docker-compose.yml index 296d6e7..f83c93e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,17 +2,27 @@ # # Usage: # docker compose pull Pull all pre-built images -# docker compose up -d Start core stack (9 containers) -# docker compose --profile gateway up -d Start with MCP Gateway (10 containers) +# docker compose up -d Start core stack (4 containers) +# docker compose --profile ollama up -d Start with Ollama pipeline (9 containers, legacy) +# docker compose --profile gateway up -d Start with MCP Gateway # docker compose --profile claude-code up -d Start with Claude Code container # docker compose --profile neo4j up -d Start with Neo4j # docker compose build Build all custom images locally # -# MCP servers: +# MCP servers (core — always started): # kali-mcp (FastMCP SSE, port 9001) — Kali Linux security tools (70+) + Metasploit Framework # wire-mcp (FastMCP SSE, port 9003) — Wireshark/tshark (7 tools) # screenshot-mcp (FastMCP SSE, port 9004) — Headless Chromium screenshots (4 tools) -# ollama-mcp (FastMCP SSE, port 9000) — Ollama preprocessing pipeline +# +# Optional (--profile ollama): +# ollama-mcp (FastMCP SSE, port 9000) — Ollama preprocessing pipeline (legacy) +# agent-ingestion, agent-processing, agent-synthesis, ollama +# +# The MCP host (Claude Code, Claude Desktop, ChatGPT) now handles data +# aggregation directly — it parses raw tool outputs, deduplicates, and +# structures them into an AggregatedPayload, then validates via the +# aggregate_results tool. 
The Ollama pipeline is kept as an optional +# fallback for local-only / offline processing. # # Claude Code (Docker) connects directly to MCP servers via SSE. # Claude Desktop / ChatGPT connect via the MCP Gateway (--profile gateway). @@ -64,7 +74,7 @@ services: - --transport=streaming - --port=8080 - --catalog=/catalog.yaml - - --servers=kali-mcp,wire-mcp,screenshot-mcp,ollama-mcp + - --servers=kali-mcp,wire-mcp,screenshot-mcp - --log-calls networks: - blhackbox_net @@ -75,8 +85,6 @@ services: condition: service_healthy screenshot-mcp: condition: service_healthy - ollama-mcp: - condition: service_healthy # -- KALI MCP SERVER ------------------------------------------------------- # Host-based clients can connect directly via SSE at http://localhost:9001/sse @@ -184,17 +192,21 @@ services: networks: - blhackbox_net - # -- OLLAMA MCP SERVER (CUSTOM) -------------------------------------------- + # -- OLLAMA MCP SERVER (OPTIONAL — LEGACY) ---------------------------------- # blhackbox custom component. NOT an official Ollama product. # Thin MCP orchestrator -- calls the 3 agent containers in sequence # via HTTP, assembles the AggregatedPayload, and returns it to Claude. - # Does NOT call Ollama directly. + # + # OPTIONAL since v2.1: The MCP host (Claude) now handles aggregation + # directly via the aggregate_results tool, which is faster and more + # accurate. Enable this pipeline with: docker compose --profile ollama up -d ollama-mcp: image: crhacky/blhackbox:ollama-mcp build: context: . dockerfile: docker/ollama-mcp.Dockerfile container_name: blhackbox-ollama-mcp + profiles: ["ollama"] restart: unless-stopped init: true healthcheck: @@ -224,15 +236,17 @@ services: networks: - blhackbox_net - # -- AGENT 1: INGESTION --------------------------------------------------- + # -- AGENT 1: INGESTION (OPTIONAL — LEGACY) -------------------------------- # Parses and structures raw Kali tool output. # Calls Ollama /api/chat with ingestion system prompt. 
+ # Enable with: docker compose --profile ollama up -d agent-ingestion: image: crhacky/blhackbox:agent-ingestion build: context: . dockerfile: docker/agent-ingestion.Dockerfile container_name: blhackbox-agent-ingestion + profiles: ["ollama"] restart: unless-stopped environment: OLLAMA_HOST: "http://ollama:11434" @@ -252,15 +266,17 @@ services: networks: - blhackbox_net - # -- AGENT 2: PROCESSING -------------------------------------------------- + # -- AGENT 2: PROCESSING (OPTIONAL — LEGACY) -------------------------------- # Deduplicates, extracts errors, compresses data into efficient blobs. # Annotates error_log entries with security_relevance. + # Enable with: docker compose --profile ollama up -d agent-processing: image: crhacky/blhackbox:agent-processing build: context: . dockerfile: docker/agent-processing.Dockerfile container_name: blhackbox-agent-processing + profiles: ["ollama"] restart: unless-stopped environment: OLLAMA_HOST: "http://ollama:11434" @@ -280,15 +296,17 @@ services: networks: - blhackbox_net - # -- AGENT 3: SYNTHESIS ---------------------------------------------------- + # -- AGENT 3: SYNTHESIS (OPTIONAL — LEGACY) -------------------------------- # Merges Agent 1 + Agent 2 output into final AggregatedPayload. # Adds metadata, resolves conflicts, sends back to Claude. + # Enable with: docker compose --profile ollama up -d agent-synthesis: image: crhacky/blhackbox:agent-synthesis build: context: . 
dockerfile: docker/agent-synthesis.Dockerfile container_name: blhackbox-agent-synthesis + profiles: ["ollama"] restart: unless-stopped environment: OLLAMA_HOST: "http://ollama:11434" @@ -308,16 +326,18 @@ services: networks: - blhackbox_net - # -- OLLAMA ---------------------------------------------------------------- + # -- OLLAMA (OPTIONAL — LEGACY) ------------------------------------------- # Custom entrypoint that auto-pulls and warms up the configured model on # startup, eliminating cold-start delays (~17 min → seconds on subsequent # requests). All 3 agent containers call this via /api/chat independently. + # Enable with: docker compose --profile ollama up -d ollama: image: crhacky/blhackbox:ollama build: context: . dockerfile: docker/ollama.Dockerfile container_name: blhackbox-ollama + profiles: ["ollama"] restart: unless-stopped environment: OLLAMA_MODEL: "${OLLAMA_MODEL:-llama3.1:8b}" @@ -416,8 +436,6 @@ services: condition: service_healthy screenshot-mcp: condition: service_healthy - ollama-mcp: - condition: service_healthy networks: - blhackbox_net diff --git a/docker/claude-code-entrypoint.sh b/docker/claude-code-entrypoint.sh index d3b0497..c8e2d7e 100755 --- a/docker/claude-code-entrypoint.sh +++ b/docker/claude-code-entrypoint.sh @@ -104,10 +104,13 @@ else MCP_FAIL=$((MCP_FAIL + 1)) fi -if wait_for_service "Ollama Pipeline" "http://ollama-mcp:9000/sse"; then - MCP_OK=$((MCP_OK + 1)) +# Ollama Pipeline is optional — check but don't count as failure +OLLAMA_STATUS="OFF" +if check_service "Ollama Pipeline" "http://ollama-mcp:9000/sse"; then + printf " %-22s [ ${CHECK} ]\n" "Ollama Pipeline" + OLLAMA_STATUS="ON" else - MCP_FAIL=$((MCP_FAIL + 1)) + printf " %-22s [ ${WARN} ] (optional — not running)\n" "Ollama Pipeline" fi # Summary @@ -131,7 +134,13 @@ echo -e " ${BOLD}MCP servers (connected via SSE):${NC}" echo -e " kali ${DIM}Kali Linux security tools + Metasploit (70+ tools)${NC}" echo -e " wireshark ${DIM}WireMCP — tshark packet capture & 
analysis${NC}" echo -e " screenshot ${DIM}Screenshot MCP — headless Chromium evidence capture${NC}" -echo -e " ollama-pipeline ${DIM}Ollama preprocessing (3-agent pipeline)${NC}" +if [ "$OLLAMA_STATUS" = "ON" ]; then +echo -e " ollama-pipeline ${DIM}Ollama preprocessing (3-agent pipeline, optional)${NC}" +fi +echo "" +echo -e " ${BOLD}Data aggregation:${NC}" +echo -e " ${DIM}You (Claude) handle parsing, deduplication, and synthesis directly.${NC}" +echo -e " ${DIM}Use get_payload_schema + aggregate_results to validate & persist.${NC}" echo "" echo -e " ${BOLD}Prompt templates (autonomous pentesting):${NC}" echo -e " ${CYAN}full-pentest${NC} ${DIM}Complete end-to-end penetration test${NC}" From a34966f548efdab42bb32c5912772d1e9c02ded0 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 7 Mar 2026 08:03:45 +0000 Subject: [PATCH 2/3] Remove unused pathlib.Path import in server.py https://claude.ai/code/session_01MXWTGUUSheo3EkgHrzRmjy --- blhackbox/mcp/server.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/blhackbox/mcp/server.py b/blhackbox/mcp/server.py index 5c0a209..5a21412 100644 --- a/blhackbox/mcp/server.py +++ b/blhackbox/mcp/server.py @@ -516,8 +516,6 @@ async def _do_get_template(args: dict[str, Any]) -> str: async def _do_aggregate_results(args: dict[str, Any]) -> str: - from pathlib import Path - from blhackbox.models.aggregated_payload import AggregatedPayload raw_payload = args["payload"] From d5af4267e5997a0443eefa51157dd61be8f18a02 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 7 Mar 2026 08:19:49 +0000 Subject: [PATCH 3/3] Update tests for aggregate_results and get_payload_schema tools - Add aggregate_results and get_payload_schema to expected tool sets - Update tool counts from 11 to 13 in test_mcp_server and test_screenshot_mcp - Update test_prompts to check for aggregation pipeline instead of Ollama https://claude.ai/code/session_01MXWTGUUSheo3EkgHrzRmjy --- tests/test_mcp_server.py | 3 ++- tests/test_prompts.py | 6 +++--- 
tests/test_screenshot_mcp.py | 6 ++++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index 1596b09..0c7b59f 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -33,6 +33,7 @@ def test_expected_tools_present(self) -> None: "list_tools", "generate_report", "list_templates", "get_template", "take_screenshot", "take_element_screenshot", "list_screenshots", "annotate_screenshot", + "aggregate_results", "get_payload_schema", } assert expected == names @@ -52,7 +53,7 @@ def test_query_graph_requires_cypher(self) -> None: class TestMCPListTools: async def test_list_tools_returns_all(self) -> None: tools = await handle_list_tools() - assert len(tools) == 11 + assert len(tools) == 13 names = {t.name for t in tools} assert "run_tool" in names assert "list_templates" in names diff --git a/tests/test_prompts.py b/tests/test_prompts.py index 27d1c22..f5d90fb 100644 --- a/tests/test_prompts.py +++ b/tests/test_prompts.py @@ -42,11 +42,11 @@ def test_all_templates_have_data_processing_step(self) -> None: f"Template {name} missing mandatory data processing step" ) - def test_all_templates_mention_ollama_pipeline(self) -> None: + def test_all_templates_mention_aggregation_pipeline(self) -> None: for name in TEMPLATES: content = load_template(name) - assert "Ollama" in content or "process_scan_results" in content, ( - f"Template {name} does not reference Ollama preprocessing pipeline" + assert "aggregate_results" in content or "AggregatedPayload" in content, ( + f"Template {name} does not reference data aggregation pipeline" ) def test_all_templates_have_placeholder_section(self) -> None: diff --git a/tests/test_screenshot_mcp.py b/tests/test_screenshot_mcp.py index 81593f1..5343fa3 100644 --- a/tests/test_screenshot_mcp.py +++ b/tests/test_screenshot_mcp.py @@ -68,8 +68,8 @@ class TestScreenshotToolCount: async def test_list_tools_includes_screenshot_tools(self) -> None: tools = await 
handle_list_tools() - # 7 core tools + 4 screenshot tools = 11 - assert len(tools) == 11 + # 9 core tools + 4 screenshot tools = 13 + assert len(tools) == 13 async def test_list_tools_has_all_expected_names(self) -> None: tools = await handle_list_tools() @@ -86,6 +86,8 @@ async def test_list_tools_has_all_expected_names(self) -> None: "take_element_screenshot", "list_screenshots", "annotate_screenshot", + "aggregate_results", + "get_payload_schema", } assert expected == names